/* clang 19.0.0git — avx512fintrin.h (reconstructed from the doxygen source listing) */
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11 #endif
12 
13 #ifndef __AVX512FINTRIN_H
14 #define __AVX512FINTRIN_H
15 
/* 512-bit vector types used internally by the intrinsics below. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types (64-byte aligned). */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned variants for loads/stores from arbitrary addresses. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per lane. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
43 
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
50 
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* No corresponding instruction */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
63 
/* Shuffle-control constants: each letter selects one 32-bit element
 * (A = element 0 ... D = element 3), two bits per position. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
/* Normalization interval selectors for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANT_NORM_ENUM;
/* Sign-control selectors for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,  /* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero, /* sign = 0                    */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANT_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512")))
177 
178 /* Create vectors with repeated elements */
179 
180 static __inline __m512i __DEFAULT_FN_ATTRS512
182 {
183  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184 }
185 
186 #define _mm512_setzero_epi32 _mm512_setzero_si512
187 
188 static __inline__ __m512d __DEFAULT_FN_ATTRS512
190 {
191  return (__m512d)__builtin_ia32_undef512();
192 }
193 
194 static __inline__ __m512 __DEFAULT_FN_ATTRS512
196 {
197  return (__m512)__builtin_ia32_undef512();
198 }
199 
200 static __inline__ __m512 __DEFAULT_FN_ATTRS512
202 {
203  return (__m512)__builtin_ia32_undef512();
204 }
205 
206 static __inline__ __m512i __DEFAULT_FN_ATTRS512
208 {
209  return (__m512i)__builtin_ia32_undef512();
210 }
211 
212 static __inline__ __m512i __DEFAULT_FN_ATTRS512
214 {
215  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
216  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
217 }
218 
219 static __inline__ __m512i __DEFAULT_FN_ATTRS512
220 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
221 {
222  return (__m512i)__builtin_ia32_selectd_512(__M,
223  (__v16si) _mm512_broadcastd_epi32(__A),
224  (__v16si) __O);
225 }
226 
227 static __inline__ __m512i __DEFAULT_FN_ATTRS512
229 {
230  return (__m512i)__builtin_ia32_selectd_512(__M,
231  (__v16si) _mm512_broadcastd_epi32(__A),
232  (__v16si) _mm512_setzero_si512());
233 }
234 
235 static __inline__ __m512i __DEFAULT_FN_ATTRS512
237 {
238  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
239  0, 0, 0, 0, 0, 0, 0, 0);
240 }
241 
242 static __inline__ __m512i __DEFAULT_FN_ATTRS512
243 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
244 {
245  return (__m512i)__builtin_ia32_selectq_512(__M,
246  (__v8di) _mm512_broadcastq_epi64(__A),
247  (__v8di) __O);
248 
249 }
250 
251 static __inline__ __m512i __DEFAULT_FN_ATTRS512
253 {
254  return (__m512i)__builtin_ia32_selectq_512(__M,
255  (__v8di) _mm512_broadcastq_epi64(__A),
256  (__v8di) _mm512_setzero_si512());
257 }
258 
259 
260 static __inline __m512 __DEFAULT_FN_ATTRS512
262 {
263  return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
264  0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
265 }
266 
267 #define _mm512_setzero _mm512_setzero_ps
268 
269 static __inline __m512d __DEFAULT_FN_ATTRS512
271 {
272  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
273 }
274 
275 static __inline __m512 __DEFAULT_FN_ATTRS512
276 _mm512_set1_ps(float __w)
277 {
278  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
279  __w, __w, __w, __w, __w, __w, __w, __w };
280 }
281 
282 static __inline __m512d __DEFAULT_FN_ATTRS512
283 _mm512_set1_pd(double __w)
284 {
285  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
286 }
287 
288 static __inline __m512i __DEFAULT_FN_ATTRS512
290 {
291  return __extension__ (__m512i)(__v64qi){
292  __w, __w, __w, __w, __w, __w, __w, __w,
293  __w, __w, __w, __w, __w, __w, __w, __w,
294  __w, __w, __w, __w, __w, __w, __w, __w,
295  __w, __w, __w, __w, __w, __w, __w, __w,
296  __w, __w, __w, __w, __w, __w, __w, __w,
297  __w, __w, __w, __w, __w, __w, __w, __w,
298  __w, __w, __w, __w, __w, __w, __w, __w,
299  __w, __w, __w, __w, __w, __w, __w, __w };
300 }
301 
302 static __inline __m512i __DEFAULT_FN_ATTRS512
304 {
305  return __extension__ (__m512i)(__v32hi){
306  __w, __w, __w, __w, __w, __w, __w, __w,
307  __w, __w, __w, __w, __w, __w, __w, __w,
308  __w, __w, __w, __w, __w, __w, __w, __w,
309  __w, __w, __w, __w, __w, __w, __w, __w };
310 }
311 
312 static __inline __m512i __DEFAULT_FN_ATTRS512
314 {
315  return __extension__ (__m512i)(__v16si){
316  __s, __s, __s, __s, __s, __s, __s, __s,
317  __s, __s, __s, __s, __s, __s, __s, __s };
318 }
319 
320 static __inline __m512i __DEFAULT_FN_ATTRS512
322 {
323  return (__m512i)__builtin_ia32_selectd_512(__M,
324  (__v16si)_mm512_set1_epi32(__A),
325  (__v16si)_mm512_setzero_si512());
326 }
327 
328 static __inline __m512i __DEFAULT_FN_ATTRS512
329 _mm512_set1_epi64(long long __d)
330 {
331  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
332 }
333 
334 static __inline __m512i __DEFAULT_FN_ATTRS512
336 {
337  return (__m512i)__builtin_ia32_selectq_512(__M,
338  (__v8di)_mm512_set1_epi64(__A),
339  (__v8di)_mm512_setzero_si512());
340 }
341 
342 static __inline__ __m512 __DEFAULT_FN_ATTRS512
344 {
345  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
346  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
347 }
348 
349 static __inline __m512i __DEFAULT_FN_ATTRS512
350 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
351 {
352  return __extension__ (__m512i)(__v16si)
353  { __D, __C, __B, __A, __D, __C, __B, __A,
354  __D, __C, __B, __A, __D, __C, __B, __A };
355 }
356 
357 static __inline __m512i __DEFAULT_FN_ATTRS512
358 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
359  long long __D)
360 {
361  return __extension__ (__m512i) (__v8di)
362  { __D, __C, __B, __A, __D, __C, __B, __A };
363 }
364 
365 static __inline __m512d __DEFAULT_FN_ATTRS512
366 _mm512_set4_pd (double __A, double __B, double __C, double __D)
367 {
368  return __extension__ (__m512d)
369  { __D, __C, __B, __A, __D, __C, __B, __A };
370 }
371 
372 static __inline __m512 __DEFAULT_FN_ATTRS512
373 _mm512_set4_ps (float __A, float __B, float __C, float __D)
374 {
375  return __extension__ (__m512)
376  { __D, __C, __B, __A, __D, __C, __B, __A,
377  __D, __C, __B, __A, __D, __C, __B, __A };
378 }
379 
380 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
381  _mm512_set4_epi32((e3),(e2),(e1),(e0))
382 
383 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
384  _mm512_set4_epi64((e3),(e2),(e1),(e0))
385 
386 #define _mm512_setr4_pd(e0,e1,e2,e3) \
387  _mm512_set4_pd((e3),(e2),(e1),(e0))
388 
389 #define _mm512_setr4_ps(e0,e1,e2,e3) \
390  _mm512_set4_ps((e3),(e2),(e1),(e0))
391 
392 static __inline__ __m512d __DEFAULT_FN_ATTRS512
394 {
395  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
396  0, 0, 0, 0, 0, 0, 0, 0);
397 }
398 
399 /* Cast between vector types */
400 
401 static __inline __m512d __DEFAULT_FN_ATTRS512
403 {
404  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
405  1, 2, 3, 4, 5, 6, 7);
406 }
407 
408 static __inline __m512 __DEFAULT_FN_ATTRS512
410 {
411  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
412  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
413 }
414 
415 static __inline __m128d __DEFAULT_FN_ATTRS512
417 {
418  return __builtin_shufflevector(__a, __a, 0, 1);
419 }
420 
421 static __inline __m256d __DEFAULT_FN_ATTRS512
423 {
424  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
425 }
426 
427 static __inline __m128 __DEFAULT_FN_ATTRS512
429 {
430  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
431 }
432 
433 static __inline __m256 __DEFAULT_FN_ATTRS512
435 {
436  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
437 }
438 
439 static __inline __m512 __DEFAULT_FN_ATTRS512
440 _mm512_castpd_ps (__m512d __A)
441 {
442  return (__m512) (__A);
443 }
444 
445 static __inline __m512i __DEFAULT_FN_ATTRS512
446 _mm512_castpd_si512 (__m512d __A)
447 {
448  return (__m512i) (__A);
449 }
450 
451 static __inline__ __m512d __DEFAULT_FN_ATTRS512
453 {
454  __m256d __B = __builtin_nondeterministic_value(__B);
455  return __builtin_shufflevector(
456  __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
457  __B, 0, 1, 2, 3, 4, 5, 6, 7);
458 }
459 
460 static __inline __m512d __DEFAULT_FN_ATTRS512
461 _mm512_castps_pd (__m512 __A)
462 {
463  return (__m512d) (__A);
464 }
465 
466 static __inline __m512i __DEFAULT_FN_ATTRS512
468 {
469  return (__m512i) (__A);
470 }
471 
472 static __inline__ __m512 __DEFAULT_FN_ATTRS512
474 {
475  __m256 __B = __builtin_nondeterministic_value(__B);
476  return __builtin_shufflevector(
477  __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
478  __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
479 }
480 
481 static __inline__ __m512i __DEFAULT_FN_ATTRS512
483 {
484  __m256i __B = __builtin_nondeterministic_value(__B);
485  return __builtin_shufflevector(
486  __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
487  __B, 0, 1, 2, 3, 4, 5, 6, 7);
488 }
489 
490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
492 {
493  return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
494 }
495 
496 static __inline __m512 __DEFAULT_FN_ATTRS512
497 _mm512_castsi512_ps (__m512i __A)
498 {
499  return (__m512) (__A);
500 }
501 
502 static __inline __m512d __DEFAULT_FN_ATTRS512
503 _mm512_castsi512_pd (__m512i __A)
504 {
505  return (__m512d) (__A);
506 }
507 
508 static __inline __m128i __DEFAULT_FN_ATTRS512
510 {
511  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
512 }
513 
514 static __inline __m256i __DEFAULT_FN_ATTRS512
516 {
517  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
518 }
519 
520 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
522 {
523  return (__mmask16)__a;
524 }
525 
526 static __inline__ int __DEFAULT_FN_ATTRS
528 {
529  return (int)__a;
530 }
531 
532 /// Constructs a 512-bit floating-point vector of [8 x double] from a
533 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
534 /// contain the value of the source vector. The upper 384 bits are set
535 /// to zero.
536 ///
537 /// \headerfile <x86intrin.h>
538 ///
539 /// This intrinsic has no corresponding instruction.
540 ///
541 /// \param __a
542 /// A 128-bit vector of [2 x double].
543 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
544 /// contain the value of the parameter. The upper 384 bits are set to zero.
545 static __inline __m512d __DEFAULT_FN_ATTRS512
547 {
548  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
549 }
550 
551 /// Constructs a 512-bit floating-point vector of [8 x double] from a
552 /// 256-bit floating-point vector of [4 x double]. The lower 256 bits
553 /// contain the value of the source vector. The upper 256 bits are set
554 /// to zero.
555 ///
556 /// \headerfile <x86intrin.h>
557 ///
558 /// This intrinsic has no corresponding instruction.
559 ///
560 /// \param __a
561 /// A 256-bit vector of [4 x double].
562 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
563 /// contain the value of the parameter. The upper 256 bits are set to zero.
564 static __inline __m512d __DEFAULT_FN_ATTRS512
566 {
567  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
568 }
569 
570 /// Constructs a 512-bit floating-point vector of [16 x float] from a
571 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
572 /// the value of the source vector. The upper 384 bits are set to zero.
573 ///
574 /// \headerfile <x86intrin.h>
575 ///
576 /// This intrinsic has no corresponding instruction.
577 ///
578 /// \param __a
579 /// A 128-bit vector of [4 x float].
580 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
581 /// contain the value of the parameter. The upper 384 bits are set to zero.
582 static __inline __m512 __DEFAULT_FN_ATTRS512
584 {
585  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
586 }
587 
588 /// Constructs a 512-bit floating-point vector of [16 x float] from a
589 /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
590 /// the value of the source vector. The upper 256 bits are set to zero.
591 ///
592 /// \headerfile <x86intrin.h>
593 ///
594 /// This intrinsic has no corresponding instruction.
595 ///
596 /// \param __a
597 /// A 256-bit vector of [8 x float].
598 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
599 /// contain the value of the parameter. The upper 256 bits are set to zero.
600 static __inline __m512 __DEFAULT_FN_ATTRS512
602 {
603  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
604 }
605 
606 /// Constructs a 512-bit integer vector from a 128-bit integer vector.
607 /// The lower 128 bits contain the value of the source vector. The upper
608 /// 384 bits are set to zero.
609 ///
610 /// \headerfile <x86intrin.h>
611 ///
612 /// This intrinsic has no corresponding instruction.
613 ///
614 /// \param __a
615 /// A 128-bit integer vector.
616 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
617 /// the parameter. The upper 384 bits are set to zero.
618 static __inline __m512i __DEFAULT_FN_ATTRS512
620 {
621  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
622 }
623 
624 /// Constructs a 512-bit integer vector from a 256-bit integer vector.
625 /// The lower 256 bits contain the value of the source vector. The upper
626 /// 256 bits are set to zero.
627 ///
628 /// \headerfile <x86intrin.h>
629 ///
630 /// This intrinsic has no corresponding instruction.
631 ///
632 /// \param __a
633 /// A 256-bit integer vector.
634 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
635 /// the parameter. The upper 256 bits are set to zero.
636 static __inline __m512i __DEFAULT_FN_ATTRS512
638 {
639  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
640 }
641 
642 /* Bitwise operators */
643 static __inline__ __m512i __DEFAULT_FN_ATTRS512
644 _mm512_and_epi32(__m512i __a, __m512i __b)
645 {
646  return (__m512i)((__v16su)__a & (__v16su)__b);
647 }
648 
649 static __inline__ __m512i __DEFAULT_FN_ATTRS512
650 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
651 {
652  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
653  (__v16si) _mm512_and_epi32(__a, __b),
654  (__v16si) __src);
655 }
656 
657 static __inline__ __m512i __DEFAULT_FN_ATTRS512
658 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
659 {
660  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
661  __k, __a, __b);
662 }
663 
664 static __inline__ __m512i __DEFAULT_FN_ATTRS512
665 _mm512_and_epi64(__m512i __a, __m512i __b)
666 {
667  return (__m512i)((__v8du)__a & (__v8du)__b);
668 }
669 
670 static __inline__ __m512i __DEFAULT_FN_ATTRS512
671 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
672 {
673  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
674  (__v8di) _mm512_and_epi64(__a, __b),
675  (__v8di) __src);
676 }
677 
678 static __inline__ __m512i __DEFAULT_FN_ATTRS512
679 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
680 {
681  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
682  __k, __a, __b);
683 }
684 
685 static __inline__ __m512i __DEFAULT_FN_ATTRS512
686 _mm512_andnot_si512 (__m512i __A, __m512i __B)
687 {
688  return (__m512i)(~(__v8du)__A & (__v8du)__B);
689 }
690 
691 static __inline__ __m512i __DEFAULT_FN_ATTRS512
692 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
693 {
694  return (__m512i)(~(__v16su)__A & (__v16su)__B);
695 }
696 
697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
699 {
700  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
701  (__v16si)_mm512_andnot_epi32(__A, __B),
702  (__v16si)__W);
703 }
704 
705 static __inline__ __m512i __DEFAULT_FN_ATTRS512
706 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
707 {
709  __U, __A, __B);
710 }
711 
712 static __inline__ __m512i __DEFAULT_FN_ATTRS512
713 _mm512_andnot_epi64(__m512i __A, __m512i __B)
714 {
715  return (__m512i)(~(__v8du)__A & (__v8du)__B);
716 }
717 
718 static __inline__ __m512i __DEFAULT_FN_ATTRS512
719 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
720 {
721  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
722  (__v8di)_mm512_andnot_epi64(__A, __B),
723  (__v8di)__W);
724 }
725 
726 static __inline__ __m512i __DEFAULT_FN_ATTRS512
727 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
728 {
730  __U, __A, __B);
731 }
732 
733 static __inline__ __m512i __DEFAULT_FN_ATTRS512
734 _mm512_or_epi32(__m512i __a, __m512i __b)
735 {
736  return (__m512i)((__v16su)__a | (__v16su)__b);
737 }
738 
739 static __inline__ __m512i __DEFAULT_FN_ATTRS512
740 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
741 {
742  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
743  (__v16si)_mm512_or_epi32(__a, __b),
744  (__v16si)__src);
745 }
746 
747 static __inline__ __m512i __DEFAULT_FN_ATTRS512
748 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
749 {
750  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
751 }
752 
753 static __inline__ __m512i __DEFAULT_FN_ATTRS512
754 _mm512_or_epi64(__m512i __a, __m512i __b)
755 {
756  return (__m512i)((__v8du)__a | (__v8du)__b);
757 }
758 
759 static __inline__ __m512i __DEFAULT_FN_ATTRS512
760 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
761 {
762  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
763  (__v8di)_mm512_or_epi64(__a, __b),
764  (__v8di)__src);
765 }
766 
767 static __inline__ __m512i __DEFAULT_FN_ATTRS512
768 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
769 {
770  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
771 }
772 
773 static __inline__ __m512i __DEFAULT_FN_ATTRS512
774 _mm512_xor_epi32(__m512i __a, __m512i __b)
775 {
776  return (__m512i)((__v16su)__a ^ (__v16su)__b);
777 }
778 
779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
781 {
782  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
783  (__v16si)_mm512_xor_epi32(__a, __b),
784  (__v16si)__src);
785 }
786 
787 static __inline__ __m512i __DEFAULT_FN_ATTRS512
788 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
789 {
790  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
791 }
792 
793 static __inline__ __m512i __DEFAULT_FN_ATTRS512
794 _mm512_xor_epi64(__m512i __a, __m512i __b)
795 {
796  return (__m512i)((__v8du)__a ^ (__v8du)__b);
797 }
798 
799 static __inline__ __m512i __DEFAULT_FN_ATTRS512
800 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
801 {
802  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
803  (__v8di)_mm512_xor_epi64(__a, __b),
804  (__v8di)__src);
805 }
806 
807 static __inline__ __m512i __DEFAULT_FN_ATTRS512
808 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
809 {
810  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
811 }
812 
813 static __inline__ __m512i __DEFAULT_FN_ATTRS512
814 _mm512_and_si512(__m512i __a, __m512i __b)
815 {
816  return (__m512i)((__v8du)__a & (__v8du)__b);
817 }
818 
819 static __inline__ __m512i __DEFAULT_FN_ATTRS512
820 _mm512_or_si512(__m512i __a, __m512i __b)
821 {
822  return (__m512i)((__v8du)__a | (__v8du)__b);
823 }
824 
825 static __inline__ __m512i __DEFAULT_FN_ATTRS512
826 _mm512_xor_si512(__m512i __a, __m512i __b)
827 {
828  return (__m512i)((__v8du)__a ^ (__v8du)__b);
829 }
830 
831 /* Arithmetic */
832 
833 static __inline __m512d __DEFAULT_FN_ATTRS512
834 _mm512_add_pd(__m512d __a, __m512d __b)
835 {
836  return (__m512d)((__v8df)__a + (__v8df)__b);
837 }
838 
839 static __inline __m512 __DEFAULT_FN_ATTRS512
840 _mm512_add_ps(__m512 __a, __m512 __b)
841 {
842  return (__m512)((__v16sf)__a + (__v16sf)__b);
843 }
844 
845 static __inline __m512d __DEFAULT_FN_ATTRS512
846 _mm512_mul_pd(__m512d __a, __m512d __b)
847 {
848  return (__m512d)((__v8df)__a * (__v8df)__b);
849 }
850 
851 static __inline __m512 __DEFAULT_FN_ATTRS512
852 _mm512_mul_ps(__m512 __a, __m512 __b)
853 {
854  return (__m512)((__v16sf)__a * (__v16sf)__b);
855 }
856 
857 static __inline __m512d __DEFAULT_FN_ATTRS512
858 _mm512_sub_pd(__m512d __a, __m512d __b)
859 {
860  return (__m512d)((__v8df)__a - (__v8df)__b);
861 }
862 
863 static __inline __m512 __DEFAULT_FN_ATTRS512
864 _mm512_sub_ps(__m512 __a, __m512 __b)
865 {
866  return (__m512)((__v16sf)__a - (__v16sf)__b);
867 }
868 
869 static __inline__ __m512i __DEFAULT_FN_ATTRS512
870 _mm512_add_epi64 (__m512i __A, __m512i __B)
871 {
872  return (__m512i) ((__v8du) __A + (__v8du) __B);
873 }
874 
875 static __inline__ __m512i __DEFAULT_FN_ATTRS512
876 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
877 {
878  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
879  (__v8di)_mm512_add_epi64(__A, __B),
880  (__v8di)__W);
881 }
882 
883 static __inline__ __m512i __DEFAULT_FN_ATTRS512
884 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
885 {
886  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
887  (__v8di)_mm512_add_epi64(__A, __B),
888  (__v8di)_mm512_setzero_si512());
889 }
890 
891 static __inline__ __m512i __DEFAULT_FN_ATTRS512
892 _mm512_sub_epi64 (__m512i __A, __m512i __B)
893 {
894  return (__m512i) ((__v8du) __A - (__v8du) __B);
895 }
896 
897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
898 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
899 {
900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
901  (__v8di)_mm512_sub_epi64(__A, __B),
902  (__v8di)__W);
903 }
904 
905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
906 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
907 {
908  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
909  (__v8di)_mm512_sub_epi64(__A, __B),
910  (__v8di)_mm512_setzero_si512());
911 }
912 
913 static __inline__ __m512i __DEFAULT_FN_ATTRS512
914 _mm512_add_epi32 (__m512i __A, __m512i __B)
915 {
916  return (__m512i) ((__v16su) __A + (__v16su) __B);
917 }
918 
919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
920 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
921 {
922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
923  (__v16si)_mm512_add_epi32(__A, __B),
924  (__v16si)__W);
925 }
926 
927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
928 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
929 {
930  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
931  (__v16si)_mm512_add_epi32(__A, __B),
932  (__v16si)_mm512_setzero_si512());
933 }
934 
935 static __inline__ __m512i __DEFAULT_FN_ATTRS512
936 _mm512_sub_epi32 (__m512i __A, __m512i __B)
937 {
938  return (__m512i) ((__v16su) __A - (__v16su) __B);
939 }
940 
941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
942 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
943 {
944  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
945  (__v16si)_mm512_sub_epi32(__A, __B),
946  (__v16si)__W);
947 }
948 
949 static __inline__ __m512i __DEFAULT_FN_ATTRS512
950 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
951 {
952  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
953  (__v16si)_mm512_sub_epi32(__A, __B),
954  (__v16si)_mm512_setzero_si512());
955 }
956 
957 #define _mm512_max_round_pd(A, B, R) \
958  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
959  (__v8df)(__m512d)(B), (int)(R)))
960 
961 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
962  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
963  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
964  (__v8df)(W)))
965 
966 #define _mm512_maskz_max_round_pd(U, A, B, R) \
967  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
968  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
969  (__v8df)_mm512_setzero_pd()))
970 
971 static __inline__ __m512d __DEFAULT_FN_ATTRS512
972 _mm512_max_pd(__m512d __A, __m512d __B)
973 {
974  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
976 }
977 
978 static __inline__ __m512d __DEFAULT_FN_ATTRS512
979 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
980 {
981  return (__m512d)__builtin_ia32_selectpd_512(__U,
982  (__v8df)_mm512_max_pd(__A, __B),
983  (__v8df)__W);
984 }
985 
986 static __inline__ __m512d __DEFAULT_FN_ATTRS512
987 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
988 {
989  return (__m512d)__builtin_ia32_selectpd_512(__U,
990  (__v8df)_mm512_max_pd(__A, __B),
991  (__v8df)_mm512_setzero_pd());
992 }
993 
994 #define _mm512_max_round_ps(A, B, R) \
995  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
996  (__v16sf)(__m512)(B), (int)(R)))
997 
998 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
999  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1000  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
1001  (__v16sf)(W)))
1002 
1003 #define _mm512_maskz_max_round_ps(U, A, B, R) \
1004  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1005  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
1006  (__v16sf)_mm512_setzero_ps()))
1007 
1008 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1009 _mm512_max_ps(__m512 __A, __m512 __B)
1010 {
1011  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1013 }
1014 
1015 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1016 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1017 {
1018  return (__m512)__builtin_ia32_selectps_512(__U,
1019  (__v16sf)_mm512_max_ps(__A, __B),
1020  (__v16sf)__W);
1021 }
1022 
1023 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1024 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1025 {
1026  return (__m512)__builtin_ia32_selectps_512(__U,
1027  (__v16sf)_mm512_max_ps(__A, __B),
1028  (__v16sf)_mm512_setzero_ps());
1029 }
1030 
1031 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1032 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1033  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1034  (__v4sf) __B,
1035  (__v4sf) __W,
1036  (__mmask8) __U,
1038 }
1039 
1040 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1041 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1042  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1043  (__v4sf) __B,
1044  (__v4sf) _mm_setzero_ps (),
1045  (__mmask8) __U,
1047 }
1048 
1049 #define _mm_max_round_ss(A, B, R) \
1050  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1051  (__v4sf)(__m128)(B), \
1052  (__v4sf)_mm_setzero_ps(), \
1053  (__mmask8)-1, (int)(R)))
1054 
1055 #define _mm_mask_max_round_ss(W, U, A, B, R) \
1056  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1057  (__v4sf)(__m128)(B), \
1058  (__v4sf)(__m128)(W), (__mmask8)(U), \
1059  (int)(R)))
1060 
1061 #define _mm_maskz_max_round_ss(U, A, B, R) \
1062  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1063  (__v4sf)(__m128)(B), \
1064  (__v4sf)_mm_setzero_ps(), \
1065  (__mmask8)(U), (int)(R)))
1066 
1067 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1068 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1069  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1070  (__v2df) __B,
1071  (__v2df) __W,
1072  (__mmask8) __U,
1074 }
1075 
1076 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1077 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1078  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1079  (__v2df) __B,
1080  (__v2df) _mm_setzero_pd (),
1081  (__mmask8) __U,
1083 }
1084 
1085 #define _mm_max_round_sd(A, B, R) \
1086  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1087  (__v2df)(__m128d)(B), \
1088  (__v2df)_mm_setzero_pd(), \
1089  (__mmask8)-1, (int)(R)))
1090 
1091 #define _mm_mask_max_round_sd(W, U, A, B, R) \
1092  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1093  (__v2df)(__m128d)(B), \
1094  (__v2df)(__m128d)(W), \
1095  (__mmask8)(U), (int)(R)))
1096 
1097 #define _mm_maskz_max_round_sd(U, A, B, R) \
1098  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1099  (__v2df)(__m128d)(B), \
1100  (__v2df)_mm_setzero_pd(), \
1101  (__mmask8)(U), (int)(R)))
1102 
1103 static __inline __m512i
1105 _mm512_max_epi32(__m512i __A, __m512i __B)
1106 {
1107  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1108 }
1109 
1110 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1111 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1112 {
1113  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1114  (__v16si)_mm512_max_epi32(__A, __B),
1115  (__v16si)__W);
1116 }
1117 
1118 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1119 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1120 {
1121  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1122  (__v16si)_mm512_max_epi32(__A, __B),
1123  (__v16si)_mm512_setzero_si512());
1124 }
1125 
1126 static __inline __m512i __DEFAULT_FN_ATTRS512
1127 _mm512_max_epu32(__m512i __A, __m512i __B)
1128 {
1129  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1130 }
1131 
1132 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1133 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1134 {
1135  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1136  (__v16si)_mm512_max_epu32(__A, __B),
1137  (__v16si)__W);
1138 }
1139 
1140 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1141 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1142 {
1143  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1144  (__v16si)_mm512_max_epu32(__A, __B),
1145  (__v16si)_mm512_setzero_si512());
1146 }
1147 
1148 static __inline __m512i __DEFAULT_FN_ATTRS512
1149 _mm512_max_epi64(__m512i __A, __m512i __B)
1150 {
1151  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1152 }
1153 
1154 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1155 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1156 {
1157  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1158  (__v8di)_mm512_max_epi64(__A, __B),
1159  (__v8di)__W);
1160 }
1161 
1162 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1163 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1164 {
1165  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1166  (__v8di)_mm512_max_epi64(__A, __B),
1167  (__v8di)_mm512_setzero_si512());
1168 }
1169 
1170 static __inline __m512i __DEFAULT_FN_ATTRS512
1171 _mm512_max_epu64(__m512i __A, __m512i __B)
1172 {
1173  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1174 }
1175 
1176 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1177 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1178 {
1179  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1180  (__v8di)_mm512_max_epu64(__A, __B),
1181  (__v8di)__W);
1182 }
1183 
1184 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1185 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1186 {
1187  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1188  (__v8di)_mm512_max_epu64(__A, __B),
1189  (__v8di)_mm512_setzero_si512());
1190 }
1191 
1192 #define _mm512_min_round_pd(A, B, R) \
1193  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1194  (__v8df)(__m512d)(B), (int)(R)))
1195 
1196 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
1197  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1198  (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1199  (__v8df)(W)))
1200 
1201 #define _mm512_maskz_min_round_pd(U, A, B, R) \
1202  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1203  (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1204  (__v8df)_mm512_setzero_pd()))
1205 
1206 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1207 _mm512_min_pd(__m512d __A, __m512d __B)
1208 {
1209  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1211 }
1212 
1213 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1214 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1215 {
1216  return (__m512d)__builtin_ia32_selectpd_512(__U,
1217  (__v8df)_mm512_min_pd(__A, __B),
1218  (__v8df)__W);
1219 }
1220 
1221 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1222 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1223 {
1224  return (__m512d)__builtin_ia32_selectpd_512(__U,
1225  (__v8df)_mm512_min_pd(__A, __B),
1226  (__v8df)_mm512_setzero_pd());
1227 }
1228 
1229 #define _mm512_min_round_ps(A, B, R) \
1230  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1231  (__v16sf)(__m512)(B), (int)(R)))
1232 
1233 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
1234  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1235  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1236  (__v16sf)(W)))
1237 
1238 #define _mm512_maskz_min_round_ps(U, A, B, R) \
1239  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1240  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1241  (__v16sf)_mm512_setzero_ps()))
1242 
1243 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1244 _mm512_min_ps(__m512 __A, __m512 __B)
1245 {
1246  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1248 }
1249 
1250 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1251 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1252 {
1253  return (__m512)__builtin_ia32_selectps_512(__U,
1254  (__v16sf)_mm512_min_ps(__A, __B),
1255  (__v16sf)__W);
1256 }
1257 
1258 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1259 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1260 {
1261  return (__m512)__builtin_ia32_selectps_512(__U,
1262  (__v16sf)_mm512_min_ps(__A, __B),
1263  (__v16sf)_mm512_setzero_ps());
1264 }
1265 
1266 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1267 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1268  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1269  (__v4sf) __B,
1270  (__v4sf) __W,
1271  (__mmask8) __U,
1273 }
1274 
1275 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1276 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1277  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1278  (__v4sf) __B,
1279  (__v4sf) _mm_setzero_ps (),
1280  (__mmask8) __U,
1282 }
1283 
1284 #define _mm_min_round_ss(A, B, R) \
1285  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1286  (__v4sf)(__m128)(B), \
1287  (__v4sf)_mm_setzero_ps(), \
1288  (__mmask8)-1, (int)(R)))
1289 
1290 #define _mm_mask_min_round_ss(W, U, A, B, R) \
1291  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1292  (__v4sf)(__m128)(B), \
1293  (__v4sf)(__m128)(W), (__mmask8)(U), \
1294  (int)(R)))
1295 
1296 #define _mm_maskz_min_round_ss(U, A, B, R) \
1297  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1298  (__v4sf)(__m128)(B), \
1299  (__v4sf)_mm_setzero_ps(), \
1300  (__mmask8)(U), (int)(R)))
1301 
1302 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1303 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1304  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1305  (__v2df) __B,
1306  (__v2df) __W,
1307  (__mmask8) __U,
1309 }
1310 
1311 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1312 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1313  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1314  (__v2df) __B,
1315  (__v2df) _mm_setzero_pd (),
1316  (__mmask8) __U,
1318 }
1319 
1320 #define _mm_min_round_sd(A, B, R) \
1321  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1322  (__v2df)(__m128d)(B), \
1323  (__v2df)_mm_setzero_pd(), \
1324  (__mmask8)-1, (int)(R)))
1325 
1326 #define _mm_mask_min_round_sd(W, U, A, B, R) \
1327  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1328  (__v2df)(__m128d)(B), \
1329  (__v2df)(__m128d)(W), \
1330  (__mmask8)(U), (int)(R)))
1331 
1332 #define _mm_maskz_min_round_sd(U, A, B, R) \
1333  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1334  (__v2df)(__m128d)(B), \
1335  (__v2df)_mm_setzero_pd(), \
1336  (__mmask8)(U), (int)(R)))
1337 
1338 static __inline __m512i
1340 _mm512_min_epi32(__m512i __A, __m512i __B)
1341 {
1342  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1343 }
1344 
1345 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1346 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1347 {
1348  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1349  (__v16si)_mm512_min_epi32(__A, __B),
1350  (__v16si)__W);
1351 }
1352 
1353 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1354 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1355 {
1356  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1357  (__v16si)_mm512_min_epi32(__A, __B),
1358  (__v16si)_mm512_setzero_si512());
1359 }
1360 
1361 static __inline __m512i __DEFAULT_FN_ATTRS512
1362 _mm512_min_epu32(__m512i __A, __m512i __B)
1363 {
1364  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1365 }
1366 
1367 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1368 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1369 {
1370  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1371  (__v16si)_mm512_min_epu32(__A, __B),
1372  (__v16si)__W);
1373 }
1374 
1375 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1376 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1377 {
1378  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1379  (__v16si)_mm512_min_epu32(__A, __B),
1380  (__v16si)_mm512_setzero_si512());
1381 }
1382 
1383 static __inline __m512i __DEFAULT_FN_ATTRS512
1384 _mm512_min_epi64(__m512i __A, __m512i __B)
1385 {
1386  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1387 }
1388 
1389 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1390 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1391 {
1392  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1393  (__v8di)_mm512_min_epi64(__A, __B),
1394  (__v8di)__W);
1395 }
1396 
1397 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1398 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1399 {
1400  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1401  (__v8di)_mm512_min_epi64(__A, __B),
1402  (__v8di)_mm512_setzero_si512());
1403 }
1404 
1405 static __inline __m512i __DEFAULT_FN_ATTRS512
1406 _mm512_min_epu64(__m512i __A, __m512i __B)
1407 {
1408  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1409 }
1410 
1411 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1412 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1413 {
1414  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1415  (__v8di)_mm512_min_epu64(__A, __B),
1416  (__v8di)__W);
1417 }
1418 
1419 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1420 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1421 {
1422  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1423  (__v8di)_mm512_min_epu64(__A, __B),
1424  (__v8di)_mm512_setzero_si512());
1425 }
1426 
1427 static __inline __m512i __DEFAULT_FN_ATTRS512
1428 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1429 {
1430  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1431 }
1432 
1433 static __inline __m512i __DEFAULT_FN_ATTRS512
1434 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1435 {
1436  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1437  (__v8di)_mm512_mul_epi32(__X, __Y),
1438  (__v8di)__W);
1439 }
1440 
1441 static __inline __m512i __DEFAULT_FN_ATTRS512
1442 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1443 {
1444  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1445  (__v8di)_mm512_mul_epi32(__X, __Y),
1446  (__v8di)_mm512_setzero_si512 ());
1447 }
1448 
1449 static __inline __m512i __DEFAULT_FN_ATTRS512
1450 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1451 {
1452  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1453 }
1454 
1455 static __inline __m512i __DEFAULT_FN_ATTRS512
1456 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1457 {
1458  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1459  (__v8di)_mm512_mul_epu32(__X, __Y),
1460  (__v8di)__W);
1461 }
1462 
1463 static __inline __m512i __DEFAULT_FN_ATTRS512
1464 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1465 {
1466  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1467  (__v8di)_mm512_mul_epu32(__X, __Y),
1468  (__v8di)_mm512_setzero_si512 ());
1469 }
1470 
1471 static __inline __m512i __DEFAULT_FN_ATTRS512
1472 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1473 {
1474  return (__m512i) ((__v16su) __A * (__v16su) __B);
1475 }
1476 
1477 static __inline __m512i __DEFAULT_FN_ATTRS512
1478 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1479 {
1480  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1481  (__v16si)_mm512_mullo_epi32(__A, __B),
1482  (__v16si)_mm512_setzero_si512());
1483 }
1484 
1485 static __inline __m512i __DEFAULT_FN_ATTRS512
1486 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1487 {
1488  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1489  (__v16si)_mm512_mullo_epi32(__A, __B),
1490  (__v16si)__W);
1491 }
1492 
1493 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1494 _mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1495  return (__m512i) ((__v8du) __A * (__v8du) __B);
1496 }
1497 
1498 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1500  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1501  (__v8di)_mm512_mullox_epi64(__A, __B),
1502  (__v8di)__W);
1503 }
1504 
1505 #define _mm512_sqrt_round_pd(A, R) \
1506  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1507 
1508 #define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1509  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1510  (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1511  (__v8df)(__m512d)(W)))
1512 
1513 #define _mm512_maskz_sqrt_round_pd(U, A, R) \
1514  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1515  (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1516  (__v8df)_mm512_setzero_pd()))
1517 
1518 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1519 _mm512_sqrt_pd(__m512d __A)
1520 {
1521  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1523 }
1524 
1525 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1526 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1527 {
1528  return (__m512d)__builtin_ia32_selectpd_512(__U,
1529  (__v8df)_mm512_sqrt_pd(__A),
1530  (__v8df)__W);
1531 }
1532 
1533 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1534 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1535 {
1536  return (__m512d)__builtin_ia32_selectpd_512(__U,
1537  (__v8df)_mm512_sqrt_pd(__A),
1538  (__v8df)_mm512_setzero_pd());
1539 }
1540 
1541 #define _mm512_sqrt_round_ps(A, R) \
1542  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1543 
1544 #define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1545  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1546  (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1547  (__v16sf)(__m512)(W)))
1548 
1549 #define _mm512_maskz_sqrt_round_ps(U, A, R) \
1550  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1551  (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1552  (__v16sf)_mm512_setzero_ps()))
1553 
1554 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1555 _mm512_sqrt_ps(__m512 __A)
1556 {
1557  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1559 }
1560 
1561 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1562 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1563 {
1564  return (__m512)__builtin_ia32_selectps_512(__U,
1565  (__v16sf)_mm512_sqrt_ps(__A),
1566  (__v16sf)__W);
1567 }
1568 
1569 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1571 {
1572  return (__m512)__builtin_ia32_selectps_512(__U,
1573  (__v16sf)_mm512_sqrt_ps(__A),
1574  (__v16sf)_mm512_setzero_ps());
1575 }
1576 
1577 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1578 _mm512_rsqrt14_pd(__m512d __A)
1579 {
1580  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1581  (__v8df)
1582  _mm512_setzero_pd (),
1583  (__mmask8) -1);}
1584 
1585 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1586 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1587 {
1588  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1589  (__v8df) __W,
1590  (__mmask8) __U);
1591 }
1592 
1593 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1595 {
1596  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1597  (__v8df)
1598  _mm512_setzero_pd (),
1599  (__mmask8) __U);
1600 }
1601 
1602 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1604 {
1605  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1606  (__v16sf)
1607  _mm512_setzero_ps (),
1608  (__mmask16) -1);
1609 }
1610 
1611 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1612 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1613 {
1614  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1615  (__v16sf) __W,
1616  (__mmask16) __U);
1617 }
1618 
1619 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1621 {
1622  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1623  (__v16sf)
1624  _mm512_setzero_ps (),
1625  (__mmask16) __U);
1626 }
1627 
1628 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1629 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1630 {
1631  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1632  (__v4sf) __B,
1633  (__v4sf)
1634  _mm_setzero_ps (),
1635  (__mmask8) -1);
1636 }
1637 
1638 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1639 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1640 {
1641  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1642  (__v4sf) __B,
1643  (__v4sf) __W,
1644  (__mmask8) __U);
1645 }
1646 
1647 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1648 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1649 {
1650  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1651  (__v4sf) __B,
1652  (__v4sf) _mm_setzero_ps (),
1653  (__mmask8) __U);
1654 }
1655 
1656 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1657 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1658 {
1659  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1660  (__v2df) __B,
1661  (__v2df)
1662  _mm_setzero_pd (),
1663  (__mmask8) -1);
1664 }
1665 
1666 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1667 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1668 {
1669  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1670  (__v2df) __B,
1671  (__v2df) __W,
1672  (__mmask8) __U);
1673 }
1674 
1675 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1676 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1677 {
1678  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1679  (__v2df) __B,
1680  (__v2df) _mm_setzero_pd (),
1681  (__mmask8) __U);
1682 }
1683 
1684 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1685 _mm512_rcp14_pd(__m512d __A)
1686 {
1687  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1688  (__v8df)
1689  _mm512_setzero_pd (),
1690  (__mmask8) -1);
1691 }
1692 
1693 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1694 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1695 {
1696  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1697  (__v8df) __W,
1698  (__mmask8) __U);
1699 }
1700 
1701 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1703 {
1704  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1705  (__v8df)
1706  _mm512_setzero_pd (),
1707  (__mmask8) __U);
1708 }
1709 
1710 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1711 _mm512_rcp14_ps(__m512 __A)
1712 {
1713  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1714  (__v16sf)
1715  _mm512_setzero_ps (),
1716  (__mmask16) -1);
1717 }
1718 
1719 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1720 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1721 {
1722  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1723  (__v16sf) __W,
1724  (__mmask16) __U);
1725 }
1726 
1727 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1729 {
1730  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1731  (__v16sf)
1732  _mm512_setzero_ps (),
1733  (__mmask16) __U);
1734 }
1735 
1736 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1737 _mm_rcp14_ss(__m128 __A, __m128 __B)
1738 {
1739  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1740  (__v4sf) __B,
1741  (__v4sf)
1742  _mm_setzero_ps (),
1743  (__mmask8) -1);
1744 }
1745 
1746 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1747 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1748 {
1749  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1750  (__v4sf) __B,
1751  (__v4sf) __W,
1752  (__mmask8) __U);
1753 }
1754 
1755 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1756 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1757 {
1758  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1759  (__v4sf) __B,
1760  (__v4sf) _mm_setzero_ps (),
1761  (__mmask8) __U);
1762 }
1763 
1764 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1765 _mm_rcp14_sd(__m128d __A, __m128d __B)
1766 {
1767  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1768  (__v2df) __B,
1769  (__v2df)
1770  _mm_setzero_pd (),
1771  (__mmask8) -1);
1772 }
1773 
1774 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1775 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1776 {
1777  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1778  (__v2df) __B,
1779  (__v2df) __W,
1780  (__mmask8) __U);
1781 }
1782 
1783 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1784 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1785 {
1786  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1787  (__v2df) __B,
1788  (__v2df) _mm_setzero_pd (),
1789  (__mmask8) __U);
1790 }
1791 
1792 static __inline __m512 __DEFAULT_FN_ATTRS512
1793 _mm512_floor_ps(__m512 __A)
1794 {
1795  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1797  (__v16sf) __A, (unsigned short)-1,
1799 }
1800 
1801 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1802 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1803 {
1804  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1806  (__v16sf) __W, __U,
1808 }
1809 
1810 static __inline __m512d __DEFAULT_FN_ATTRS512
1811 _mm512_floor_pd(__m512d __A)
1812 {
1813  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1815  (__v8df) __A, (unsigned char)-1,
1817 }
1818 
1819 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1820 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1821 {
1822  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1824  (__v8df) __W, __U,
1826 }
1827 
1828 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1829 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1830 {
1831  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1833  (__v16sf) __W, __U,
1835 }
1836 
1837 static __inline __m512 __DEFAULT_FN_ATTRS512
1838 _mm512_ceil_ps(__m512 __A)
1839 {
1840  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1842  (__v16sf) __A, (unsigned short)-1,
1844 }
1845 
1846 static __inline __m512d __DEFAULT_FN_ATTRS512
1847 _mm512_ceil_pd(__m512d __A)
1848 {
1849  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1851  (__v8df) __A, (unsigned char)-1,
1853 }
1854 
1855 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1856 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1857 {
1858  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1860  (__v8df) __W, __U,
1862 }
1863 
1864 static __inline __m512i __DEFAULT_FN_ATTRS512
1865 _mm512_abs_epi64(__m512i __A)
1866 {
1867  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1868 }
1869 
1870 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1871 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1872 {
1873  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1874  (__v8di)_mm512_abs_epi64(__A),
1875  (__v8di)__W);
1876 }
1877 
1878 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1880 {
1881  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1882  (__v8di)_mm512_abs_epi64(__A),
1883  (__v8di)_mm512_setzero_si512());
1884 }
1885 
1886 static __inline __m512i __DEFAULT_FN_ATTRS512
1887 _mm512_abs_epi32(__m512i __A)
1888 {
1889  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1890 }
1891 
1892 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1893 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1894 {
1895  return (__m512i)__builtin_ia32_selectd_512(__U,
1896  (__v16si)_mm512_abs_epi32(__A),
1897  (__v16si)__W);
1898 }
1899 
1900 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1902 {
1903  return (__m512i)__builtin_ia32_selectd_512(__U,
1904  (__v16si)_mm512_abs_epi32(__A),
1905  (__v16si)_mm512_setzero_si512());
1906 }
1907 
/* Scalar single-precision add under write-mask: the low lane of __A+__B is
 * selected against __W (merge) or zero (maskz) by bit 0 of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}

/* Scalar SS add with explicit rounding mode R: unmasked, merge-masked (W)
 * and zero-masked forms. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

/* Scalar double-precision add under write-mask (merge / zero forms). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
/* Scalar SD add with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* 512-bit packed add under write-mask: per-lane select between __A+__B and
 * __W (merge) or zero (maskz). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* 512-bit packed add with explicit rounding mode R; masked variants layer a
 * per-lane select over the unmasked round macro. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2022 
/* Scalar single-precision subtract under write-mask: the low lane of __A-__B
 * is selected against __W (merge) or zero (maskz) by bit 0 of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar SS subtract with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

/* Scalar double-precision subtract under write-mask (merge / zero forms). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}

/* Scalar SD subtract with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* 512-bit packed subtract under write-mask: per-lane select between __A-__B
 * and __W (merge) or zero (maskz). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* 512-bit packed subtract with explicit rounding mode R; masked variants
 * layer a per-lane select over the unmasked round macro. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2137 
/* Scalar single-precision multiply under write-mask: the low lane of __A*__B
 * is selected against __W (merge) or zero (maskz) by bit 0 of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar SS multiply with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

/* Scalar double-precision multiply under write-mask (merge / zero forms). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}

/* Scalar SD multiply with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* 512-bit packed multiply under write-mask: per-lane select between __A*__B
 * and __W (merge) or zero (maskz). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* 512-bit packed multiply with explicit rounding mode R; masked variants
 * layer a per-lane select over the unmasked round macro. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2252 
/* Scalar single-precision divide under write-mask: the low lane of __A/__B
 * is selected against __W (merge) or zero (maskz) by bit 0 of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}

/* Scalar SS divide with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

/* Scalar double-precision divide under write-mask (merge / zero forms). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}

/* Scalar SD divide with explicit rounding mode R (unmasked / merge / zero). */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* 512-bit packed double divide, expressed with the vector / operator so the
 * compiler can constant-fold and vectorize naturally. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_div_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a/(__v8df)__b);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* 512-bit packed single divide (vector / operator) plus masked forms. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_div_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* 512-bit packed divide with explicit rounding mode R; masked variants layer
 * a per-lane select over the unmasked round macro. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2380 
/* Round-to-scale (VRNDSCALE) macros.  imm/B encodes the rounding operation
 * and scale; the masked forms take the destination/source operands in the
 * argument order shown (note: A is the write-through source in the mask
 * forms, the mask in the maskz forms).  The non-_round forms fix the
 * rounding mode to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

/* Double-precision counterparts of the roundscale macros above. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2444 
/* Double-precision FMA macros with explicit rounding mode R.
 * fmadd = A*B+C, fmsub = A*B-C, fnmadd = -(A*B)+C, fnmsub = -(A*B)-C;
 * sign variants are obtained by negating the appropriate vector operand of
 * the single vfmaddpd512 builtin.  _mask merges into the first operand,
 * _mask3 merges into the third, _maskz zeroes inactive lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2527 
2528 
2529 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2530 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2531 {
2532  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2533  (__v8df) __B,
2534  (__v8df) __C,
2535  (__mmask8) -1,
2537 }
2538 
2539 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2540 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2541 {
2542  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2543  (__v8df) __B,
2544  (__v8df) __C,
2545  (__mmask8) __U,
2547 }
2548 
2549 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2550 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2551 {
2552  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2553  (__v8df) __B,
2554  (__v8df) __C,
2555  (__mmask8) __U,
2557 }
2558 
2559 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2560 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2561 {
2562  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2563  (__v8df) __B,
2564  (__v8df) __C,
2565  (__mmask8) __U,
2567 }
2568 
2569 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2570 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2571 {
2572  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2573  (__v8df) __B,
2574  -(__v8df) __C,
2575  (__mmask8) -1,
2577 }
2578 
2579 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2580 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2581 {
2582  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2583  (__v8df) __B,
2584  -(__v8df) __C,
2585  (__mmask8) __U,
2587 }
2588 
2589 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2590 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2591 {
2592  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2593  (__v8df) __B,
2594  -(__v8df) __C,
2595  (__mmask8) __U,
2597 }
2598 
2599 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2600 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2601 {
2602  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2603  -(__v8df) __B,
2604  (__v8df) __C,
2605  (__mmask8) -1,
2607 }
2608 
2609 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2610 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2611 {
2612  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2613  (__v8df) __B,
2614  (__v8df) __C,
2615  (__mmask8) __U,
2617 }
2618 
2619 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2620 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2621 {
2622  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2623  (__v8df) __B,
2624  (__v8df) __C,
2625  (__mmask8) __U,
2627 }
2628 
2629 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2630 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2631 {
2632  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2633  -(__v8df) __B,
2634  -(__v8df) __C,
2635  (__mmask8) -1,
2637 }
2638 
2639 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2640 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2641 {
2642  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2643  (__v8df) __B,
2644  -(__v8df) __C,
2645  (__mmask8) __U,
2647 }
2648 
/* Single-precision FMA macros with explicit rounding mode R; same scheme as
 * the double-precision family: sign variants negate an operand of the one
 * vfmaddps512 builtin, _mask merges into the first operand, _mask3 into the
 * third, _maskz zeroes inactive lanes. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2731 
2732 
2733 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2734 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2735 {
2736  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2737  (__v16sf) __B,
2738  (__v16sf) __C,
2739  (__mmask16) -1,
2741 }
2742 
2743 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2744 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2745 {
2746  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2747  (__v16sf) __B,
2748  (__v16sf) __C,
2749  (__mmask16) __U,
2751 }
2752 
2753 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2754 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2755 {
2756  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2757  (__v16sf) __B,
2758  (__v16sf) __C,
2759  (__mmask16) __U,
2761 }
2762 
2763 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2764 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2765 {
2766  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2767  (__v16sf) __B,
2768  (__v16sf) __C,
2769  (__mmask16) __U,
2771 }
2772 
2773 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2774 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2775 {
2776  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2777  (__v16sf) __B,
2778  -(__v16sf) __C,
2779  (__mmask16) -1,
2781 }
2782 
2783 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2784 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2785 {
2786  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2787  (__v16sf) __B,
2788  -(__v16sf) __C,
2789  (__mmask16) __U,
2791 }
2792 
2793 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2794 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2795 {
2796  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2797  (__v16sf) __B,
2798  -(__v16sf) __C,
2799  (__mmask16) __U,
2801 }
2802 
2803 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2804 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2805 {
2806  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2807  -(__v16sf) __B,
2808  (__v16sf) __C,
2809  (__mmask16) -1,
2811 }
2812 
2813 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2814 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2815 {
2816  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2817  (__v16sf) __B,
2818  (__v16sf) __C,
2819  (__mmask16) __U,
2821 }
2822 
2823 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2824 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2825 {
2826  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2827  (__v16sf) __B,
2828  (__v16sf) __C,
2829  (__mmask16) __U,
2831 }
2832 
2833 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2834 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2835 {
2836  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2837  -(__v16sf) __B,
2838  -(__v16sf) __C,
2839  (__mmask16) -1,
2841 }
2842 
2843 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2844 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2845 {
2846  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2847  (__v16sf) __B,
2848  -(__v16sf) __C,
2849  (__mmask16) __U,
2851 }
2852 
/* Double-precision fmaddsub/fmsubadd with explicit rounding control R.
 * fmaddsub alternates subtract (even lanes) and add (odd lanes); negating
 * C turns it into fmsubadd.  _mask keeps unselected lanes from A, _mask3
 * from C, _maskz zeroes them (per the Intel Intrinsics Guide). */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2901 
2902 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2903 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2904 {
2905  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2906  (__v8df) __B,
2907  (__v8df) __C,
2908  (__mmask8) -1,
2910 }
2911 
2912 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2913 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2914 {
2915  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2916  (__v8df) __B,
2917  (__v8df) __C,
2918  (__mmask8) __U,
2920 }
2921 
2922 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2923 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2924 {
2925  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2926  (__v8df) __B,
2927  (__v8df) __C,
2928  (__mmask8) __U,
2930 }
2931 
2932 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2933 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2934 {
2935  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2936  (__v8df) __B,
2937  (__v8df) __C,
2938  (__mmask8) __U,
2940 }
2941 
2942 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2943 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2944 {
2945  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2946  (__v8df) __B,
2947  -(__v8df) __C,
2948  (__mmask8) -1,
2950 }
2951 
2952 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2953 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2954 {
2955  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2956  (__v8df) __B,
2957  -(__v8df) __C,
2958  (__mmask8) __U,
2960 }
2961 
2962 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2963 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2964 {
2965  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2966  (__v8df) __B,
2967  -(__v8df) __C,
2968  (__mmask8) __U,
2970 }
2971 
/* Single-precision fmaddsub/fmsubadd with explicit rounding control R.
 * Mirrors the _pd macros above with 16 float lanes and a 16-bit mask. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3020 
3021 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3022 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3023 {
3024  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3025  (__v16sf) __B,
3026  (__v16sf) __C,
3027  (__mmask16) -1,
3029 }
3030 
3031 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3032 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3033 {
3034  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3035  (__v16sf) __B,
3036  (__v16sf) __C,
3037  (__mmask16) __U,
3039 }
3040 
3041 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3042 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3043 {
3044  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3045  (__v16sf) __B,
3046  (__v16sf) __C,
3047  (__mmask16) __U,
3049 }
3050 
3051 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3052 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3053 {
3054  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3055  (__v16sf) __B,
3056  (__v16sf) __C,
3057  (__mmask16) __U,
3059 }
3060 
3061 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3062 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3063 {
3064  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3065  (__v16sf) __B,
3066  -(__v16sf) __C,
3067  (__mmask16) -1,
3069 }
3070 
3071 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3072 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3073 {
3074  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3075  (__v16sf) __B,
3076  -(__v16sf) __C,
3077  (__mmask16) __U,
3079 }
3080 
3081 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3082 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3083 {
3084  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3085  (__v16sf) __B,
3086  -(__v16sf) __C,
3087  (__mmask16) __U,
3089 }
3090 
3091 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3092  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3093  (__v8df)(__m512d)(B), \
3094  (__v8df)(__m512d)(C), \
3095  (__mmask8)(U), (int)(R)))
3096 
3097 
3098 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3099 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3100 {
3101  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3102  (__v8df) __B,
3103  (__v8df) __C,
3104  (__mmask8) __U,
3106 }
3107 
3108 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3109  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3110  (__v16sf)(__m512)(B), \
3111  (__v16sf)(__m512)(C), \
3112  (__mmask16)(U), (int)(R)))
3113 
3114 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3115 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3116 {
3117  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3118  (__v16sf) __B,
3119  (__v16sf) __C,
3120  (__mmask16) __U,
3122 }
3123 
3124 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3125  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3126  (__v8df)(__m512d)(B), \
3127  (__v8df)(__m512d)(C), \
3128  (__mmask8)(U), (int)(R)))
3129 
3130 
3131 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3132 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3133 {
3134  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3135  (__v8df) __B,
3136  (__v8df) __C,
3137  (__mmask8) __U,
3139 }
3140 
3141 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3142  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3143  (__v16sf)(__m512)(B), \
3144  (__v16sf)(__m512)(C), \
3145  (__mmask16)(U), (int)(R)))
3146 
3147 
3148 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3149 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3150 {
3151  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3152  (__v16sf) __B,
3153  (__v16sf) __C,
3154  (__mmask16) __U,
3156 }
3157 
3158 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3159  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3160  -(__v8df)(__m512d)(B), \
3161  (__v8df)(__m512d)(C), \
3162  (__mmask8)(U), (int)(R)))
3163 
3164 
3165 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3166 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3167 {
3168  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3169  -(__v8df) __B,
3170  (__v8df) __C,
3171  (__mmask8) __U,
3173 }
3174 
3175 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3176  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3177  -(__v16sf)(__m512)(B), \
3178  (__v16sf)(__m512)(C), \
3179  (__mmask16)(U), (int)(R)))
3180 
3181 
3182 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3183 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3184 {
3185  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3186  -(__v16sf) __B,
3187  (__v16sf) __C,
3188  (__mmask16) __U,
3190 }
3191 
3192 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3193  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3194  -(__v8df)(__m512d)(B), \
3195  -(__v8df)(__m512d)(C), \
3196  (__mmask8)(U), (int)(R)))
3197 
3198 
3199 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3200  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3201  (__v8df)(__m512d)(B), \
3202  (__v8df)(__m512d)(C), \
3203  (__mmask8)(U), (int)(R)))
3204 
3205 
3206 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3207 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3208 {
3209  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3210  -(__v8df) __B,
3211  -(__v8df) __C,
3212  (__mmask8) __U,
3214 }
3215 
3216 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3217 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3218 {
3219  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3220  (__v8df) __B,
3221  (__v8df) __C,
3222  (__mmask8) __U,
3224 }
3225 
3226 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3227  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3228  -(__v16sf)(__m512)(B), \
3229  -(__v16sf)(__m512)(C), \
3230  (__mmask16)(U), (int)(R)))
3231 
3232 
3233 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3234  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3235  (__v16sf)(__m512)(B), \
3236  (__v16sf)(__m512)(C), \
3237  (__mmask16)(U), (int)(R)))
3238 
3239 
3240 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3241 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3242 {
3243  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3244  -(__v16sf) __B,
3245  -(__v16sf) __C,
3246  (__mmask16) __U,
3248 }
3249 
3250 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3251 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3252 {
3253  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3254  (__v16sf) __B,
3255  (__v16sf) __C,
3256  (__mmask16) __U,
3258 }
3259 
3260 
3261 
3262 /* Vector permutations */
3263 
3264 static __inline __m512i __DEFAULT_FN_ATTRS512
3265 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3266 {
3267  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3268  (__v16si) __B);
3269 }
3270 
3271 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3272 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3273  __m512i __B)
3274 {
3275  return (__m512i)__builtin_ia32_selectd_512(__U,
3276  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277  (__v16si)__A);
3278 }
3279 
3280 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3281 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3282  __m512i __B)
3283 {
3284  return (__m512i)__builtin_ia32_selectd_512(__U,
3285  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3286  (__v16si)__I);
3287 }
3288 
3289 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3290 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3291  __m512i __B)
3292 {
3293  return (__m512i)__builtin_ia32_selectd_512(__U,
3294  (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3295  (__v16si)_mm512_setzero_si512());
3296 }
3297 
3298 static __inline __m512i __DEFAULT_FN_ATTRS512
3299 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3300 {
3301  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3302  (__v8di) __B);
3303 }
3304 
3305 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3306 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3307  __m512i __B)
3308 {
3309  return (__m512i)__builtin_ia32_selectq_512(__U,
3310  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311  (__v8di)__A);
3312 }
3313 
3314 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3315 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3316  __m512i __B)
3317 {
3318  return (__m512i)__builtin_ia32_selectq_512(__U,
3319  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3320  (__v8di)__I);
3321 }
3322 
3323 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3324 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3325  __m512i __B)
3326 {
3327  return (__m512i)__builtin_ia32_selectq_512(__U,
3328  (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3329  (__v8di)_mm512_setzero_si512());
3330 }
3331 
/* valignq/valignd: concatenate A:B and extract a 512-bit window shifted
 * right by I elements.  Masked forms blend with W (_mask) or zero
 * (_maskz) via the generic select builtins. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract a 256-bit (f64x4) or 128-bit (f32x4) chunk I from a 512-bit
 * vector; masked forms merge with W or zero unselected lanes. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3390 
3391 /* Vector Blend */
3392 
3393 static __inline __m512d __DEFAULT_FN_ATTRS512
3394 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3395 {
3396  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3397  (__v8df) __W,
3398  (__v8df) __A);
3399 }
3400 
3401 static __inline __m512 __DEFAULT_FN_ATTRS512
3402 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3403 {
3404  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3405  (__v16sf) __W,
3406  (__v16sf) __A);
3407 }
3408 
3409 static __inline __m512i __DEFAULT_FN_ATTRS512
3410 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3411 {
3412  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3413  (__v8di) __W,
3414  (__v8di) __A);
3415 }
3416 
3417 static __inline __m512i __DEFAULT_FN_ATTRS512
3418 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3419 {
3420  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3421  (__v16si) __W,
3422  (__v16si) __A);
3423 }
3424 
/* Compare */

/* Float comparisons producing a k-mask.  P is a _CMP_* predicate; the
 * named helpers below expand to the matching predicate.  The _round
 * forms take an explicit rounding/SAE argument R, the plain forms use
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3536 
3537 /* Conversion */
3538 
3539 #define _mm512_cvtt_roundps_epu32(A, R) \
3540  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3541  (__v16si)_mm512_undefined_epi32(), \
3542  (__mmask16)-1, (int)(R)))
3543 
3544 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3545  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3546  (__v16si)(__m512i)(W), \
3547  (__mmask16)(U), (int)(R)))
3548 
3549 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3550  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3551  (__v16si)_mm512_setzero_si512(), \
3552  (__mmask16)(U), (int)(R)))
3553 
3554 
3555 static __inline __m512i __DEFAULT_FN_ATTRS512
3557 {
3558  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3559  (__v16si)
3561  (__mmask16) -1,
3563 }
3564 
3565 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3566 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3567 {
3568  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3569  (__v16si) __W,
3570  (__mmask16) __U,
3572 }
3573 
3574 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3576 {
3577  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3578  (__v16si) _mm512_setzero_si512 (),
3579  (__mmask16) __U,
3581 }
3582 
/* Signed/unsigned 32-bit int -> float conversion with rounding control. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3612 
3613 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3614 _mm512_cvtepu32_ps (__m512i __A)
3615 {
3616  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3617 }
3618 
3619 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3620 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3621 {
3622  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3623  (__v16sf)_mm512_cvtepu32_ps(__A),
3624  (__v16sf)__W);
3625 }
3626 
3627 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3629 {
3630  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3631  (__v16sf)_mm512_cvtepu32_ps(__A),
3632  (__v16sf)_mm512_setzero_ps());
3633 }
3634 
3635 static __inline __m512d __DEFAULT_FN_ATTRS512
3637 {
3638  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3639 }
3640 
3641 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3642 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3643 {
3644  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3645  (__v8df)_mm512_cvtepi32_pd(__A),
3646  (__v8df)__W);
3647 }
3648 
3649 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3651 {
3652  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3653  (__v8df)_mm512_cvtepi32_pd(__A),
3654  (__v8df)_mm512_setzero_pd());
3655 }
3656 
3657 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3659 {
3660  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3661 }
3662 
3663 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3664 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3665 {
3666  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3667 }
3668 
3669 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3670 _mm512_cvtepi32_ps (__m512i __A)
3671 {
3672  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3673 }
3674 
3675 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3676 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3677 {
3678  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3679  (__v16sf)_mm512_cvtepi32_ps(__A),
3680  (__v16sf)__W);
3681 }
3682 
3683 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3685 {
3686  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3687  (__v16sf)_mm512_cvtepi32_ps(__A),
3688  (__v16sf)_mm512_setzero_ps());
3689 }
3690 
3691 static __inline __m512d __DEFAULT_FN_ATTRS512
3693 {
3694  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3695 }
3696 
3697 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3698 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3699 {
3700  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3701  (__v8df)_mm512_cvtepu32_pd(__A),
3702  (__v8df)__W);
3703 }
3704 
3705 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3707 {
3708  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3709  (__v8df)_mm512_cvtepu32_pd(__A),
3710  (__v8df)_mm512_setzero_pd());
3711 }
3712 
3713 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3715 {
3716  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3717 }
3718 
3719 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3720 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3721 {
3722  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3723 }
3724 
3725 #define _mm512_cvt_roundpd_ps(A, R) \
3726  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3727  (__v8sf)_mm256_setzero_ps(), \
3728  (__mmask8)-1, (int)(R)))
3729 
3730 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3731  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3732  (__v8sf)(__m256)(W), (__mmask8)(U), \
3733  (int)(R)))
3734 
3735 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3736  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3737  (__v8sf)_mm256_setzero_ps(), \
3738  (__mmask8)(U), (int)(R)))
3739 
3740 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3741 _mm512_cvtpd_ps (__m512d __A)
3742 {
3743  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744  (__v8sf) _mm256_undefined_ps (),
3745  (__mmask8) -1,
3747 }
3748 
3749 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3750 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3751 {
3752  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3753  (__v8sf) __W,
3754  (__mmask8) __U,
3756 }
3757 
3758 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3760 {
3761  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3762  (__v8sf) _mm256_setzero_ps (),
3763  (__mmask8) __U,
3765 }
3766 
3767 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3768 _mm512_cvtpd_pslo (__m512d __A)
3769 {
3770  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3771  (__v8sf) _mm256_setzero_ps (),
3772  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3773 }
3774 
3775 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3776 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3777 {
3778  return (__m512) __builtin_shufflevector (
3780  __U, __A),
3781  (__v8sf) _mm256_setzero_ps (),
3782  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3783 }
3784 
/* float32 <-> float16 conversions.  vcvtps2ph takes an immediate rounding
 * control I and returns 16 half floats in a __m256i; the _mm512_cvtps_ph
 * aliases simply reuse the _round forms since the immediate is mandatory. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph

#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3818 
3819 
3820 static __inline __m512 __DEFAULT_FN_ATTRS512
3821 _mm512_cvtph_ps(__m256i __A)
3822 {
3823  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3824  (__v16sf)
3825  _mm512_setzero_ps (),
3826  (__mmask16) -1,
3828 }
3829 
3830 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3831 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3832 {
3833  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3834  (__v16sf) __W,
3835  (__mmask16) __U,
3837 }
3838 
3839 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3841 {
3842  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3843  (__v16sf) _mm512_setzero_ps (),
3844  (__mmask16) __U,
3846 }
3847 
/* Truncate-convert 8 doubles to 32-bit ints with SAE control R. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3862 
3863 static __inline __m256i __DEFAULT_FN_ATTRS512
3865 {
3866  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3867  (__v8si)_mm256_setzero_si256(),
3868  (__mmask8) -1,
3870 }
3871 
3872 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3873 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3874 {
3875  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3876  (__v8si) __W,
3877  (__mmask8) __U,
3879 }
3880 
3881 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3883 {
3884  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3885  (__v8si) _mm256_setzero_si256 (),
3886  (__mmask8) __U,
3888 }
3889 
/* Truncate-convert 16 floats to 32-bit ints with SAE control R. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3904 
3905 static __inline __m512i __DEFAULT_FN_ATTRS512
3907 {
3908  return (__m512i)
3909  __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3910  (__v16si) _mm512_setzero_si512 (),
3912 }
3913 
3914 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3915 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3916 {
3917  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3918  (__v16si) __W,
3919  (__mmask16) __U,
3921 }
3922 
3923 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3925 {
3926  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3927  (__v16si) _mm512_setzero_si512 (),
3928  (__mmask16) __U,
3930 }
3931 
/* Convert 16 floats to 32-bit ints with explicit rounding control R. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3946 
3947 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3949 {
3950  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951  (__v16si) _mm512_undefined_epi32 (),
3952  (__mmask16) -1,
3954 }
3955 
3956 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3957 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3958 {
3959  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3960  (__v16si) __W,
3961  (__mmask16) __U,
3963 }
3964 
3965 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3967 {
3968  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3969  (__v16si)
3971  (__mmask16) __U,
3973 }
3974 
/* Convert 8 doubles to 32-bit ints with explicit rounding control R. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3989 
3990 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3991 _mm512_cvtpd_epi32 (__m512d __A)
3992 {
3993  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3994  (__v8si)
3996  (__mmask8) -1,
3998 }
3999 
4000 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4001 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4002 {
4003  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4004  (__v8si) __W,
4005  (__mmask8) __U,
4007 }
4008 
4009 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4011 {
4012  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4013  (__v8si)
4015  (__mmask8) __U,
4017 }
4018 
/* Convert 16 floats to 32-bit unsigned ints with rounding control R. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
4033 
4034 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4035 _mm512_cvtps_epu32 ( __m512 __A)
4036 {
4037  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4038  (__v16si)\
4040  (__mmask16) -1,\
4042 }
4043 
4044 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4045 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4046 {
4047  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4048  (__v16si) __W,
4049  (__mmask16) __U,
4051 }
4052 
4053 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4055 {
4056  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4057  (__v16si)
4059  (__mmask16) __U ,
4061 }
4062 
/* Convert 8 doubles to 32-bit unsigned ints with rounding control R. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masking form. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masking form. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4077 
4078 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4079 _mm512_cvtpd_epu32 (__m512d __A)
4080 {
4081  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4082  (__v8si)
4084  (__mmask8) -1,
4086 }
4087 
4088 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4089 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4090 {
4091  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4092  (__v8si) __W,
4093  (__mmask8) __U,
4095 }
4096 
4097 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4099 {
4100  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4101  (__v8si)
4103  (__mmask8) __U,
4105 }
4106 
4107 static __inline__ double __DEFAULT_FN_ATTRS512
4109 {
4110  return __a[0];
4111 }
4112 
4113 static __inline__ float __DEFAULT_FN_ATTRS512
4115 {
4116  return __a[0];
4117 }
4118 
4119 /* Unpack and Interleave */
4120 
/* Interleave the high double of each 128-bit lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masking variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}

/* Interleave the low double of each 128-bit lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masking variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masking variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}
4166 
/* Interleave the high two floats of each 128-bit lane of __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masking variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

/* Interleave the low two floats of each 128-bit lane of __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

/* Merge-masking variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masking variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
4218 
/* Interleave the high two dwords of each 128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}

/* Interleave the low two dwords of each 128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Merge-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4270 
/* Interleave the high qword of each 128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}

/* Interleave the low qword of each 128-bit lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masking variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4316 
4317 
4318 /* SIMD load ops */
4319 
4320 static __inline __m512i __DEFAULT_FN_ATTRS512
4322 {
4323  struct __loadu_si512 {
4324  __m512i_u __v;
4325  } __attribute__((__packed__, __may_alias__));
4326  return ((const struct __loadu_si512*)__P)->__v;
4327 }
4328 
4329 static __inline __m512i __DEFAULT_FN_ATTRS512
4331 {
4332  struct __loadu_epi32 {
4333  __m512i_u __v;
4334  } __attribute__((__packed__, __may_alias__));
4335  return ((const struct __loadu_epi32*)__P)->__v;
4336 }
4337 
4338 static __inline __m512i __DEFAULT_FN_ATTRS512
4339 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4340 {
4341  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4342  (__v16si) __W,
4343  (__mmask16) __U);
4344 }
4345 
4346 
4347 static __inline __m512i __DEFAULT_FN_ATTRS512
4349 {
4350  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4351  (__v16si)
4353  (__mmask16) __U);
4354 }
4355 
4356 static __inline __m512i __DEFAULT_FN_ATTRS512
4358 {
4359  struct __loadu_epi64 {
4360  __m512i_u __v;
4361  } __attribute__((__packed__, __may_alias__));
4362  return ((const struct __loadu_epi64*)__P)->__v;
4363 }
4364 
4365 static __inline __m512i __DEFAULT_FN_ATTRS512
4366 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4367 {
4368  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4369  (__v8di) __W,
4370  (__mmask8) __U);
4371 }
4372 
4373 static __inline __m512i __DEFAULT_FN_ATTRS512
4375 {
4376  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4377  (__v8di)
4379  (__mmask8) __U);
4380 }
4381 
4382 static __inline __m512 __DEFAULT_FN_ATTRS512
4383 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4384 {
4385  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4386  (__v16sf) __W,
4387  (__mmask16) __U);
4388 }
4389 
4390 static __inline __m512 __DEFAULT_FN_ATTRS512
4392 {
4393  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4394  (__v16sf)
4395  _mm512_setzero_ps (),
4396  (__mmask16) __U);
4397 }
4398 
4399 static __inline __m512d __DEFAULT_FN_ATTRS512
4400 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4401 {
4402  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4403  (__v8df) __W,
4404  (__mmask8) __U);
4405 }
4406 
4407 static __inline __m512d __DEFAULT_FN_ATTRS512
4409 {
4410  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4411  (__v8df)
4412  _mm512_setzero_pd (),
4413  (__mmask8) __U);
4414 }
4415 
/* Unaligned load of 8 doubles; packed/may_alias struct legalizes the
   unaligned access. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pd*)__p)->__v;
}

/* Unaligned load of 16 floats. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_ps*)__p)->__v;
}
4433 
4434 static __inline __m512 __DEFAULT_FN_ATTRS512
4435 _mm512_load_ps(void const *__p)
4436 {
4437  return *(const __m512*)__p;
4438 }
4439 
4440 static __inline __m512 __DEFAULT_FN_ATTRS512
4441 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4442 {
4443  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4444  (__v16sf) __W,
4445  (__mmask16) __U);
4446 }
4447 
4448 static __inline __m512 __DEFAULT_FN_ATTRS512
4450 {
4451  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4452  (__v16sf)
4453  _mm512_setzero_ps (),
4454  (__mmask16) __U);
4455 }
4456 
4457 static __inline __m512d __DEFAULT_FN_ATTRS512
4458 _mm512_load_pd(void const *__p)
4459 {
4460  return *(const __m512d*)__p;
4461 }
4462 
4463 static __inline __m512d __DEFAULT_FN_ATTRS512
4464 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4465 {
4466  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4467  (__v8df) __W,
4468  (__mmask8) __U);
4469 }
4470 
4471 static __inline __m512d __DEFAULT_FN_ATTRS512
4473 {
4474  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4475  (__v8df)
4476  _mm512_setzero_pd (),
4477  (__mmask8) __U);
4478 }
4479 
/* Aligned (64-byte) 512-bit integer loads; all three are byte-identical
   operations, provided under the names Intel's API defines. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_si512 (void const *__P)
{
  return *(const __m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi32 (void const *__P)
{
  return *(const __m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi64 (void const *__P)
{
  return *(const __m512i *) __P;
}
4497 
4498 /* SIMD store ops */
4499 
/* Unaligned store of 8 64-bit integers. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned qword store: only elements with a set bit in __U are
   written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                  (__mmask8) __U);
}
4515 
/* Unaligned 512-bit integer store. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

/* Unaligned store of 16 32-bit integers. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

/* Masked unaligned dword store: only elements with a set bit in __U are
   written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                  (__mmask16) __U);
}
4540 
/* Masked unaligned double store: only elements with a set bit in __U are
   written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 doubles. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

/* Masked unaligned float store. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                  (__mmask16) __U);
}

/* Unaligned store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}
4571 
/* Masked aligned double store: only elements with a set bit in __U are
   written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned (64-byte) store of 8 doubles. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned float store. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                  (__mmask16) __U);
}

/* Aligned (64-byte) store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}
4596 
/* Aligned (64-byte) 512-bit integer stores; all three perform the same
   operation under Intel's three API names. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4614 
4615 /* Mask ops */
4616 
4617 static __inline __mmask16 __DEFAULT_FN_ATTRS
4619 {
4620  return __builtin_ia32_knothi(__M);
4621 }
4622 
4623 /* Integer compare */
4624 
/* Signed 32-bit comparisons, expressed via the generic cmp intrinsic. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit comparisons. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4724 
4725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4727 {
4728  /* This function always performs a signed extension, but __v16qi is a char
4729  which may be signed or unsigned, so use __v16qs. */
4730  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4731 }
4732 
4733 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4734 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4735 {
4736  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4737  (__v16si)_mm512_cvtepi8_epi32(__A),
4738  (__v16si)__W);
4739 }
4740 
4741 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4743 {
4744  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4745  (__v16si)_mm512_cvtepi8_epi32(__A),
4746  (__v16si)_mm512_setzero_si512());
4747 }
4748 
4749 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4751 {
4752  /* This function always performs a signed extension, but __v16qi is a char
4753  which may be signed or unsigned, so use __v16qs. */
4754  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4755 }
4756 
4757 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4758 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4759 {
4760  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4761  (__v8di)_mm512_cvtepi8_epi64(__A),
4762  (__v8di)__W);
4763 }
4764 
4765 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4767 {
4768  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4769  (__v8di)_mm512_cvtepi8_epi64(__A),
4770  (__v8di)_mm512_setzero_si512 ());
4771 }
4772 
4773 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4775 {
4776  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4777 }
4778 
4779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4780 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4781 {
4782  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4783  (__v8di)_mm512_cvtepi32_epi64(__X),
4784  (__v8di)__W);
4785 }
4786 
4787 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4789 {
4790  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4791  (__v8di)_mm512_cvtepi32_epi64(__X),
4792  (__v8di)_mm512_setzero_si512());
4793 }
4794 
4795 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4797 {
4798  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4799 }
4800 
4801 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4802 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4803 {
4804  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4805  (__v16si)_mm512_cvtepi16_epi32(__A),
4806  (__v16si)__W);
4807 }
4808 
4809 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4811 {
4812  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4813  (__v16si)_mm512_cvtepi16_epi32(__A),
4814  (__v16si)_mm512_setzero_si512 ());
4815 }
4816 
4817 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4819 {
4820  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4821 }
4822 
/* Sign-extend eight 16-bit elements of __A to 64-bit lanes; merge with __W
   under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}
4830 
4831 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4833 {
4834  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4835  (__v8di)_mm512_cvtepi16_epi64(__A),
4836  (__v8di)_mm512_setzero_si512());
4837 }
4838 
4839 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4841 {
4842  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4843 }
4844 
/* Zero-extend sixteen bytes of __A to 32-bit lanes; merge with __W under
   mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}
4852 
4853 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4855 {
4856  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4857  (__v16si)_mm512_cvtepu8_epi32(__A),
4858  (__v16si)_mm512_setzero_si512());
4859 }
4860 
4861 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4863 {
4864  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4865 }
4866 
/* Zero-extend the low 8 bytes of __A to 64-bit lanes; merge with __W under
   mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}
4874 
4875 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4877 {
4878  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4879  (__v8di)_mm512_cvtepu8_epi64(__A),
4880  (__v8di)_mm512_setzero_si512());
4881 }
4882 
4883 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4885 {
4886  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4887 }
4888 
/* Zero-extend eight 32-bit elements of __X to 64-bit lanes; merge with __W
   under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}
4896 
4897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4899 {
4900  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4901  (__v8di)_mm512_cvtepu32_epi64(__X),
4902  (__v8di)_mm512_setzero_si512());
4903 }
4904 
4905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4907 {
4908  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4909 }
4910 
/* Zero-extend sixteen 16-bit elements of __A to 32-bit lanes; merge with __W
   under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)__W);
}
4918 
4919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4921 {
4922  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4923  (__v16si)_mm512_cvtepu16_epi32(__A),
4924  (__v16si)_mm512_setzero_si512());
4925 }
4926 
4927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4929 {
4930  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4931 }
4932 
/* Zero-extend eight 16-bit elements of __A to 64-bit lanes; merge with __W
   under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}
4940 
4941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4943 {
4944  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4945  (__v8di)_mm512_cvtepu16_epi64(__A),
4946  (__v8di)_mm512_setzero_si512());
4947 }
4948 
/* Rotate each 32-bit lane of __A right by the per-lane count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
}
4954 
/* Variable right-rotate of 32-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)__W);
}
4962 
/* Variable right-rotate of 32-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
4970 
/* Rotate each 64-bit lane of __A right by the per-lane count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
}
4976 
/* Variable right-rotate of 64-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)__W);
}
4984 
/* Variable right-rotate of 64-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4992 
4993 
4994 
/* Integer compare macros: compare corresponding lanes of (a) and (b) with
   predicate (p) and return a bitmask of the per-lane results.  The mask_*
   forms additionally AND the result with (m).  These are macros because (p)
   must be an immediate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
5034 
/* Rotate-left-by-immediate macros (the count (b) must be an immediate); the
   mask/maskz forms select each result lane against (W) or zero under (U). */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5060 
/* Rotate each 32-bit lane of __A left by the per-lane count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
}
5066 
/* Variable left-rotate of 32-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)__W);
}
5074 
/* Variable left-rotate of 32-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
5082 
/* Rotate each 64-bit lane of __A left by the per-lane count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
}
5088 
/* Variable left-rotate of 64-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)__W);
}
5096 
/* Variable left-rotate of 64-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
5104 
/* Rotate-right-by-immediate macros; the masked forms select each result lane
   against (W) or zero under (U). */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5130 
/* Shift each 32-bit lane of __A left by the immediate count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}
5136 
/* Immediate left shift of 32-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}
5145 
/* Immediate left shift of 32-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
5152 
/* Shift each 64-bit lane of __A left by the immediate count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}
5158 
/* Immediate left shift of 64-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)__W);
}
5166 
/* Immediate left shift of 64-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
5174 
/* Logical right shift of each 32-bit lane of __A by the immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
}
5180 
/* Immediate logical right shift of 32-bit lanes; merge with __W under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)__W);
}
5189 
/* Immediate logical right shift of 32-bit lanes; zero unselected lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
5196 
/* Logical right shift of each 64-bit lane of __A by the immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
}
5202 
/* Immediate logical right shift of 64-bit lanes; merge with __W under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srli_epi64(__A, __B),
                                             (__v8di)__W);
}
5211 
5212 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5214  unsigned int __B)
5215 {
5216  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5217  (__v8di)_mm512_srli_epi64(__A, __B),
5218  (__v8di)_mm512_setzero_si512());
5219 }
5220 
/* Masked load of sixteen 32-bit elements from __P (movdqa32 — aligned load
   builtin); lanes whose bit in __U is clear take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si) __W,
                                                        (__mmask16) __U);
}
5228 
5229 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5231 {
5232  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5233  (__v16si)
5235  (__mmask16) __U);
5236 }
5237 
/* Masked store of __A to __P via the movdqa32 store builtin: only the 32-bit
   lanes selected by __U are written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
                                        (__mmask16) __U);
}
5244 
/* Per-lane select: copy 32-bit lanes of __A where __U is set, else __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __A,
                                               (__v16si) __W);
}
5252 
5253 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5255 {
5256  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5257  (__v16si) __A,
5258  (__v16si) _mm512_setzero_si512 ());
5259 }
5260 
/* Per-lane select: copy 64-bit lanes of __A where __U is set, else __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __A,
                                               (__v8di) __W);
}
5268 
5269 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5271 {
5272  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5273  (__v8di) __A,
5274  (__v8di) _mm512_setzero_si512 ());
5275 }
5276 
/* Masked load of eight 64-bit elements from __P (movdqa64 — aligned load
   builtin); lanes whose bit in __U is clear take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
                                                        (__v8di) __W,
                                                        (__mmask8) __U);
}
5284 
5285 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5287 {
5288  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5289  (__v8di)
5291  (__mmask8) __U);
5292 }
5293 
/* Masked store of __A to __P via the movdqa64 store builtin: only the 64-bit
   lanes selected by __U are written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
                                        (__mmask8) __U);
}
5300 
/* Duplicate each even-indexed double of __A into the following odd lane
   (movddup pattern: 0,0,2,2,4,4,6,6). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}
5307 
/* movedup of __A; lanes whose bit in __U is clear keep the lane of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}
5315 
5316 static __inline__ __m512d __DEFAULT_FN_ATTRS512
5318 {
5319  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5320  (__v8df)_mm512_movedup_pd(__A),
5321  (__v8df)_mm512_setzero_pd());
5322 }
5323 
/* fixupimm macros: fix up special values (NaN, Inf, zero, ...) of (A)/(B)
   per the lookup table in (C) and immediate (imm); _round_ forms take an
   explicit rounding/SAE argument (R), the others use the current direction.
   Macros because (imm) and (R) must be immediates. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Extract the exponent of the low double of (B) (upper lane from (A)) with
   an explicit rounding/SAE argument (R). */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5487 
5488 
/* Extract the exponent of the low double of __B into the low lane; the upper
   lane is taken from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
5495 
5496 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5497 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5498 {
5499  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5500  (__v2df) __B,
5501  (__v2df) __W,
5502  (__mmask8) __U,
5504 }
5505 
/* getexp of the low double of (B) with rounding (R); merge with (W) under (U). */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
5511 
5512 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5513 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5514 {
5515  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5516  (__v2df) __B,
5517  (__v2df) _mm_setzero_pd (),
5518  (__mmask8) __U,
5520 }
5521 
/* Zero-masked scalar-double getexp with explicit rounding, and the unmasked
   scalar-single getexp with explicit rounding. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))

#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
5533 
/* Extract the exponent of the low float of __B into the low lane; the upper
   lanes are taken from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
5540 
5541 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5542 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5543 {
5544  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5545  (__v4sf) __B,
5546  (__v4sf) __W,
5547  (__mmask8) __U,
5549 }
5550 
/* getexp of the low float of (B) with rounding (R); merge with (W) under (U). */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
5556 
5557 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5558 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5559 {
5560  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5561  (__v4sf) __B,
5562  (__v4sf) _mm_setzero_ps (),
5563  (__mmask8) __U,
5565 }
5566 
/* Zero-masked scalar-single getexp with rounding, followed by the getmant
   macros: extract the normalized mantissa of the low element, with (C)
   selecting the interval and (D) the sign control (packed as (D)<<2 | (C));
   _round_ forms take an explicit rounding/SAE argument (R). */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5662 
5663 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5665 {
5666  return __A;
5667 }
5668 
/* Scalar compare-to-int macros with explicit predicate (P) and SAE control
   (R); the 64-bit cvt macro is only available when compiling for x86-64. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5681 
/* Shift each 32-bit lane of __A left by the count in the low quadword of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}
5687 
/* sll_epi32 result merged with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sll_epi32(__A, __B),
                                             (__v16si)__W);
}
5695 
/* sll_epi32 result with lanes not selected by __U zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sll_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
5703 
/* Shift each 64-bit lane of __A left by the count in the low quadword of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}
5709 
/* sll_epi64 result merged with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}
5717 
/* sll_epi64 result with lanes not selected by __U zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
5725 
/* Shift each 32-bit lane of __X left by the per-lane count in __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}
5731 
/* Variable left shift of 32-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sllv_epi32(__X, __Y),
                                             (__v16si)__W);
}
5739 
/* Variable left shift of 32-bit lanes; zero lanes not selected by __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sllv_epi32(__X, __Y),
                                             (__v16si)_mm512_setzero_si512());
}
5747 
/* Shift each 64-bit lane of __X left by the per-lane count in __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}
5753 
/* Variable left shift of 64-bit lanes; merge with __W under mask __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
                                             (__v8di)__W);
}
5761 
5762 static __inline__ __m512i __DEFAULT_FN_ATTRS512
5763 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5764 {
5765  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5766  (__v8di)_mm512_sllv_epi64(__X, __Y),
5767  (__v8di)_mm512_setzero_si512());
5768 }
5769 
/* Arithmetic right shift of 16 packed 32-bit elements by the count in the
   low 64 bits of __B (sign bits are replicated). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked variant: lanes with a zero mask bit come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sra_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked variant: lanes with a zero mask bit become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sra_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Arithmetic right shift of 8 packed 64-bit elements by the count in the
   low 64 bits of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked 64-bit arithmetic right shift. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sra_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked 64-bit arithmetic right shift. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sra_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element arithmetic right shift: each 32-bit lane of __X shifted by the
   corresponding lane of __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked per-element arithmetic right shift (32-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srav_epi32(__X, __Y),
                                             (__v16si)__W);
}

/* Zero-masked per-element arithmetic right shift (32-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srav_epi32(__X, __Y),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element arithmetic right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked per-element arithmetic right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srav_epi64(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked per-element arithmetic right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srav_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}
5857 
/* Logical right shift of 16 packed 32-bit elements by the count in the low
   64 bits of __B; counts >= 32 produce zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked variant: lanes with a zero mask bit come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srl_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked variant: lanes with a zero mask bit become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srl_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Logical right shift of 8 packed 64-bit elements by the count in the low
   64 bits of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked 64-bit logical right shift. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srl_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked 64-bit logical right shift. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srl_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element logical right shift: each 32-bit lane of __X shifted by the
   corresponding lane of __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked per-element logical right shift (32-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srlv_epi32(__X, __Y),
                                             (__v16si)__W);
}

/* Zero-masked per-element logical right shift (32-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srlv_epi32(__X, __Y),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element logical right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked per-element logical right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked per-element logical right shift (64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
                                             (__v8di)_mm512_setzero_si512());
}
5945 
5946 /// \enum _MM_TERNLOG_ENUM
5947 /// A helper to represent the ternary logic operations among vector \a A,
5948 /// \a B and \a C. The representation is passed to \a imm.
/* Truth-table constants for vpternlog: each operand contributes its bit
   pattern, and an arbitrary three-input boolean function is expressed by
   combining them, e.g. (_MM_TERNLOG_A & _MM_TERNLOG_B) | _MM_TERNLOG_C.
   The extraction had dropped the A/B enumerators and the closing tag;
   restored to match the upstream header. */
typedef enum {
  _MM_TERNLOG_A = 0xF0, /* Bit pattern contributed by operand A. */
  _MM_TERNLOG_B = 0xCC, /* Bit pattern contributed by operand B. */
  _MM_TERNLOG_C = 0xAA  /* Bit pattern contributed by operand C. */
} _MM_TERNLOG_ENUM;
/* Ternary logic on 32/64-bit lanes: imm is an 8-bit truth table applied to
   (A, B, C); _mask variants merge from A, _maskz variants zero masked lanes. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5984 
/* Scalar double -> integer conversions with explicit rounding mode R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5998 
5999 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6000 _mm_cvtsd_u32 (__m128d __A)
6001 {
6002  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6004 }
6005 
6006 #ifdef __x86_64__
/* Scalar double -> unsigned 64-bit conversion with rounding mode R. */
#define _mm_cvt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)))
6010 
6011 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6012 _mm_cvtsd_u64 (__m128d __A)
6013 {
6014  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6015  __A,
6017 }
6018 #endif
6019 
/* Scalar float -> integer conversions with explicit rounding mode R. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif

#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
6036 
6037 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6038 _mm_cvtss_u32 (__m128 __A)
6039 {
6040  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6042 }
6043 
6044 #ifdef __x86_64__
/* Scalar float -> unsigned 64-bit conversion with rounding mode R. */
#define _mm_cvt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)))
6048 
6049 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6050 _mm_cvtss_u64 (__m128 __A)
6051 {
6052  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6053  __A,
6055 }
6056 #endif
6057 
/* Truncating scalar double -> signed 32-bit conversions; R only selects
   exception behavior (truncation is implied). */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6063 
6064 static __inline__ int __DEFAULT_FN_ATTRS128
6065 _mm_cvttsd_i32 (__m128d __A)
6066 {
6067  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6069 }
6070 
6071 #ifdef __x86_64__
/* Truncating scalar double -> signed 64-bit conversions (x86-64 only). */
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
6077 
6078 static __inline__ long long __DEFAULT_FN_ATTRS128
6079 _mm_cvttsd_i64 (__m128d __A)
6080 {
6081  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6083 }
6084 #endif
6085 
/* Truncating scalar double -> unsigned 32-bit conversion. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6088 
6089 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6090 _mm_cvttsd_u32 (__m128d __A)
6091 {
6092  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6094 }
6095 
6096 #ifdef __x86_64__
/* Truncating scalar double -> unsigned 64-bit conversion (x86-64 only). */
#define _mm_cvtt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                    (int)(R)))
6100 
6101 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6102 _mm_cvttsd_u64 (__m128d __A)
6103 {
6104  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6105  __A,
6107 }
6108 #endif
6109 
/* Truncating scalar float -> signed 32-bit conversions. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6115 
6116 static __inline__ int __DEFAULT_FN_ATTRS128
6117 _mm_cvttss_i32 (__m128 __A)
6118 {
6119  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6121 }
6122 
6123 #ifdef __x86_64__
/* Truncating scalar float -> signed 64-bit conversions (x86-64 only). */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6129 
6130 static __inline__ long long __DEFAULT_FN_ATTRS128
6131 _mm_cvttss_i64 (__m128 __A)
6132 {
6133  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6135 }
6136 #endif
6137 
/* Truncating scalar float -> unsigned 32-bit conversion. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
6140 
6141 static __inline__ unsigned __DEFAULT_FN_ATTRS128
6142 _mm_cvttss_u32 (__m128 __A)
6143 {
6144  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6146 }
6147 
6148 #ifdef __x86_64__
/* Truncating scalar float -> unsigned 64-bit conversion (x86-64 only). */
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R)))
6152 
6153 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6154 _mm_cvttss_u64 (__m128 __A)
6155 {
6156  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6157  __A,
6159 }
6160 #endif
6161 
/* In-lane permute of pd/ps elements selected by immediate C, with merge-
   and zero-masked variants. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6187 
/* In-lane permute of doubles in __A using per-element selectors in __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masked variant: lanes with a zero mask bit come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_permutevar_pd(__A, __C),
                                              (__v8df)__W);
}

/* Zero-masked variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_permutevar_pd(__A, __C),
                                              (__v8df)_mm512_setzero_pd());
}

/* In-lane permute of floats in __A using per-element selectors in __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masked variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_permutevar_ps(__A, __C),
                                             (__v16sf)__W);
}

/* Zero-masked variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_permutevar_ps(__A, __C),
                                             (__v16sf)_mm512_setzero_ps());
}
6231 
/* Two-source permute: each index in __I selects an element from the
   concatenation of __A and __B. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/* Merge-masked variant: masked-off lanes come from __A (the first source). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)__A);
}

/* mask2 variant: masked-off lanes come from the index operand __I
   (reinterpreted as doubles). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)(__m512d)__I);
}

/* Zero-masked variant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)_mm512_setzero_pd());
}

/* Two-source permute for packed floats. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/* Merge-masked variant: masked-off lanes come from __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)__A);
}

/* mask2 variant: masked-off lanes come from __I reinterpreted as floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)(__m512)__I);
}

/* Zero-masked variant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)_mm512_setzero_ps());
}
6295 
6296 
/* Truncating packed double -> unsigned 32-bit conversions with explicit
   exception behavior R. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
6311 
6312 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6313 _mm512_cvttpd_epu32 (__m512d __A)
6314 {
6315  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6316  (__v8si)
6318  (__mmask8) -1,
6320 }
6321 
6322 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6323 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6324 {
6325  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6326  (__v8si) __W,
6327  (__mmask8) __U,
6329 }
6330 
6331 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6333 {
6334  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6335  (__v8si)
6337  (__mmask8) __U,
6339 }
6340 
/* Scalar round-to-imm-specified-precision of the low element of B merged
   with the upper elements of A; _round variants take an explicit rounding
   mode R, others use the current direction. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6424 
/* scalef: A * 2^floor(B), per double lane, with explicit rounding mode R. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6442 
6443 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6444 _mm512_scalef_pd (__m512d __A, __m512d __B)
6445 {
6446  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6447  (__v8df) __B,
6448  (__v8df)
6450  (__mmask8) -1,
6452 }
6453 
6454 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6455 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6456 {
6457  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6458  (__v8df) __B,
6459  (__v8df) __W,
6460  (__mmask8) __U,
6462 }
6463 
6464 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6465 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6466 {
6467  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6468  (__v8df) __B,
6469  (__v8df)
6470  _mm512_setzero_pd (),
6471  (__mmask8) __U,
6473 }
6474 
/* scalef: A * 2^floor(B), per float lane, with explicit rounding mode R. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6492 
6493 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6494 _mm512_scalef_ps (__m512 __A, __m512 __B)
6495 {
6496  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6497  (__v16sf) __B,
6498  (__v16sf)
6500  (__mmask16) -1,
6502 }
6503 
6504 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6505 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6506 {
6507  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6508  (__v16sf) __B,
6509  (__v16sf) __W,
6510  (__mmask16) __U,
6512 }
6513 
6514 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6515 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6516 {
6517  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6518  (__v16sf) __B,
6519  (__v16sf)
6520  _mm512_setzero_ps (),
6521  (__mmask16) __U,
6523 }
6524 
/* Scalar scalef on the low doubles of A and B with rounding mode R. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6530 
6531 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6532 _mm_scalef_sd (__m128d __A, __m128d __B)
6533 {
6534  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6535  (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6536  (__mmask8) -1,
6538 }
6539 
6540 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6541 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6542 {
6543  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6544  (__v2df) __B,
6545  (__v2df) __W,
6546  (__mmask8) __U,
6548 }
6549 
/* Merge-masked scalar scalef with explicit rounding mode R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6555 
6556 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6557 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6558 {
6559  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6560  (__v2df) __B,
6561  (__v2df) _mm_setzero_pd (),
6562  (__mmask8) __U,
6564 }
6565 
/* Zero-masked scalar scalef (double) and unmasked scalar scalef (float),
   both with explicit rounding mode R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6577 
6578 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6579 _mm_scalef_ss (__m128 __A, __m128 __B)
6580 {
6581  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6582  (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6583  (__mmask8) -1,
6585 }
6586 
6587 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6588 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6589 {
6590  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6591  (__v4sf) __B,
6592  (__v4sf) __W,
6593  (__mmask8) __U,
6595 }
6596 
/* Merge-masked scalar scalef (float) with explicit rounding mode R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6602 
6603 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6604 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6605 {
6606  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6607  (__v4sf) __B,
6608  (__v4sf) _mm_setzero_ps (),
6609  (__mmask8) __U,
6611 }
6612 
/* Zero-masked scalar scalef (float) with explicit rounding mode R. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              (int)(R)))
6619 
/* Arithmetic right shift of 16 packed 32-bit elements by immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srai_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
}

/* Merge-masked variant: lanes with a zero mask bit come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)__W);
}
6634 
6635 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6637  unsigned int __B) {
6638  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6639  (__v16si)_mm512_srai_epi32(__A, __B),
6640  (__v16si)_mm512_setzero_si512());
6641 }
6642 
/* Arithmetic right shift of 8 packed 64-bit elements by immediate __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srai_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
}

/* Merge-masked variant: lanes with a zero mask bit come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked variant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
6664 
/* 128-bit-lane and element shuffles selected by immediate, each with merge-
   and zero-masked variants. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6748 
6749 #define _mm_sqrt_round_sd(A, B, R) \
6750  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6751  (__v2df)(__m128d)(B), \
6752  (__v2df)_mm_setzero_pd(), \
6753  (__mmask8)-1, (int)(R)))
6754 
6755 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6756 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6757 {
6758  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6759  (__v2df) __B,
6760  (__v2df) __W,
6761  (__mmask8) __U,
6763 }
6764 
6765 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6766  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6767  (__v2df)(__m128d)(B), \
6768  (__v2df)(__m128d)(W), \
6769  (__mmask8)(U), (int)(R)))
6770 
6771 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6772 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6773 {
6774  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6775  (__v2df) __B,
6776  (__v2df) _mm_setzero_pd (),
6777  (__mmask8) __U,
6779 }
6780 
6781 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6782  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6783  (__v2df)(__m128d)(B), \
6784  (__v2df)_mm_setzero_pd(), \
6785  (__mmask8)(U), (int)(R)))
6786 
6787 #define _mm_sqrt_round_ss(A, B, R) \
6788  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6789  (__v4sf)(__m128)(B), \
6790  (__v4sf)_mm_setzero_ps(), \
6791  (__mmask8)-1, (int)(R)))
6792 
6793 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6794 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6795 {
6796  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6797  (__v4sf) __B,
6798  (__v4sf) __W,
6799  (__mmask8) __U,
6801 }
6802 
6803 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6804  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6805  (__v4sf)(__m128)(B), \
6806  (__v4sf)(__m128)(W), (__mmask8)(U), \
6807  (int)(R)))
6808 
6809 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6810 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6811 {
6812  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6813  (__v4sf) __B,
6814  (__v4sf) _mm_setzero_ps (),
6815  (__mmask8) __U,
6817 }
6818 
6819 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6820  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6821  (__v4sf)(__m128)(B), \
6822  (__v4sf)_mm_setzero_ps(), \
6823  (__mmask8)(U), (int)(R)))
6824 
6825 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6827 {
6828  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6829  0, 1, 2, 3, 0, 1, 2, 3,
6830  0, 1, 2, 3, 0, 1, 2, 3);
6831 }
6832 
6833 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6834 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6835 {
6836  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6837  (__v16sf)_mm512_broadcast_f32x4(__A),
6838  (__v16sf)__O);
6839 }
6840 
6841 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6843 {
6844  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6845  (__v16sf)_mm512_broadcast_f32x4(__A),
6846  (__v16sf)_mm512_setzero_ps());
6847 }
6848 
6849 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6851 {
6852  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6853  0, 1, 2, 3, 0, 1, 2, 3);
6854 }
6855 
6856 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6857 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6858 {
6859  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6860  (__v8df)_mm512_broadcast_f64x4(__A),
6861  (__v8df)__O);
6862 }
6863 
6864 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6866 {
6867  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6868  (__v8df)_mm512_broadcast_f64x4(__A),
6869  (__v8df)_mm512_setzero_pd());
6870 }
6871 
6872 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6874 {
6875  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6876  0, 1, 2, 3, 0, 1, 2, 3,
6877  0, 1, 2, 3, 0, 1, 2, 3);
6878 }
6879 
6880 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6881 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6882 {
6883  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6884  (__v16si)_mm512_broadcast_i32x4(__A),
6885  (__v16si)__O);
6886 }
6887 
6888 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6890 {
6891  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6892  (__v16si)_mm512_broadcast_i32x4(__A),
6893  (__v16si)_mm512_setzero_si512());
6894 }
6895 
6896 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6898 {
6899  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6900  0, 1, 2, 3, 0, 1, 2, 3);
6901 }
6902 
6903 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6904 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6905 {
6906  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6907  (__v8di)_mm512_broadcast_i64x4(__A),
6908  (__v8di)__O);
6909 }
6910 
6911 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6913 {
6914  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6915  (__v8di)_mm512_broadcast_i64x4(__A),
6916  (__v8di)_mm512_setzero_si512());
6917 }
6918 
6919 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6920 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6921 {
6922  return (__m512d)__builtin_ia32_selectpd_512(__M,
6923  (__v8df) _mm512_broadcastsd_pd(__A),
6924  (__v8df) __O);
6925 }
6926 
6927 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6929 {
6930  return (__m512d)__builtin_ia32_selectpd_512(__M,
6931  (__v8df) _mm512_broadcastsd_pd(__A),
6932  (__v8df) _mm512_setzero_pd());
6933 }
6934 
6935 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6936 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6937 {
6938  return (__m512)__builtin_ia32_selectps_512(__M,
6939  (__v16sf) _mm512_broadcastss_ps(__A),
6940  (__v16sf) __O);
6941 }
6942 
6943 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6945 {
6946  return (__m512)__builtin_ia32_selectps_512(__M,
6947  (__v16sf) _mm512_broadcastss_ps(__A),
6948  (__v16sf) _mm512_setzero_ps());
6949 }
6950 
6951 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6953 {
6954  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6955  (__v16qi) _mm_undefined_si128 (),
6956  (__mmask16) -1);
6957 }
6958 
6959 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6960 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6961 {
6962  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6963  (__v16qi) __O, __M);
6964 }
6965 
6966 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6968 {
6969  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6970  (__v16qi) _mm_setzero_si128 (),
6971  __M);
6972 }
6973 
6974 static __inline__ void __DEFAULT_FN_ATTRS512
6976 {
6977  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6978 }
6979 
6980 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6982 {
6983  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6984  (__v16hi) _mm256_undefined_si256 (),
6985  (__mmask16) -1);
6986 }
6987 
6988 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6989 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6990 {
6991  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6992  (__v16hi) __O, __M);
6993 }
6994 
6995 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6997 {
6998  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6999  (__v16hi) _mm256_setzero_si256 (),
7000  __M);
7001 }
7002 
7003 static __inline__ void __DEFAULT_FN_ATTRS512
7005 {
7006  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7007 }
7008 
7009 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7011 {
7012  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7013  (__v16qi) _mm_undefined_si128 (),
7014  (__mmask8) -1);
7015 }
7016 
7017 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7018 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7019 {
7020  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7021  (__v16qi) __O, __M);
7022 }
7023 
7024 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7026 {
7027  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7028  (__v16qi) _mm_setzero_si128 (),
7029  __M);
7030 }
7031 
7032 static __inline__ void __DEFAULT_FN_ATTRS512
7034 {
7035  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7036 }
7037 
7038 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7040 {
7041  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7042  (__v8si) _mm256_undefined_si256 (),
7043  (__mmask8) -1);
7044 }
7045 
7046 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7047 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7048 {
7049  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7050  (__v8si) __O, __M);
7051 }
7052 
7053 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7055 {
7056  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7057  (__v8si) _mm256_setzero_si256 (),
7058  __M);
7059 }
7060 
7061 static __inline__ void __DEFAULT_FN_ATTRS512
7063 {
7064  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7065 }
7066 
7067 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7069 {
7070  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7071  (__v8hi) _mm_undefined_si128 (),
7072  (__mmask8) -1);
7073 }
7074 
7075 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7076 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7077 {
7078  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7079  (__v8hi) __O, __M);
7080 }
7081 
7082 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7084 {
7085  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7086  (__v8hi) _mm_setzero_si128 (),
7087  __M);
7088 }
7089 
7090 static __inline__ void __DEFAULT_FN_ATTRS512
7092 {
7093  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7094 }
7095 
7096 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7098 {
7099  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7100  (__v16qi) _mm_undefined_si128 (),
7101  (__mmask16) -1);
7102 }
7103 
7104 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7105 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7106 {
7107  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7108  (__v16qi) __O,
7109  __M);
7110 }
7111 
7112 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7114 {
7115  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7116  (__v16qi) _mm_setzero_si128 (),
7117  __M);
7118 }
7119 
7120 static __inline__ void __DEFAULT_FN_ATTRS512
7122 {
7123  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7124 }
7125 
7126 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7128 {
7129  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7130  (__v16hi) _mm256_undefined_si256 (),
7131  (__mmask16) -1);
7132 }
7133 
7134 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7135 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7136 {
7137  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7138  (__v16hi) __O,
7139  __M);
7140 }
7141 
7142 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7144 {
7145  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7146  (__v16hi) _mm256_setzero_si256 (),
7147  __M);
7148 }
7149 
7150 static __inline__ void __DEFAULT_FN_ATTRS512
7152 {
7153  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7154 }
7155 
7156 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7158 {
7159  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7160  (__v16qi) _mm_undefined_si128 (),
7161  (__mmask8) -1);
7162 }
7163 
7164 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7165 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7166 {
7167  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7168  (__v16qi) __O,
7169  __M);
7170 }
7171 
7172 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7174 {
7175  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7176  (__v16qi) _mm_setzero_si128 (),
7177  __M);
7178 }
7179 
7180 static __inline__ void __DEFAULT_FN_ATTRS512
7182 {
7183  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7184 }
7185 
7186 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7188 {
7189  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7190  (__v8si) _mm256_undefined_si256 (),
7191  (__mmask8) -1);
7192 }
7193 
7194 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7195 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7196 {
7197  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7198  (__v8si) __O, __M);
7199 }
7200 
7201 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7203 {
7204  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7205  (__v8si) _mm256_setzero_si256 (),
7206  __M);
7207 }
7208 
7209 static __inline__ void __DEFAULT_FN_ATTRS512
7211 {
7212  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7213 }
7214 
7215 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7217 {
7218  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7219  (__v8hi) _mm_undefined_si128 (),
7220  (__mmask8) -1);
7221 }
7222 
7223 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7224 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7225 {
7226  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7227  (__v8hi) __O, __M);
7228 }
7229 
7230 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7232 {
7233  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7234  (__v8hi) _mm_setzero_si128 (),
7235  __M);
7236 }
7237 
7238 static __inline__ void __DEFAULT_FN_ATTRS512
7240 {
7241  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7242 }
7243 
7244 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7246 {
7247  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7248  (__v16qi) _mm_undefined_si128 (),
7249  (__mmask16) -1);
7250 }
7251 
7252 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7253 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7254 {
7255  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7256  (__v16qi) __O, __M);
7257 }
7258 
7259 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7261 {
7262  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7263  (__v16qi) _mm_setzero_si128 (),
7264  __M);
7265 }
7266 
7267 static __inline__ void __DEFAULT_FN_ATTRS512
7269 {
7270  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7271 }
7272 
7273 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7275 {
7276  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7277  (__v16hi) _mm256_undefined_si256 (),
7278  (__mmask16) -1);
7279 }
7280 
7281 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7282 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7283 {
7284  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7285  (__v16hi) __O, __M);
7286 }
7287 
7288 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7290 {
7291  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7292  (__v16hi) _mm256_setzero_si256 (),
7293  __M);
7294 }
7295 
7296 static __inline__ void __DEFAULT_FN_ATTRS512
7298 {
7299  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7300 }
7301 
7302 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7304 {
7305  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7306  (__v16qi) _mm_undefined_si128 (),
7307  (__mmask8) -1);
7308 }
7309 
7310 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7311 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7312 {
7313  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7314  (__v16qi) __O, __M);
7315 }
7316 
7317 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7319 {
7320  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7321  (__v16qi) _mm_setzero_si128 (),
7322  __M);
7323 }
7324 
7325 static __inline__ void __DEFAULT_FN_ATTRS512
7327 {
7328  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7329 }
7330 
7331 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7333 {
7334  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7335  (__v8si) _mm256_undefined_si256 (),
7336  (__mmask8) -1);
7337 }
7338 
7339 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7340 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7341 {
7342  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7343  (__v8si) __O, __M);
7344 }
7345 
7346 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7348 {
7349  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7350  (__v8si) _mm256_setzero_si256 (),
7351  __M);
7352 }
7353 
7354 static __inline__ void __DEFAULT_FN_ATTRS512
7356 {
7357  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7358 }
7359 
7360 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7362 {
7363  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7364  (__v8hi) _mm_undefined_si128 (),
7365  (__mmask8) -1);
7366 }
7367 
7368 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7369 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7370 {
7371  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7372  (__v8hi) __O, __M);
7373 }
7374 
7375 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7377 {
7378  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7379  (__v8hi) _mm_setzero_si128 (),
7380  __M);
7381 }
7382 
7383 static __inline__ void __DEFAULT_FN_ATTRS512
7385 {
7386  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7387 }
7388 
/* Extract a 128-bit (x4 32-bit) or 256-bit (x4 64-bit) slice selected by an
   immediate index, and insert a 128/256-bit slice into a 512-bit vector at
   an immediate position.  mask/maskz forms blend with W or zero under the
   writemask.  Immediates force these to be macros. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
7474 
/* vgetmantpd / vgetmantps: extract the normalized mantissa of each element.
   B selects the normalization interval and C the sign handling; the builtin
   packs them as (C << 2) | B.  *_round_* variants take an explicit rounding
   immediate; the plain variants use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7552 
/* vgetexppd with an explicit rounding/SAE immediate R: extract the unbiased
   exponent of each double element as a double. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7567 
7568 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7569 _mm512_getexp_pd (__m512d __A)
7570 {
7571  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7572  (__v8df) _mm512_undefined_pd (),
7573  (__mmask8) -1,
7575 }
7576 
7577 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7578 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7579 {
7580  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7581  (__v8df) __W,
7582  (__mmask8) __U,
7584 }
7585 
7586 static __inline__ __m512d __DEFAULT_FN_ATTRS512
7588 {
7589  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7590  (__v8df) _mm512_setzero_pd (),
7591  (__mmask8) __U,
7593 }
7594 
/* vgetexpps with an explicit rounding/SAE immediate R: extract the unbiased
   exponent of each float element as a float. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7609 
7610 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7611 _mm512_getexp_ps (__m512 __A)
7612 {
7613  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7614  (__v16sf) _mm512_undefined_ps (),
7615  (__mmask16) -1,
7617 }
7618 
7619 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7620 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7621 {
7622  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7623  (__v16sf) __W,
7624  (__mmask16) __U,
7626 }
7627 
7628 static __inline__ __m512 __DEFAULT_FN_ATTRS512
7630 {
7631  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7632  (__v16sf) _mm512_setzero_ps (),
7633  (__mmask16) __U,
7635 }
7636 
/* Gathers: load elements from addr + index[i] * scale.  i64gather forms use
   64-bit indices (so 32-bit-element results occupy 256 bits); i32gather
   forms use 32-bit indices.  The masked variants load only elements whose
   mask bit is set, taking the rest from v1_old.  scale must be 1, 2, 4 or 8.
   (The (__v16si)(__m512)(index) casts below are vector bitcasts and are kept
   as upstream spells them.) */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7732 
/* 64-bit-index scatters: store v1 elements to addr + index[i]*scale.  The
   32-bit-element forms write only 8 elements, so v1 is a __m256/__m256i. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7772 
/* 32-bit-index scatters; the 64-bit-element forms take 8 indices from a
   __m256i. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7812 
7813 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7814 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7815 {
7816  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7817  (__v4sf)__A,
7818  (__v4sf)__B,
7819  (__mmask8)__U,
7821 }
7822 
7823 #define _mm_fmadd_round_ss(A, B, C, R) \
7824  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7825  (__v4sf)(__m128)(B), \
7826  (__v4sf)(__m128)(C), (__mmask8)-1, \
7827  (int)(R)))
7828 
7829 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7830  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7831  (__v4sf)(__m128)(A), \
7832  (__v4sf)(__m128)(B), (__mmask8)(U), \
7833  (int)(R)))
7834 
7835 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7836 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7837 {
7838  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7839  (__v4sf)__B,
7840  (__v4sf)__C,
7841  (__mmask8)__U,
7843 }
7844 
7845 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7846  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7847  (__v4sf)(__m128)(B), \
7848  (__v4sf)(__m128)(C), (__mmask8)(U), \
7849  (int)(R)))
7850 
7851 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7852 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7853 {
7854  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7855  (__v4sf)__X,
7856  (__v4sf)__Y,
7857  (__mmask8)__U,
7859 }
7860 
7861 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7862  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7863  (__v4sf)(__m128)(X), \
7864  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7865  (int)(R)))
7866 
7867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7868 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7869 {
7870  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7871  (__v4sf)__A,
7872  -(__v4sf)__B,
7873  (__mmask8)__U,
7875 }
7876 
7877 #define _mm_fmsub_round_ss(A, B, C, R) \
7878  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7879  (__v4sf)(__m128)(B), \
7880  -(__v4sf)(__m128)(C), (__mmask8)-1, \
7881  (int)(R)))
7882 
7883 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7884  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7885  (__v4sf)(__m128)(A), \
7886  -(__v4sf)(__m128)(B), (__mmask8)(U), \
7887  (int)(R)))
7888 
7889 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7890 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7891 {
7892  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7893  (__v4sf)__B,
7894  -(__v4sf)__C,
7895  (__mmask8)__U,
7897 }
7898 
7899 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7900  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7901  (__v4sf)(__m128)(B), \
7902  -(__v4sf)(__m128)(C), (__mmask8)(U), \
7903  (int)(R)))
7904 
7905 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7906 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7907 {
7908  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7909  (__v4sf)__X,
7910  (__v4sf)__Y,
7911  (__mmask8)__U,
7913 }
7914 
7915 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7916  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7917  (__v4sf)(__m128)(X), \
7918  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7919  (int)(R)))
7920 
7921 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7922 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7923 {
7924  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7925  -(__v4sf)__A,
7926  (__v4sf)__B,
7927  (__mmask8)__U,
7929 }
7930 
7931 #define _mm_fnmadd_round_ss(A, B, C, R) \
7932  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7933  -(__v4sf)(__m128)(B), \
7934  (__v4sf)(__m128)(C), (__mmask8)-1, \
7935  (int)(R)))
7936 
7937 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7938  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7939  -(__v4sf)(__m128)(A), \
7940  (__v4sf)(__m128)(B), (__mmask8)(U), \
7941  (int)(R)))
7942 
7943 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7944 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7945 {
7946  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7947  -(__v4sf)__B,
7948  (__v4sf)__C,
7949  (__mmask8)__U,
7951 }
7952 
7953 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7954  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7955  -(__v4sf)(__m128)(B), \
7956  (__v4sf)(__m128)(C), (__mmask8)(U), \
7957  (int)(R)))
7958 
7959 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7960 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7961 {
7962  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7963  -(__v4sf)__X,
7964  (__v4sf)__Y,
7965  (__mmask8)__U,
7967 }
7968 
7969 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7970  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7971  -(__v4sf)(__m128)(X), \
7972  (__v4sf)(__m128)(Y), (__mmask8)(U), \
7973  (int)(R)))
7974 
7975 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7976 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7977 {
7978  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7979  -(__v4sf)__A,
7980  -(__v4sf)__B,
7981  (__mmask8)__U,
7983 }
7984 
7985 #define _mm_fnmsub_round_ss(A, B, C, R) \
7986  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7987  -(__v4sf)(__m128)(B), \
7988  -(__v4sf)(__m128)(C), (__mmask8)-1, \
7989  (int)(R)))
7990 
7991 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7992  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7993  -(__v4sf)(__m128)(A), \
7994  -(__v4sf)(__m128)(B), (__mmask8)(U), \
7995  (int)(R)))
7996 
7997 static __inline__ __m128 __DEFAULT_FN_ATTRS128
7998 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7999 {
8000  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
8001  -(__v4sf)__B,
8002  -(__v4sf)__C,
8003  (__mmask8)__U,
8005 }
8006 
8007 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
8008  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8009  -(__v4sf)(__m128)(B), \
8010  -(__v4sf)(__m128)(C), (__mmask8)(U), \
8011  (int)(R)))
8012 
8013 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8014 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8015 {
8016  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
8017  -(__v4sf)__X,
8018  (__v4sf)__Y,
8019  (__mmask8)__U,
8021 }
8022 
8023 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
8024  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8025  -(__v4sf)(__m128)(X), \
8026  (__v4sf)(__m128)(Y), (__mmask8)(U), \
8027  (int)(R)))
8028 
8029 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8030 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8031 {
8032  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8033  (__v2df)__A,
8034  (__v2df)__B,
8035  (__mmask8)__U,
8037 }
8038 
8039 #define _mm_fmadd_round_sd(A, B, C, R) \
8040  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8041  (__v2df)(__m128d)(B), \
8042  (__v2df)(__m128d)(C), (__mmask8)-1, \
8043  (int)(R)))
8044 
8045 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
8046  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8047  (__v2df)(__m128d)(A), \
8048  (__v2df)(__m128d)(B), (__mmask8)(U), \
8049  (int)(R)))
8050 
8051 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8052 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8053 {
8054  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8055  (__v2df)__B,
8056  (__v2df)__C,
8057  (__mmask8)__U,
8059 }
8060 
8061 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8062  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8063  (__v2df)(__m128d)(B), \
8064  (__v2df)(__m128d)(C), (__mmask8)(U), \
8065  (int)(R)))
8066 
8067 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8068 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8069 {
8070  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8071  (__v2df)__X,
8072  (__v2df)__Y,
8073  (__mmask8)__U,
8075 }
8076 
8077 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8078  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8079  (__v2df)(__m128d)(X), \
8080  (__v2df)(__m128d)(Y), (__mmask8)(U), \
8081  (int)(R)))
8082 
8083 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8084 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8085 {
8086  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8087  (__v2df)__A,
8088  -(__v2df)__B,
8089  (__mmask8)__U,
8091 }
8092 
8093 #define _mm_fmsub_round_sd(A, B, C, R) \
8094  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8095  (__v2df)(__m128d)(B), \
8096  -(__v2df)(__m128d)(C), (__mmask8)-1, \
8097  (int)(R)))
8098 
8099 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8100  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8101  (__v2df)(__m128d)(A), \
8102  -(__v2df)(__m128d)(B), (__mmask8)(U), \
8103  (int)(R)))
8104 
8105 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8106 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8107 {
8108  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8109  (__v2df)__B,
8110  -(__v2df)__C,
8111  (__mmask8)__U,
8113 }
8114 
8115 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8116  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8117  (__v2df)(__m128d)(B), \
8118  -(__v2df)(__m128d)(C), \
8119  (__mmask8)(U), (int)(R)))
8120 
8121 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8122 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8123 {
8124  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8125  (__v2df)__X,
8126  (__v2df)__Y,
8127  (__mmask8)__U,
8129 }
8130 
8131 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8132  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8133  (__v2df)(__m128d)(X), \
8134  (__v2df)(__m128d)(Y), \
8135  (__mmask8)(U), (int)(R)))
8136 
8137 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8138 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8139 {
8140  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8141  -(__v2df)__A,
8142  (__v2df)__B,
8143  (__mmask8)__U,
8145 }
8146 
8147 #define _mm_fnmadd_round_sd(A, B, C, R) \
8148  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8149  -(__v2df)(__m128d)(B), \
8150  (__v2df)(__m128d)(C), (__mmask8)-1, \
8151  (int)(R)))
8152 
8153 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
8154  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8155  -(__v2df)(__m128d)(A), \
8156  (__v2df)(__m128d)(B), (__mmask8)(U), \
8157  (int)(R)))
8158 
8159 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8160 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8161 {
8162  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8163  -(__v2df)__B,
8164  (__v2df)__C,
8165  (__mmask8)__U,
8167 }
8168 
8169 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
8170  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8171  -(__v2df)(__m128d)(B), \
8172  (__v2df)(__m128d)(C), (__mmask8)(U), \
8173  (int)(R)))
8174 
8175 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8176 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8177 {
8178  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8179  -(__v2df)__X,
8180  (__v2df)__Y,
8181  (__mmask8)__U,
8183 }
8184 
8185 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
8186  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8187  -(__v2df)(__m128d)(X), \
8188  (__v2df)(__m128d)(Y), (__mmask8)(U), \
8189  (int)(R)))
8190 
8191 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8192 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8193 {
8194  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8195  -(__v2df)__A,
8196  -(__v2df)__B,
8197  (__mmask8)__U,
8199 }
8200 
8201 #define _mm_fnmsub_round_sd(A, B, C, R) \
8202  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8203  -(__v2df)(__m128d)(B), \
8204  -(__v2df)(__m128d)(C), (__mmask8)-1, \
8205  (int)(R)))
8206 
8207 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
8208  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8209  -(__v2df)(__m128d)(A), \
8210  -(__v2df)(__m128d)(B), (__mmask8)(U), \
8211  (int)(R)))
8212 
8213 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8214 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8215 {
8216  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8217  -(__v2df)__B,
8218  -(__v2df)__C,
8219  (__mmask8)__U,
8221 }
8222 
8223 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
8224  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8225  -(__v2df)(__m128d)(B), \
8226  -(__v2df)(__m128d)(C), \
8227  (__mmask8)(U), \
8228  (int)(R)))
8229 
8230 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8231 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8232 {
8233  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8234  -(__v2df)__X,
8235  (__v2df)__Y,
8236  (__mmask8)__U,
8238 }
8239 
8240 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
8241  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8242  -(__v2df)(__m128d)(X), \
8243  (__v2df)(__m128d)(Y), \
8244  (__mmask8)(U), (int)(R)))
8245 
/* Immediate-controlled lane permutes of 64-bit elements, with merge (mask)
   and zeroing (maskz) variants built from a select over the unmasked form. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8271 
8272 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8273 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8274 {
8275  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8276 }
8277 
8278 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8279 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8280 {
8281  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8282  (__v8df)_mm512_permutexvar_pd(__X, __Y),
8283  (__v8df)__W);
8284 }
8285 
8286 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8287 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8288 {
8289  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8290  (__v8df)_mm512_permutexvar_pd(__X, __Y),
8291  (__v8df)_mm512_setzero_pd());
8292 }
8293 
8294 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8295 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8296 {
8297  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8298 }
8299 
8300 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8301 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8302 {
8303  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8304  (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8305  (__v8di)_mm512_setzero_si512());
8306 }
8307 
8308 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8309 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8310  __m512i __Y)
8311 {
8312  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8313  (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8314  (__v8di)__W);
8315 }
8316 
8317 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8318 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8319 {
8320  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8321 }
8322 
8323 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8324 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8325 {
8326  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8327  (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8328  (__v16sf)__W);
8329 }
8330 
8331 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8332 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8333 {
8334  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8335  (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8336  (__v16sf)_mm512_setzero_ps());
8337 }
8338 
8339 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8340 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8341 {
8342  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8343 }
8344 
8345 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8346 
8347 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8348 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8349 {
8350  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8351  (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8352  (__v16si)_mm512_setzero_si512());
8353 }
8354 
8355 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8356 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8357  __m512i __Y)
8358 {
8359  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8360  (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8361  (__v16si)__W);
8362 }
8363 
8364 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8365 
8366 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8368 {
8369  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8370 }
8371 
8372 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8374 {
8375  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8376 }
8377 
8378 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8380 {
8381  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8382 }
8383 
8384 static __inline__ int __DEFAULT_FN_ATTRS
8386 {
8387  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8388 }
8389 
8390 static __inline__ int __DEFAULT_FN_ATTRS
8392 {
8393  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8394 }
8395 
8396 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8398 {
8399  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8400 }
8401 
8402 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8404 {
8405  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8406 }
8407 
8408 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8409 _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8410  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8411  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8412 }
8413 
8414 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8416 {
8417  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8418 }
8419 
8420 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8422 {
8423  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8424 }
8425 
8426 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8428 {
8429  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8430 }
8431 
/* AVX-512 uniform naming aliases for the mask16 logic intrinsics, plus
   immediate mask-shift macros. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8444 
8445 static __inline__ unsigned int __DEFAULT_FN_ATTRS
8447  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8448 }
8449 
8450 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8451 _cvtu32_mask16(unsigned int __A) {
8452  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8453 }
8454 
8455 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8457  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8458 }
8459 
8460 static __inline__ void __DEFAULT_FN_ATTRS
8462  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8463 }
8464 
8465 static __inline__ void __DEFAULT_FN_ATTRS512
8466 _mm512_stream_si512 (void * __P, __m512i __A)
8467 {
8468  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8469  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8470 }
8471 
8472 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8474 {
8475  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8476  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8477 }
8478 
8479 static __inline__ void __DEFAULT_FN_ATTRS512
8480 _mm512_stream_pd (void *__P, __m512d __A)
8481 {
8482  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8483  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8484 }
8485 
8486 static __inline__ void __DEFAULT_FN_ATTRS512
8487 _mm512_stream_ps (void *__P, __m512 __A)
8488 {
8489  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8490  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8491 }
8492 
8493 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8494 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8495 {
8496  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8497  (__v8df) __W,
8498  (__mmask8) __U);
8499 }
8500 
8501 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8503 {
8504  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8505  (__v8df)
8506  _mm512_setzero_pd (),
8507  (__mmask8) __U);
8508 }
8509 
8510 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8511 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8512 {
8513  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8514  (__v8di) __W,
8515  (__mmask8) __U);
8516 }
8517 
8518 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8520 {
8521  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8522  (__v8di)
8524  (__mmask8) __U);
8525 }
8526 
8527 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8528 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8529 {
8530  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8531  (__v16sf) __W,
8532  (__mmask16) __U);
8533 }
8534 
8535 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8537 {
8538  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8539  (__v16sf)
8540  _mm512_setzero_ps (),
8541  (__mmask16) __U);
8542 }
8543 
8544 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8545 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8546 {
8547  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8548  (__v16si) __W,
8549  (__mmask16) __U);
8550 }
8551 
8552 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8554 {
8555  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8556  (__v16si)
8558  (__mmask16) __U);
8559 }
8560 
/* Scalar compares returning a mask; the non-round forms pass
   _MM_FROUND_CUR_DIRECTION, the round forms take R explicitly. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8604 
8605 /* Bit Test */
8606 
8607 static __inline __mmask16 __DEFAULT_FN_ATTRS512
8608 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
8609 {
8610  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8612 }
8613 
8614 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8615 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8616 {
8617  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8619 }
8620 
8621 static __inline __mmask8 __DEFAULT_FN_ATTRS512
8622 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
8623 {
8624  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8626 }
8627 
8628 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8629 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8630 {
8631  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8633 }
8634 
8635 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8636 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8637 {
8638  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8640 }
8641 
8642 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8643 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8644 {
8645  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8647 }
8648 
8649 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8650 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8651 {
8652  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8654 }
8655 
8656 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8657 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8658 {
8659  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8661 }
8662 
8663 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8665 {
8666  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8667  1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8668 }
8669 
8670 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8671 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8672 {
8673  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8674  (__v16sf)_mm512_movehdup_ps(__A),
8675  (__v16sf)__W);
8676 }
8677 
8678 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8680 {
8681  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8682  (__v16sf)_mm512_movehdup_ps(__A),
8683  (__v16sf)_mm512_setzero_ps());
8684 }
8685 
8686 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8688 {
8689  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8690  0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8691 }
8692 
8693 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8694 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8695 {
8696  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8697  (__v16sf)_mm512_moveldup_ps(__A),
8698  (__v16sf)__W);
8699 }
8700 
8701 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8703 {
8704  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8705  (__v16sf)_mm512_moveldup_ps(__A),
8706  (__v16sf)_mm512_setzero_ps());
8707 }
8708 
8709 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8710 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8711 {
8712  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8713 }
8714 
8715 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8716 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8717 {
8718  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8719  _mm_setzero_ps());
8720 }
8721 
8722 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8723 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8724 {
8725  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8726 }
8727 
8728 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8729 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8730 {
8731  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8732  _mm_setzero_pd());
8733 }
8734 
8735 static __inline__ void __DEFAULT_FN_ATTRS128
8736 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8737 {
8738  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8739 }
8740 
8741 static __inline__ void __DEFAULT_FN_ATTRS128
8742 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8743 {
8744  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8745 }
8746 
8747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8748 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8749 {
8750  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8751  (__v4sf)_mm_setzero_ps(),
8752  0, 4, 4, 4);
8753 
8754  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8755 }
8756 
8757 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8758 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
8759 {
8760  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8761  (__v4sf) _mm_setzero_ps(),
8762  __U & 1);
8763 }
8764 
8765 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8766 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8767 {
8768  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8769  (__v2df)_mm_setzero_pd(),
8770  0, 2);
8771 
8772  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8773 }
8774 
8775 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8776 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
8777 {
8778  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8779  (__v2df) _mm_setzero_pd(),
8780  __U & 1);
8781 }
8782 
/* VPSHUFD: shuffle 32-bit elements of A by immediate control I, with
 * merge- and zero-masked variants. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8795 
8796 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8797 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8798 {
8799  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8800  (__v8df) __W,
8801  (__mmask8) __U);
8802 }
8803 
8804 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8806 {
8807  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8808  (__v8df) _mm512_setzero_pd (),
8809  (__mmask8) __U);
8810 }
8811 
8812 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8813 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8814 {
8815  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8816  (__v8di) __W,
8817  (__mmask8) __U);
8818 }
8819 
8820 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8822 {
8823  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8824  (__v8di) _mm512_setzero_si512 (),
8825  (__mmask8) __U);
8826 }
8827 
8828 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8829 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8830 {
8831  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8832  (__v8df) __W,
8833  (__mmask8) __U);
8834 }
8835 
8836 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8838 {
8839  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8840  (__v8df) _mm512_setzero_pd(),
8841  (__mmask8) __U);
8842 }
8843 
8844 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8845 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8846 {
8847  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8848  (__v8di) __W,
8849  (__mmask8) __U);
8850 }
8851 
8852 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8854 {
8855  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8856  (__v8di) _mm512_setzero_si512(),
8857  (__mmask8) __U);
8858 }
8859 
8860 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8861 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8862 {
8863  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8864  (__v16sf) __W,
8865  (__mmask16) __U);
8866 }
8867 
8868 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8870 {
8871  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8872  (__v16sf) _mm512_setzero_ps(),
8873  (__mmask16) __U);
8874 }
8875 
8876 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8877 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8878 {
8879  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8880  (__v16si) __W,
8881  (__mmask16) __U);
8882 }
8883 
8884 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8886 {
8887  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8888  (__v16si) _mm512_setzero_si512(),
8889  (__mmask16) __U);
8890 }
8891 
8892 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8893 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8894 {
8895  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8896  (__v16sf) __W,
8897  (__mmask16) __U);
8898 }
8899 
8900 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8902 {
8903  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8904  (__v16sf) _mm512_setzero_ps(),
8905  (__mmask16) __U);
8906 }
8907 
8908 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8909 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8910 {
8911  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8912  (__v16si) __W,
8913  (__mmask16) __U);
8914 }
8915 
8916 static __inline__ __m512i __DEFAULT_FN_ATTRS512
8918 {
8919  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8920  (__v16si) _mm512_setzero_si512(),
8921  (__mmask16) __U);
8922 }
8923 
/* Convert 8 packed floats to 8 packed doubles with explicit rounding mode R. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8938 
8939 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8940 _mm512_cvtps_pd (__m256 __A)
8941 {
8942  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8943 }
8944 
8945 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8946 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8947 {
8948  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8949  (__v8df)_mm512_cvtps_pd(__A),
8950  (__v8df)__W);
8951 }
8952 
8953 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8955 {
8956  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8957  (__v8df)_mm512_cvtps_pd(__A),
8958  (__v8df)_mm512_setzero_pd());
8959 }
8960 
8961 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8962 _mm512_cvtpslo_pd (__m512 __A)
8963 {
8964  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8965 }
8966 
8967 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8968 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8969 {
8970  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8971 }
8972 
8973 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8974 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8975 {
8976  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8977  (__v8df) __A,
8978  (__v8df) __W);
8979 }
8980 
8981 static __inline__ __m512d __DEFAULT_FN_ATTRS512
8982 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8983 {
8984  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8985  (__v8df) __A,
8986  (__v8df) _mm512_setzero_pd ());
8987 }
8988 
8989 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8990 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8991 {
8992  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8993  (__v16sf) __A,
8994  (__v16sf) __W);
8995 }
8996 
8997 static __inline__ __m512 __DEFAULT_FN_ATTRS512
8999 {
9000  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9001  (__v16sf) __A,
9002  (__v16sf) _mm512_setzero_ps ());
9003 }
9004 
9005 static __inline__ void __DEFAULT_FN_ATTRS512
9006 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9007 {
9008  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9009  (__mmask8) __U);
9010 }
9011 
9012 static __inline__ void __DEFAULT_FN_ATTRS512
9014 {
9015  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9016  (__mmask8) __U);
9017 }
9018 
9019 static __inline__ void __DEFAULT_FN_ATTRS512
9021 {
9022  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9023  (__mmask16) __U);
9024 }
9025 
9026 static __inline__ void __DEFAULT_FN_ATTRS512
9028 {
9029  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9030  (__mmask16) __U);
9031 }
9032 
/* Convert the low double of B to float with rounding mode R; upper result
 * lanes come from A. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
9050 
9051 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9052 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9053 {
9054  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9055  (__v2df)__B,
9056  (__v4sf)__W,
9058 }
9059 
9060 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9061 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9062 {
9063  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9064  (__v2df)__B,
9065  (__v4sf)_mm_setzero_ps(),
9067 }
9068 
/* i32/i64 spellings of the scalar conversion intrinsics, plus the
 * explicit-rounding int->float/double converters (64-bit forms are
 * x86_64-only). */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
9105 
/* Convert the low float of B to double with rounding mode R; upper result
 * lane comes from A. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9123 
9124 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9125 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9126 {
9127  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9128  (__v4sf)__B,
9129  (__v2df)__W,
9131 }
9132 
9133 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9134 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9135 {
9136  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9137  (__v4sf)__B,
9138  (__v2df)_mm_setzero_pd(),
9140 }
9141 
9142 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9143 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9144 {
9145  __A[0] = __B;
9146  return __A;
9147 }
9148 
9149 #ifdef __x86_64__
9150 #define _mm_cvt_roundu64_sd(A, B, R) \
9151  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9152  (unsigned long long)(B), (int)(R)))
9153 
9154 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9155 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9156 {
9157  __A[0] = __B;
9158  return __A;
9159 }
9160 #endif
9161 
9162 #define _mm_cvt_roundu32_ss(A, B, R) \
9163  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9164  (int)(R)))
9165 
9166 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9167 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9168 {
9169  __A[0] = __B;
9170  return __A;
9171 }
9172 
9173 #ifdef __x86_64__
9174 #define _mm_cvt_roundu64_ss(A, B, R) \
9175  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9176  (unsigned long long)(B), (int)(R)))
9177 
9178 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9179 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9180 {
9181  __A[0] = __B;
9182  return __A;
9183 }
9184 #endif
9185 
9186 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9187 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9188 {
9189  return (__m512i) __builtin_ia32_selectd_512(__M,
9190  (__v16si) _mm512_set1_epi32(__A),
9191  (__v16si) __O);
9192 }
9193 
9194 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9195 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9196 {
9197  return (__m512i) __builtin_ia32_selectq_512(__M,
9198  (__v8di) _mm512_set1_epi64(__A),
9199  (__v8di) __O);
9200 }
9201 
9202 static __inline __m512i __DEFAULT_FN_ATTRS512
9203 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9204  char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9205  char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9206  char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9207  char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9208  char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9209  char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9210  char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9211  char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9212  char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9213  char __e4, char __e3, char __e2, char __e1, char __e0) {
9214 
9215  return __extension__ (__m512i)(__v64qi)
9216  {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9217  __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9218  __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9219  __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9220  __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9221  __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9222  __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9223  __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9224 }
9225 
9226 static __inline __m512i __DEFAULT_FN_ATTRS512
9227 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9228  short __e27, short __e26, short __e25, short __e24, short __e23,
9229  short __e22, short __e21, short __e20, short __e19, short __e18,
9230  short __e17, short __e16, short __e15, short __e14, short __e13,
9231  short __e12, short __e11, short __e10, short __e9, short __e8,
9232  short __e7, short __e6, short __e5, short __e4, short __e3,
9233  short __e2, short __e1, short __e0) {
9234  return __extension__ (__m512i)(__v32hi)
9235  {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9236  __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9237  __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9238  __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9239 }
9240 
9241 static __inline __m512i __DEFAULT_FN_ATTRS512
9242 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
9243  int __E, int __F, int __G, int __H,
9244  int __I, int __J, int __K, int __L,
9245  int __M, int __N, int __O, int __P)
9246 {
9247  return __extension__ (__m512i)(__v16si)
9248  { __P, __O, __N, __M, __L, __K, __J, __I,
9249  __H, __G, __F, __E, __D, __C, __B, __A };
9250 }
9251 
9252 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9253  e8,e9,e10,e11,e12,e13,e14,e15) \
9254  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9255  (e5),(e4),(e3),(e2),(e1),(e0))
9256 
9257 static __inline__ __m512i __DEFAULT_FN_ATTRS512
9258 _mm512_set_epi64 (long long __A, long long __B, long long __C,
9259  long long __D, long long __E, long long __F,
9260  long long __G, long long __H)
9261 {
9262  return __extension__ (__m512i) (__v8di)
9263  { __H, __G, __F, __E, __D, __C, __B, __A };
9264 }
9265 
9266 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
9267  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9268 
9269 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9270 _mm512_set_pd (double __A, double __B, double __C, double __D,
9271  double __E, double __F, double __G, double __H)
9272 {
9273  return __extension__ (__m512d)
9274  { __H, __G, __F, __E, __D, __C, __B, __A };
9275 }
9276 
9277 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
9278  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9279 
9280 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9281 _mm512_set_ps (float __A, float __B, float __C, float __D,
9282  float __E, float __F, float __G, float __H,
9283  float __I, float __J, float __K, float __L,
9284  float __M, float __N, float __O, float __P)
9285 {
9286  return __extension__ (__m512)
9287  { __P, __O, __N, __M, __L, __K, __J, __I,
9288  __H, __G, __F, __E, __D, __C, __B, __A };
9289 }
9290 
9291 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9292  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9293  (e4),(e3),(e2),(e1),(e0))
9294 
9295 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9296 _mm512_abs_ps(__m512 __A)
9297 {
9298  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9299 }
9300 
9301 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9302 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9303 {
9304  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9305 }
9306 
9307 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9308 _mm512_abs_pd(__m512d __A)
9309 {
9310  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9311 }
9312 
9313 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9314 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9315 {
9316  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9317 }
9318 
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars
 * as outputs. This class of vector operation forms the basis of many
 * scientific computations. In vector-reduction arithmetic, the evaluation
 * order is independent of the order of the input elements of V.

 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 * vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 * produce unspecified results.

 * A bisection method is used: at each step the vector from the previous step
 * is partitioned in half, and the operation is performed on the two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
 */
9334 
9335 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9336  return __builtin_reduce_add((__v8di)__W);
9337 }
9338 
9339 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9340  return __builtin_reduce_mul((__v8di)__W);
9341 }
9342 
9343 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9344  return __builtin_reduce_and((__v8di)__W);
9345 }
9346 
9347 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9348  return __builtin_reduce_or((__v8di)__W);
9349 }
9350 
9351 static __inline__ long long __DEFAULT_FN_ATTRS512
9353  __W = _mm512_maskz_mov_epi64(__M, __W);
9354  return __builtin_reduce_add((__v8di)__W);
9355 }
9356 
9357 static __inline__ long long __DEFAULT_FN_ATTRS512
9359  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9360  return __builtin_reduce_mul((__v8di)__W);
9361 }
9362 
9363 static __inline__ long long __DEFAULT_FN_ATTRS512
9365  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
9366  return __builtin_reduce_and((__v8di)__W);
9367 }
9368 
9369 static __inline__ long long __DEFAULT_FN_ATTRS512
9371  __W = _mm512_maskz_mov_epi64(__M, __W);
9372  return __builtin_reduce_or((__v8di)__W);
9373 }
9374 
9375 // -0.0 is used to ignore the start value since it is the neutral value of
9376 // floating point addition. For more information, please refer to
9377 // https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
9378 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9379  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9380 }
9381 
9382 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9383  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9384 }
9385 
9386 static __inline__ double __DEFAULT_FN_ATTRS512
9388  __W = _mm512_maskz_mov_pd(__M, __W);
9389  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9390 }
9391 
9392 static __inline__ double __DEFAULT_FN_ATTRS512
9394  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9395  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9396 }
9397 
9398 static __inline__ int __DEFAULT_FN_ATTRS512
9400  return __builtin_reduce_add((__v16si)__W);
9401 }
9402 
9403 static __inline__ int __DEFAULT_FN_ATTRS512
9405  return __builtin_reduce_mul((__v16si)__W);
9406 }
9407 
9408 static __inline__ int __DEFAULT_FN_ATTRS512
9410  return __builtin_reduce_and((__v16si)__W);
9411 }
9412 
9413 static __inline__ int __DEFAULT_FN_ATTRS512
9415  return __builtin_reduce_or((__v16si)__W);
9416 }
9417 
9418 static __inline__ int __DEFAULT_FN_ATTRS512
9420  __W = _mm512_maskz_mov_epi32(__M, __W);
9421  return __builtin_reduce_add((__v16si)__W);
9422 }
9423 
9424 static __inline__ int __DEFAULT_FN_ATTRS512
9426  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9427  return __builtin_reduce_mul((__v16si)__W);
9428 }
9429 
9430 static __inline__ int __DEFAULT_FN_ATTRS512
9432  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9433  return __builtin_reduce_and((__v16si)__W);
9434 }
9435 
9436 static __inline__ int __DEFAULT_FN_ATTRS512
9438  __W = _mm512_maskz_mov_epi32(__M, __W);
9439  return __builtin_reduce_or((__v16si)__W);
9440 }
9441 
9442 static __inline__ float __DEFAULT_FN_ATTRS512
9444  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9445 }
9446 
9447 static __inline__ float __DEFAULT_FN_ATTRS512
9449  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9450 }
9451 
9452 static __inline__ float __DEFAULT_FN_ATTRS512
9454  __W = _mm512_maskz_mov_ps(__M, __W);
9455  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9456 }
9457 
9458 static __inline__ float __DEFAULT_FN_ATTRS512
9460  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9461  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9462 }
9463 
9464 static __inline__ long long __DEFAULT_FN_ATTRS512
9466  return __builtin_reduce_max((__v8di)__V);
9467 }
9468 
9469 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9471  return __builtin_reduce_max((__v8du)__V);
9472 }
9473 
9474 static __inline__ long long __DEFAULT_FN_ATTRS512
9476  return __builtin_reduce_min((__v8di)__V);
9477 }
9478 
9479 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9481  return __builtin_reduce_min((__v8du)__V);
9482 }
9483 
9484 static __inline__ long long __DEFAULT_FN_ATTRS512
9486  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9487  return __builtin_reduce_max((__v8di)__V);
9488 }
9489 
9490 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9492  __V = _mm512_maskz_mov_epi64(__M, __V);
9493  return __builtin_reduce_max((__v8du)__V);
9494 }
9495 
9496 static __inline__ long long __DEFAULT_FN_ATTRS512
9498  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9499  return __builtin_reduce_min((__v8di)__V);
9500 }
9501 
9502 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9504  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9505  return __builtin_reduce_min((__v8du)__V);
9506 }
9507 static __inline__ int __DEFAULT_FN_ATTRS512
9509  return __builtin_reduce_max((__v16si)__V);
9510 }
9511 
9512 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9514  return __builtin_reduce_max((__v16su)__V);
9515 }
9516 
9517 static __inline__ int __DEFAULT_FN_ATTRS512
9519  return __builtin_reduce_min((__v16si)__V);
9520 }
9521 
9522 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9524  return __builtin_reduce_min((__v16su)__V);
9525 }
9526 
9527 static __inline__ int __DEFAULT_FN_ATTRS512
9529  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9530  return __builtin_reduce_max((__v16si)__V);
9531 }
9532 
9533 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9535  __V = _mm512_maskz_mov_epi32(__M, __V);
9536  return __builtin_reduce_max((__v16su)__V);
9537 }
9538 
9539 static __inline__ int __DEFAULT_FN_ATTRS512
9541  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9542  return __builtin_reduce_min((__v16si)__V);
9543 }
9544 
9545 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9547  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9548  return __builtin_reduce_min((__v16su)__V);
9549 }
9550 
9551 static __inline__ double __DEFAULT_FN_ATTRS512
9552 _mm512_reduce_max_pd(__m512d __V) {
9553  return __builtin_ia32_reduce_fmax_pd512(__V);
9554 }
9555 
9556 static __inline__ double __DEFAULT_FN_ATTRS512
9557 _mm512_reduce_min_pd(__m512d __V) {
9558  return __builtin_ia32_reduce_fmin_pd512(__V);
9559 }
9560 
9561 static __inline__ double __DEFAULT_FN_ATTRS512
9563  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9564  return __builtin_ia32_reduce_fmax_pd512(__V);
9565 }
9566 
9567 static __inline__ double __DEFAULT_FN_ATTRS512
9569  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9570  return __builtin_ia32_reduce_fmin_pd512(__V);
9571 }
9572 
9573 static __inline__ float __DEFAULT_FN_ATTRS512
9575  return __builtin_ia32_reduce_fmax_ps512(__V);
9576 }
9577 
9578 static __inline__ float __DEFAULT_FN_ATTRS512
9580  return __builtin_ia32_reduce_fmin_ps512(__V);
9581 }
9582 
9583 static __inline__ float __DEFAULT_FN_ATTRS512
9585  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9586  return __builtin_ia32_reduce_fmax_ps512(__V);
9587 }
9588 
9589 static __inline__ float __DEFAULT_FN_ATTRS512
9591  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9592  return __builtin_ia32_reduce_fmin_ps512(__V);
9593 }
9594 
9595 /// Moves the least significant 32 bits of a vector of [16 x i32] to a
9596 /// 32-bit signed integer value.
9597 ///
9598 /// \headerfile <x86intrin.h>
9599 ///
9600 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9601 ///
9602 /// \param __A
9603 /// A vector of [16 x i32]. The least significant 32 bits are moved to the
9604 /// destination.
9605 /// \returns A 32-bit signed integer containing the moved value.
9606 static __inline__ int __DEFAULT_FN_ATTRS512
9607 _mm512_cvtsi512_si32(__m512i __A) {
9608  __v16si __b = (__v16si)__A;
9609  return __b[0];
9610 }
9611 
9612 /// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9613 /// locations starting at location \a base_addr at packed 32-bit integer indices
9614 /// stored in the lower half of \a vindex scaled by \a scale them in dst.
9615 ///
9616 /// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9617 ///
9618 /// \code{.operation}
9619 /// FOR j := 0 to 7
9620 /// i := j*64
9621 /// m := j*32
9622 /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9623 /// dst[i+63:i] := MEM[addr+63:addr]
9624 /// ENDFOR
9625 /// dst[MAX:512] := 0
9626 /// \endcode
9627 #define _mm512_i32logather_pd(vindex, base_addr, scale) \
9628  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9629 
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in the lower half of \a vindex, scaled by \a scale, into dst using
/// writemask \a mask (elements are copied from \a src when the corresponding
/// mask bit is not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
                           (base_addr), (scale))
9654 
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex, scaled by \a scale, and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9672 
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex, scaled by \a scale, and stores them in dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit
/// is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
                              (base_addr), (scale))
9696 
/// Stores 8 packed double-precision (64-bit) floating-point elements in
/// \a v1 to memory locations starting at location \a base_addr at packed
/// 32-bit integer indices stored in \a vindex, scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1),      \
                       (scale))
9713 
/// Stores 8 packed double-precision (64-bit) floating-point elements in
/// \a v1 to memory locations starting at location \a base_addr at packed
/// 32-bit integer indices stored in \a vindex, scaled by \a scale. Only those
/// elements whose corresponding mask bit is set in writemask \a mask are
/// written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9735 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex, scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9753 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex, scaled by \a scale, using writemask \a mask
/// (elements whose corresponding mask bit is not set are not written to
/// memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9774 
9775 #undef __DEFAULT_FN_ATTRS512
9776 #undef __DEFAULT_FN_ATTRS128
9777 #undef __DEFAULT_FN_ATTRS
9778 
9779 #endif /* __AVX512FINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:80
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
Definition: avx512fintrin.h:16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8 x float]; the upper 256 bits are zeroed.
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
_MM_CMPINT_ENUM
Definition: avx512fintrin.h:52
@ _MM_CMPINT_NE
Definition: avx512fintrin.h:57
@ _MM_CMPINT_NLT
Definition: avx512fintrin.h:58
@ _MM_CMPINT_LE
Definition: avx512fintrin.h:55
@ _MM_CMPINT_EQ
Definition: avx512fintrin.h:53
@ _MM_CMPINT_LT
Definition: avx512fintrin.h:54
@ _MM_CMPINT_UNUSED
Definition: avx512fintrin.h:56
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
Definition: avx512fintrin.h:41
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
_MM_PERM_ENUM
Definition: avx512fintrin.h:65
@ _MM_PERM_BBCA
Definition: avx512fintrin.h:95
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
Definition: avx512fintrin.h:91
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
Definition: avx512fintrin.h:75
@ _MM_PERM_BBBD
Definition: avx512fintrin.h:95
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
Definition: avx512fintrin.h:89
@ _MM_PERM_AACD
Definition: avx512fintrin.h:69
@ _MM_PERM_BBAB
Definition: avx512fintrin.h:93
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
Definition: avx512fintrin.h:90
@ _MM_PERM_ABDA
Definition: avx512fintrin.h:75
@ _MM_PERM_ACCC
Definition: avx512fintrin.h:80
@ _MM_PERM_ADAC
Definition: avx512fintrin.h:82
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
Definition: avx512fintrin.h:79
@ _MM_PERM_BBDB
Definition: avx512fintrin.h:97
@ _MM_PERM_ABBB
Definition: avx512fintrin.h:73
@ _MM_PERM_BACB
Definition: avx512fintrin.h:90
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
Definition: avx512fintrin.h:78
@ _MM_PERM_ADCB
Definition: avx512fintrin.h:85
@ _MM_PERM_BBBC
Definition: avx512fintrin.h:94
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
Definition: avx512fintrin.h:94
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
Definition: avx512fintrin.h:83
@ _MM_PERM_ACDB
Definition: avx512fintrin.h:81
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
Definition: avx512fintrin.h:87
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
Definition: avx512fintrin.h:93
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
Definition: avx512fintrin.h:87
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
Definition: avx512fintrin.h:71
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
Definition: avx512fintrin.h:68
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
Definition: avx512fintrin.h:99
@ _MM_PERM_ACBA
Definition: avx512fintrin.h:78
@ _MM_PERM_ADBA
Definition: avx512fintrin.h:83
@ _MM_PERM_ADBC
Definition: avx512fintrin.h:84
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
Definition: avx512fintrin.h:74
@ _MM_PERM_AAAD
Definition: avx512fintrin.h:67
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
Definition: avx512fintrin.h:92
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
Definition: avx512fintrin.h:88
@ _MM_PERM_ACCA
Definition: avx512fintrin.h:79
@ _MM_PERM_ABDD
Definition: avx512fintrin.h:76
@ _MM_PERM_BBCC
Definition: avx512fintrin.h:96
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
Definition: avx512fintrin.h:78
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
Definition: avx512fintrin.h:67
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
Definition: avx512fintrin.h:99
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
Definition: avx512fintrin.h:95
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
Definition: avx512fintrin.h:68
@ _MM_PERM_ACBD
Definition: avx512fintrin.h:79
@ _MM_PERM_AAAB
Definition: avx512fintrin.h:66
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
Definition: avx512fintrin.h:86
@ _MM_PERM_AAAA
Definition: avx512fintrin.h:66
@ _MM_PERM_AACC
Definition: avx512fintrin.h:69
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
Definition: avx512fintrin.h:70
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
Definition: avx512fintrin.h:84
@ _MM_PERM_AAAC
Definition: avx512fintrin.h:66
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
Definition: avx512fintrin.h:81
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
Definition: avx512fintrin.h:68
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
Definition: avx512fintrin.h:67
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
Definition: avx512fintrin.h:71
@ _MM_PERM_BADC
Definition: avx512fintrin.h:92
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
Definition: avx512fintrin.h:94
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
Definition: avx512fintrin.h:87
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
Definition: avx512fintrin.h:84
@ _MM_PERM_ADCC
Definition: avx512fintrin.h:85
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
Definition: avx512fintrin.h:96
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
Definition: avx512fintrin.h:91
@ _MM_PERM_ABCA
Definition: avx512fintrin.h:74
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
Definition: avx512fintrin.h:72
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
Definition: avx512fintrin.h:82
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
Definition: avx512fintrin.h:82
@ _MM_PERM_ACAC
Definition: avx512fintrin.h:77
@ _MM_PERM_DADD
@ _MM_PERM_BABD
Definition: avx512fintrin.h:89
@ _MM_PERM_ACCD
Definition: avx512fintrin.h:80
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
Definition: avx512fintrin.h:70
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
Definition: avx512fintrin.h:73
@ _MM_PERM_ACAA
Definition: avx512fintrin.h:76
@ _MM_PERM_ACDD
Definition: avx512fintrin.h:81
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
Definition: avx512fintrin.h:70
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
Definition: avx512fintrin.h:97
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
Definition: avx512fintrin.h:92
@ _MM_PERM_ADAD
Definition: avx512fintrin.h:83
@ _MM_PERM_BADA
Definition: avx512fintrin.h:91
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
Definition: avx512fintrin.h:72
@ _MM_PERM_ACAB
Definition: avx512fintrin.h:77
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
Definition: avx512fintrin.h:89
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
Definition: avx512fintrin.h:99
@ _MM_PERM_ADDC
Definition: avx512fintrin.h:86
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
Definition: avx512fintrin.h:75
@ _MM_PERM_BCAB
Definition: avx512fintrin.h:98
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
Definition: avx512fintrin.h:72
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
Definition: avx512fintrin.h:77
@ _MM_PERM_BABA
Definition: avx512fintrin.h:88
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
Definition: avx512fintrin.h:98
@ _MM_PERM_ABDC
Definition: avx512fintrin.h:76
@ _MM_PERM_BBCD
Definition: avx512fintrin.h:96
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
Definition: avx512fintrin.h:93
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
Definition: avx512fintrin.h:71
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
Definition: avx512fintrin.h:97
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
Definition: avx512fintrin.h:85
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
Definition: avx512fintrin.h:73
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
Definition: avx512fintrin.h:74
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
Definition: avx512fintrin.h:69
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
Definition: avx512fintrin.h:86
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
Definition: avx512fintrin.h:88
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
Definition: avx512fintrin.h:80
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
Definition: avx512fintrin.h:98
@ _MM_PERM_BACA
Definition: avx512fintrin.h:90
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
unsigned short __mmask16
Definition: avx512fintrin.h:42
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:49
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4340
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3653
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3666
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4326
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4353
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition: emmintrin.h:74
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition: emmintrin.h:193
static __inline__ void int __a
Definition: emmintrin.h:4057
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition: emmintrin.h:153
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:1876
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3477
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition: emmintrin.h:114
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1857
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3858
static __inline__ void short __D
Definition: immintrin.h:468
struct __storeu_i16 *__P __v
Definition: immintrin.h:472
static __inline__ void const void * __src
Definition: movdirintrin.h:45
constexpr bool aligned(uintptr_t Value)
Definition: PrimType.h:103
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19
__inline unsigned int unsigned int unsigned int __L
Definition: bmiintrin.h:31
#define _MM_FROUND_FLOOR
Definition: smmintrin.h:35
#define _MM_FROUND_CEIL
Definition: smmintrin.h:36
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:2018
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition: xmmintrin.h:100
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition: xmmintrin.h:2789
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:143
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:58
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition: xmmintrin.h:185