/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AVX10_2SATCVTINTRIN_H
#define __AVX10_2SATCVTINTRIN_H
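
/*
 * AVX10.2 saturating integer conversions (SATCVT) for 128-bit and 256-bit
 * vectors. Packed BF16 ("nebf16"), FP16 ("ph"), and FP32 ("ps") elements are
 * converted to 8-bit signed ("epi8") or unsigned ("epu8") integers with
 * saturation. The ipcvt* forms convert using a rounding mode (the current
 * mode unless a _round variant supplies one); the ipcvtt* forms truncate
 * toward zero.
 *
 * A minimal usage sketch (illustrative only; assumes a compiler and CPU with
 * AVX10.2 256-bit support, e.g. clang -mavx10.2-256):
 *
 *   __m128h h = _mm_set1_ph((_Float16)3.75);
 *   __m128i a = _mm_ipcvtph_epi8(h);  // 3.75 -> 4 under default rounding
 *   __m128i b = _mm_ipcvttph_epi8(h); // 3.75 -> 3 (truncation)
 *   __m128i c = _mm_maskz_ipcvtph_epi8(0x0F, h); // lanes 4..7 zeroed
 */

/* Convert packed BF16 elements to 8-bit signed integers with saturation
 * (VCVTNEBF162IBS), one result per 16-bit source lane. The _mask variants
 * take unselected result lanes from W and the _maskz variants zero them;
 * masking is applied per word via __builtin_ia32_selectw_*. */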
#define _mm_ipcvtnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
      (__v8hi)_mm_ipcvtnebf16_epi8(A), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
      (__v16hi)_mm256_setzero_si256()))
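
/* Unsigned counterparts (VCVTNEBF162IUBS): packed BF16 to 8-bit unsigned
 * integers with saturation, with the same merge (_mask) and zero (_maskz)
 * masking scheme as above. */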
#define _mm_ipcvtnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
      (__v8hi)_mm_ipcvtnebf16_epu8(A), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
      (__v16hi)_mm256_setzero_si256()))
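
/* Convert packed FP16 elements to 8-bit signed integers with saturation
 * (VCVTPH2IBS). Here masking is handled by the builtin itself. The 256-bit
 * builtin additionally takes a rounding control, pinned to
 * _MM_FROUND_CUR_DIRECTION in the plain forms; the _ipcvt_round* forms
 * expose it as R. */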
#define _mm_ipcvtph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
      (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)(W), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)_mm256_setzero_si256(), \
      (__mmask16)-1, (const int)R))

#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)_mm256_setzero_si256(), \
      (__mmask16)(U), (const int)R))
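
/* As above, but converting packed FP16 elements to 8-bit unsigned integers
 * with saturation (VCVTPH2IUBS). */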
#define _mm_ipcvtph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
      (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)(W), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))

#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))
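
/* Convert packed FP32 elements to 8-bit signed integers with saturation
 * (VCVTPS2IBS), one result per 32-bit source lane. The 256-bit _round forms
 * accept an explicit rounding control R. */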
#define _mm_ipcvtps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
      (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)(U), (const int)R))
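
/* As above, but converting packed FP32 elements to 8-bit unsigned integers
 * with saturation (VCVTPS2IUBS). */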
#define _mm_ipcvtps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
      (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
      (__v8su)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)(U), (const int)R))
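
/* Truncating conversions: packed BF16 to 8-bit signed integers with
 * saturation, rounding toward zero (VCVTTNEBF162IBS). Masking follows the
 * same selectw-based merge/zero scheme as the non-truncating BF16 forms. */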
#define _mm_ipcvttnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
      (__v8hi)_mm_ipcvttnebf16_epi8(A), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
      (__v16hi)_mm256_setzero_si256()))
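
/* Truncating BF16 to 8-bit unsigned conversions (VCVTTNEBF162IUBS). */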
#define _mm_ipcvttnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
      (__v8hi)_mm_ipcvttnebf16_epu8(A), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
      (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
      (__v16hi)_mm256_setzero_si256()))
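
/* Truncating conversions: packed FP16 to 8-bit signed integers with
 * saturation (VCVTTPH2IBS). Since truncation fixes the rounding direction,
 * the R argument of the _ipcvtt_round* forms serves as exception control
 * (e.g. _MM_FROUND_NO_EXC) rather than rounding-mode selection. */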
#define _mm_ipcvttph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
      (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)(W), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))

#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))
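
/* Truncating FP16 to 8-bit unsigned conversions (VCVTTPH2IUBS). */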
#define _mm_ipcvttph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
      (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
      (__v16hu)(W), (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)R))

#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)R))
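
/* Truncating conversions: packed FP32 to 8-bit signed integers with
 * saturation (VCVTTPS2IBS). */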
#define _mm_ipcvttps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
      (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
      (__v8su)_mm256_setzero_si256(), \
      (__mmask8)(U), (const int)R))
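
/* Truncating FP32 to 8-bit unsigned conversions (VCVTTPS2IUBS). */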
#define _mm_ipcvttps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
      (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
      (__v8su)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      (const int)R))

#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \
      (const int)R))
#endif // __AVX10_2SATCVTINTRIN_H