clang  20.0.0git
avx10_2_512satcvtintrin.h
Go to the documentation of this file.
1 /*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error \
11  "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
12 #endif // __IMMINTRIN_H
13 
14 #ifndef __AVX10_2_512SATCVTINTRIN_H
15 #define __AVX10_2_512SATCVTINTRIN_H
16 
/* bf16 -> saturated 8-bit integer conversion, 512-bit, "ibs" builtin flavour.
 *
 * Plain form:  A is viewed as __v32bf and handed to
 *              __builtin_ia32_vcvtnebf162ibs512; the result is typed __m512i.
 * mask form:   the plain result (as __v32hi), the mask U and the fallback
 *              vector W go through __builtin_ia32_selectw_512.
 * maskz form:  same as the mask form with an all-zero fallback vector.
 *
 * NOTE(review): "signed, saturating" is read off the builtin/intrinsic names;
 * confirm exact lane semantics against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvtnebf16_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtnebf162ibs512( \
      (__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
      (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
      (__v32hi)_mm512_setzero_si512()))
29 
/* bf16 -> saturated 8-bit integer conversion, 512-bit, "iubs" builtin flavour.
 *
 * Plain form:  A is viewed as __v32bf and handed to
 *              __builtin_ia32_vcvtnebf162iubs512; the result is typed __m512i.
 * mask form:   the plain result (as __v32hi), the mask U and the fallback
 *              vector W go through __builtin_ia32_selectw_512.
 * maskz form:  same as the mask form with an all-zero fallback vector.
 *
 * NOTE(review): "unsigned, saturating" is read off the builtin/intrinsic
 * names; confirm exact lane semantics against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvtnebf16_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtnebf162iubs512( \
      (__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
      (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
      (__v32hi)_mm512_setzero_si512()))
42 
/* bf16 -> saturated 8-bit integer conversion, 512-bit, "cvtt ... ibs" builtin.
 *
 * Plain form:  A is viewed as __v32bf and handed to
 *              __builtin_ia32_vcvttnebf162ibs512; the result is typed __m512i.
 * mask form:   the plain result (as __v32hi), the mask U and the fallback
 *              vector W go through __builtin_ia32_selectw_512.
 * maskz form:  same as the mask form with an all-zero fallback vector.
 *
 * NOTE(review): the extra "t" presumably denotes the truncating variant of
 * the signed conversion; confirm against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvttnebf16_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttnebf162ibs512( \
      (__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
      (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
      (__v32hi)_mm512_setzero_si512()))
55 
/* bf16 -> saturated 8-bit integer conversion, 512-bit, "cvtt ... iubs" builtin.
 *
 * Plain form:  A is viewed as __v32bf and handed to
 *              __builtin_ia32_vcvttnebf162iubs512; the result is typed
 *              __m512i.
 * mask form:   the plain result (as __v32hi), the mask U and the fallback
 *              vector W go through __builtin_ia32_selectw_512.
 * maskz form:  same as the mask form with an all-zero fallback vector.
 *
 * NOTE(review): the extra "t" presumably denotes the truncating variant of
 * the unsigned conversion; confirm against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvttnebf16_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttnebf162iubs512( \
      (__v32bf)(__m512bh)(A)))

#define _mm512_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
      (__v32hi)(__m512i)(W)))

#define _mm512_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
      (__v32hi)_mm512_setzero_si512()))
68 
/* Half-precision -> saturated 8-bit integer conversion ("ibs" builtin),
 * 512-bit, using _MM_FROUND_CUR_DIRECTION as the rounding operand.
 *
 * All three forms call __builtin_ia32_vcvtph2ibs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, rounding):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): presumably lanes whose mask bit is clear take the
 * pass-through value; confirm against the builtin's definition.
 */
#define _mm512_ipcvtph_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtph_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtph_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))
83 
/* Half-precision -> saturated 8-bit integer conversion ("ibs" builtin),
 * 512-bit, with a caller-supplied rounding operand R.
 *
 * All three forms call __builtin_ia32_vcvtph2ibs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, rounding R):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * R is parenthesised before the (const int) cast so the cast covers the
 * whole argument expression rather than only its first operand.
 */
#define _mm512_ipcvt_roundph_epi8(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      (const int)(R)))

#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      (const int)(R)))

#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      (const int)(R)))
97 
/* Half-precision -> saturated 8-bit integer conversion ("iubs" builtin),
 * 512-bit, using _MM_FROUND_CUR_DIRECTION as the rounding operand.
 *
 * All three forms call __builtin_ia32_vcvtph2iubs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, rounding):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): presumably lanes whose mask bit is clear take the
 * pass-through value; confirm against the builtin's definition.
 */
#define _mm512_ipcvtph_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtph_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtph_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))
112 
/* Half-precision -> saturated 8-bit integer conversion ("iubs" builtin),
 * 512-bit, with a caller-supplied rounding operand R.
 *
 * All three forms call __builtin_ia32_vcvtph2iubs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, rounding R):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * R is parenthesised before the (const int) cast so the cast covers the
 * whole argument expression rather than only its first operand.
 */
#define _mm512_ipcvt_roundph_epu8(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      (const int)(R)))

#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      (const int)(R)))

#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      (const int)(R)))
126 
/* Single-precision -> saturated 8-bit integer conversion ("ibs" builtin),
 * 512-bit, using _MM_FROUND_CUR_DIRECTION as the rounding operand.
 *
 * All three forms call __builtin_ia32_vcvtps2ibs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, rounding):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): presumably lanes whose mask bit is clear take the
 * pass-through value; confirm against the builtin's definition.
 */
#define _mm512_ipcvtps_epi8(A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtps_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtps_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
141 
/* Single-precision -> saturated 8-bit integer conversion ("ibs" builtin),
 * 512-bit, with a caller-supplied rounding operand R.
 *
 * All three forms call __builtin_ia32_vcvtps2ibs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, rounding R):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * R is parenthesised before the (const int) cast so the cast covers the
 * whole argument expression rather than only its first operand.
 */
#define _mm512_ipcvt_roundps_epi8(A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (const int)(R)))

#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      (const int)(R)))

#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (const int)(R)))
155 
/* Single-precision -> saturated 8-bit integer conversion ("iubs" builtin),
 * 512-bit, using _MM_FROUND_CUR_DIRECTION as the rounding operand.
 *
 * All three forms call __builtin_ia32_vcvtps2iubs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, rounding):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): presumably lanes whose mask bit is clear take the
 * pass-through value; confirm against the builtin's definition.
 */
#define _mm512_ipcvtps_epu8(A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvtps_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvtps_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
170 
/* Single-precision -> saturated 8-bit integer conversion ("iubs" builtin),
 * 512-bit, with a caller-supplied rounding operand R.
 *
 * All three forms call __builtin_ia32_vcvtps2iubs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, rounding R):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * R is parenthesised before the (const int) cast so the cast covers the
 * whole argument expression rather than only its first operand.
 */
#define _mm512_ipcvt_roundps_epu8(A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (const int)(R)))

#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      (const int)(R)))

#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (const int)(R)))
184 
/* Half-precision -> saturated 8-bit integer conversion, "cvtt ... ibs"
 * builtin, 512-bit, with _MM_FROUND_CUR_DIRECTION as the last operand.
 *
 * All three forms call __builtin_ia32_vcvttph2ibs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, last operand):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): the "tt" spelling presumably marks the truncating variant;
 * confirm against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvttph_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvttph_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvttph_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))
199 
/* Half-precision -> saturated 8-bit integer conversion, "cvtt ... ibs"
 * builtin, 512-bit, with a caller-supplied last operand S.
 *
 * All three forms call __builtin_ia32_vcvttph2ibs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, S):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * S is parenthesised and cast to const int, so it is treated like every
 * other macro parameter here and reaches the builtin as an int.
 * NOTE(review): S presumably carries an _MM_FROUND_* exception-control
 * value; confirm the accepted set against the builtin's definition.
 */
#define _mm512_ipcvtt_roundph_epi8(A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      (const int)(S)))

#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      (const int)(S)))

#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      (const int)(S)))
213 
/* Half-precision -> saturated 8-bit integer conversion, "cvtt ... iubs"
 * builtin, 512-bit, with _MM_FROUND_CUR_DIRECTION as the last operand.
 *
 * All three forms call __builtin_ia32_vcvttph2iubs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, last operand):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * NOTE(review): the "tt" spelling presumably marks the truncating variant;
 * confirm against the AVX10.2 ISA reference.
 */
#define _mm512_ipcvttph_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvttph_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvttph_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      _MM_FROUND_CUR_DIRECTION))
228 
/* Half-precision -> saturated 8-bit integer conversion, "cvtt ... iubs"
 * builtin, 512-bit, with a caller-supplied last operand S.
 *
 * All three forms call __builtin_ia32_vcvttph2iubs512_mask with
 *   (source A as __v32hf, pass-through vector, 32-bit mask, S):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * S is parenthesised and cast to const int, so it is treated like every
 * other macro parameter here and reaches the builtin as an int.
 * NOTE(review): S presumably carries an _MM_FROUND_* exception-control
 * value; confirm the accepted set against the builtin's definition.
 */
#define _mm512_ipcvtt_roundph_epu8(A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)-1, \
      (const int)(S)))

#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)(W), \
      (__mmask32)(U), \
      (const int)(S)))

#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
      (__v32hf)(__m512h)(A), \
      (__v32hu)_mm512_setzero_si512(), \
      (__mmask32)(U), \
      (const int)(S)))
242 
/* Single-precision -> saturated 8-bit integer conversion, "cvtt ... ibs"
 * builtin, 512-bit, with _MM_FROUND_CUR_DIRECTION as the last operand.
 *
 * All three forms call __builtin_ia32_vcvttps2ibs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, last operand):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * A is viewed through __m512 (the single-precision vector type) on its way
 * to __v16sf; the half-precision type __m512h has no role in a float source.
 */
#define _mm512_ipcvttps_epi8(A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvttps_epi8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvttps_epi8(U, A) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
257 
/* Single-precision -> saturated 8-bit integer conversion, "cvtt ... ibs"
 * builtin, 512-bit, with a caller-supplied last operand S.
 *
 * All three forms call __builtin_ia32_vcvttps2ibs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, S):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * A is viewed through __m512 (the single-precision vector type) on its way
 * to __v16sf. S is parenthesised and cast to const int so it reaches the
 * builtin as an int, like every other macro parameter here.
 * NOTE(review): S presumably carries an _MM_FROUND_* exception-control
 * value; confirm the accepted set against the builtin's definition.
 */
#define _mm512_ipcvtt_roundps_epi8(A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (const int)(S)))

#define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      (const int)(S)))

#define _mm512_maskz_ipcvtt_roundps_epi8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (const int)(S)))
271 
/* Single-precision -> saturated 8-bit integer conversion, "cvtt ... iubs"
 * builtin, 512-bit, with _MM_FROUND_CUR_DIRECTION as the last operand.
 *
 * All three forms call __builtin_ia32_vcvttps2iubs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, last operand):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * A is viewed through __m512 (the single-precision vector type) on its way
 * to __v16sf; the half-precision type __m512h has no role in a float source.
 */
#define _mm512_ipcvttps_epu8(A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_ipcvttps_epu8(W, U, A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_ipcvttps_epu8(U, A) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
286 
/* Single-precision -> saturated 8-bit integer conversion, "cvtt ... iubs"
 * builtin, 512-bit, with a caller-supplied last operand S.
 *
 * All three forms call __builtin_ia32_vcvttps2iubs512_mask with
 *   (source A as __v16sf, pass-through vector, 16-bit mask, S):
 *   plain : pass-through = zero vector, mask = all ones
 *   mask  : pass-through = W,           mask = U
 *   maskz : pass-through = zero vector, mask = U
 * and cast the result to __m512i.
 *
 * A is viewed through __m512 (the single-precision vector type) on its way
 * to __v16sf. S is parenthesised and cast to const int so it reaches the
 * builtin as an int, like every other macro parameter here.
 * NOTE(review): S presumably carries an _MM_FROUND_* exception-control
 * value; confirm the accepted set against the builtin's definition.
 */
#define _mm512_ipcvtt_roundps_epu8(A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (const int)(S)))

#define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)(W), \
      (__mmask16)(U), \
      (const int)(S)))

#define _mm512_maskz_ipcvtt_roundps_epu8(U, A, S) \
  ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16su)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (const int)(S)))
300 
301 #endif // __AVX10_2_512SATCVTINTRIN_H