12 "Never use <avxvnniint16intrin.h> directly; include <immintrin.h> instead."
15 #ifndef __AVXVNNIINT16INTRIN_H
16 #define __AVXVNNIINT16INTRIN_H
/* Attribute set applied to the 128-bit intrinsics declared in this header
 * (e.g. _mm_dpwsud_epi32): always inlined, marked nodebug, compiled for the
 * "avxvnniint16" target feature, with a minimum vector width of 128 bits. */
19 #define __DEFAULT_FN_ATTRS128 \
20 __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \
21 __min_vector_width__(128)))
/* Attribute set applied to the 256-bit intrinsics declared in this header
 * (e.g. _mm256_dpwsud_epi32): always inlined, marked nodebug, compiled for the
 * "avxvnniint16" target feature, with a minimum vector width of 256 bits. */
22 #define __DEFAULT_FN_ATTRS256 \
23 __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \
24 __min_vector_width__(256)))
59 return (__m128i)__builtin_ia32_vpdpwsud128((__v4si)__W, (__v4si)__A,
95 return (__m256i)__builtin_ia32_vpdpwsud256((__v8si)__W, (__v8si)__A,
133 return (__m128i)__builtin_ia32_vpdpwsuds128((__v4si)__W, (__v4si)__A,
170 return (__m256i)__builtin_ia32_vpdpwsuds256((__v8si)__W, (__v8si)__A,
207 return (__m128i)__builtin_ia32_vpdpwusd128((__v4si)__W, (__v4si)__A,
243 return (__m256i)__builtin_ia32_vpdpwusd256((__v8si)__W, (__v8si)__A,
281 return (__m128i)__builtin_ia32_vpdpwusds128((__v4si)__W, (__v4si)__A,
318 return (__m256i)__builtin_ia32_vpdpwusds256((__v8si)__W, (__v8si)__A,
355 return (__m128i)__builtin_ia32_vpdpwuud128((__v4si)__W, (__v4si)__A,
391 return (__m256i)__builtin_ia32_vpdpwuud256((__v8si)__W, (__v8si)__A,
429 return (__m128i)__builtin_ia32_vpdpwuuds128((__v4si)__W, (__v4si)__A,
466 return (__m256i)__builtin_ia32_vpdpwuuds256((__v8si)__W, (__v8si)__A,
470 #undef __DEFAULT_FN_ATTRS128
471 #undef __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B)
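Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.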
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B)
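Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.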
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B)
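Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.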
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B)
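Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.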
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B)
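Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.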
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B)
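Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.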
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32(__m128i __W, __m128i __A, __m128i __B)
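Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.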
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B)
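Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.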
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, __m128i __A, __m128i __B)
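Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.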
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B)
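Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.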
/* NOTE(review): these two definitions expand to nothing, unlike the
 * attribute-carrying definitions of the same names earlier in this text, so
 * declarations that follow pick up no attributes from them. They look like
 * outline entries rather than real definitions -- confirm before relying on
 * them. */
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B)
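Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W, and return the packed 32-bit results.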
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B)
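Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, sum the 2 products of each group with the corresponding 32-bit integer in __W with saturation, and return the packed 32-bit results.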