13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
19 #if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
23 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
27 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
31 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
35 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
39 #if !defined(__SCE__) || __has_feature(modules) || \
40 (defined(__SSE4_2__) || defined(__SSE4_1__))
44 #if !defined(__SCE__) || __has_feature(modules) || \
45 (defined(__AES__) || defined(__PCLMUL__))
49 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
53 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
57 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
61 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
65 #if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
72 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
76 #if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
80 #if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
84 #if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
88 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
92 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
96 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
100 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
104 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
108 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
112 #if !defined(__SCE__) || __has_feature(modules) || \
113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
117 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
121 #if !defined(__SCE__) || __has_feature(modules) || \
122 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
126 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
130 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
134 #if !defined(__SCE__) || __has_feature(modules) || \
135 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
139 #if !defined(__SCE__) || __has_feature(modules) || \
140 (defined(__AVX512VL__) && defined(__AVX512BW__))
144 #if !defined(__SCE__) || __has_feature(modules) || \
145 (defined(__AVX512VL__) && defined(__AVX512CD__))
149 #if !defined(__SCE__) || __has_feature(modules) || \
150 (defined(__AVX512VL__) && defined(__AVX512DQ__))
154 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
158 #if !defined(__SCE__) || __has_feature(modules) || \
159 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
163 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
167 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
171 #if !defined(__SCE__) || __has_feature(modules) || \
172 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
176 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
180 #if !defined(__SCE__) || __has_feature(modules) || \
181 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
185 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
189 #if !defined(__SCE__) || __has_feature(modules) || \
190 (defined(__AVX512VL__) && defined(__AVX512FP16__))
194 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
198 #if !defined(__SCE__) || __has_feature(modules) || \
199 (defined(__AVX512VL__) && defined(__AVX512BF16__))
203 #if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
207 #if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
211 #if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
215 #if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
219 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
223 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
227 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
231 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
235 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
239 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
243 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
/* RDPID wrapper fragment: an always-inlined, nodebug function compiled with
 * __target__("rdpid") that returns the unsigned int produced by
 * __builtin_ia32_rdpid().
 * NOTE(review): the function-name line and the braces are missing from this
 * excerpt; presumably this is _rdpid_u32(void) -- confirm against the
 * complete header. */
251 static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"rdpid")))
253 return __builtin_ia32_rdpid();
257 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
/* _rdrand16_step: forwards __p to __builtin_ia32_rdrand16_step() and returns
 * the builtin's result converted to int. Always inlined, nodebug, compiled
 * with __target__("rdrnd").
 * NOTE(review): presumably the builtin stores a 16-bit random value through
 * __p and the return value is a success flag -- confirm; the body's braces
 * are not present in this excerpt. */
267 static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
268 _rdrand16_step(
unsigned short *
__p)
270 return (
int)__builtin_ia32_rdrand16_step(
__p);
/* _rdrand32_step: forwards __p to __builtin_ia32_rdrand32_step() and returns
 * the builtin's result converted to int. Always inlined, nodebug, compiled
 * with __target__("rdrnd").
 * NOTE(review): presumably the builtin stores a 32-bit random value through
 * __p and the return value is a success flag -- confirm; the body's braces
 * are not present in this excerpt. */
282 static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
283 _rdrand32_step(
unsigned int *
__p)
285 return (
int)__builtin_ia32_rdrand32_step(
__p);
/* _rdrand64_step: forwards __p to __builtin_ia32_rdrand64_step() and returns
 * the builtin's result converted to int. Always inlined, nodebug, compiled
 * with __target__("rdrnd").
 * NOTE(review): braces and any surrounding preprocessor conditionals are
 * missing from this excerpt. */
297 static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
298 _rdrand64_step(
unsigned long long *
__p)
301 return (
int)__builtin_ia32_rdrand64_step(
__p);
/* Second visible body: performs two __builtin_ia32_rdrand32_step() calls
 * (into __lo and __hi) and, only when both report a nonzero status, stores
 * (__hi << 32) | __lo through __p.
 * NOTE(review): presumably the alternative _rdrand64_step used when the
 * 64-bit builtin is unavailable; its signature, the selecting #if/#else and
 * the failure branch are not visible here -- confirm. */
305 unsigned int __lo, __hi;
306 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
307 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
308 if (__res_lo && __res_hi) {
309 *
__p = ((
unsigned long long)__hi << 32) | (
unsigned long long)__lo;
319 #if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
/* _readfsbase_u32: takes no arguments and returns the unsigned int produced
 * by __builtin_ia32_rdfsbase32(). Always inlined, nodebug, compiled with
 * __target__("fsgsbase").
 * NOTE(review): by its name this reads the FS segment base (32-bit form);
 * the body's braces are not present in this excerpt. */
328 static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
329 _readfsbase_u32(
void)
331 return __builtin_ia32_rdfsbase32();
/* _readfsbase_u64: takes no arguments and returns the unsigned long long
 * produced by __builtin_ia32_rdfsbase64(). Always inlined, nodebug, compiled
 * with __target__("fsgsbase").
 * NOTE(review): by its name this reads the FS segment base (64-bit form);
 * the body's braces are not present in this excerpt. */
341 static __inline__
unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
342 _readfsbase_u64(
void)
344 return __builtin_ia32_rdfsbase64();
/* _readgsbase_u32: takes no arguments and returns the unsigned int produced
 * by __builtin_ia32_rdgsbase32(). Always inlined, nodebug, compiled with
 * __target__("fsgsbase").
 * NOTE(review): by its name this reads the GS segment base (32-bit form);
 * the body's braces are not present in this excerpt. */
354 static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
355 _readgsbase_u32(
void)
357 return __builtin_ia32_rdgsbase32();
/* _readgsbase_u64: takes no arguments and returns the unsigned long long
 * produced by __builtin_ia32_rdgsbase64(). Always inlined, nodebug, compiled
 * with __target__("fsgsbase").
 * NOTE(review): by its name this reads the GS segment base (64-bit form);
 * the body's braces are not present in this excerpt. */
367 static __inline__
unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
368 _readgsbase_u64(
void)
370 return __builtin_ia32_rdgsbase64();
/* _writefsbase_u32: passes the 32-bit value __V to
 * __builtin_ia32_wrfsbase32(); returns nothing. Always inlined, nodebug,
 * compiled with __target__("fsgsbase").
 * NOTE(review): by its name this writes the FS segment base; the body's
 * braces are not present in this excerpt. */
381 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
382 _writefsbase_u32(
unsigned int __V)
384 __builtin_ia32_wrfsbase32(__V);
/* _writefsbase_u64: passes the 64-bit value __V to
 * __builtin_ia32_wrfsbase64(); returns nothing. Always inlined, nodebug,
 * compiled with __target__("fsgsbase").
 * NOTE(review): by its name this writes the FS segment base; the body's
 * braces are not present in this excerpt. */
395 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
396 _writefsbase_u64(
unsigned long long __V)
398 __builtin_ia32_wrfsbase64(__V);
/* _writegsbase_u32: passes the 32-bit value __V to
 * __builtin_ia32_wrgsbase32(); returns nothing. Always inlined, nodebug,
 * compiled with __target__("fsgsbase").
 * NOTE(review): by its name this writes the GS segment base; the body's
 * braces are not present in this excerpt. */
409 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
410 _writegsbase_u32(
unsigned int __V)
412 __builtin_ia32_wrgsbase32(__V);
/* _writegsbase_u64: passes the 64-bit value __V to
 * __builtin_ia32_wrgsbase64(); returns nothing. Always inlined, nodebug,
 * compiled with __target__("fsgsbase").
 * NOTE(review): by its name this writes the GS segment base; the body's
 * braces are not present in this excerpt. */
423 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
424 _writegsbase_u64(
unsigned long long __V)
426 __builtin_ia32_wrgsbase64(__V);
432 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
/* _loadbe_i16: reads the __v member through __P viewed as a
 * const struct __loadu_i16 pointer, reverses its byte order with
 * __builtin_bswap16(), and returns the result as short. Always inlined,
 * nodebug, compiled with __target__("movbe").
 * NOTE(review): the definition of struct __loadu_i16 and the closing brace
 * are missing from this excerpt; presumably the struct is a local wrapper
 * that permits unaligned/aliasing access -- confirm. */
449 static __inline__
short __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
450 _loadbe_i16(
void const *
__P) {
454 return (
short)__builtin_bswap16(((
const struct __loadu_i16*)
__P)->
__v);
/* 16-bit byte-swapping store fragment: writes
 * __builtin_bswap16((unsigned short)__D) into the __v member of __P viewed
 * as a struct __storeu_i16 pointer. Always inlined, nodebug, compiled with
 * __target__("movbe"); returns void.
 * NOTE(review): the function-name/parameter line, the struct members and
 * the closing braces are missing from this excerpt; by analogy with
 * _storebe_i32 this is presumably _storebe_i16(void *__P, short __D) --
 * confirm. */
467 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
469 struct __storeu_i16 {
472 ((
struct __storeu_i16*)
__P)->__v = __builtin_bswap16((
unsigned short)
__D);
/* _loadbe_i32: reads the __v member through __P viewed as a
 * const struct __loadu_i32 pointer, reverses its byte order with
 * __builtin_bswap32(), and returns the result as int. Always inlined,
 * nodebug, compiled with __target__("movbe").
 * NOTE(review): the definition of struct __loadu_i32 and the closing brace
 * are missing from this excerpt; presumably the struct is a local wrapper
 * that permits unaligned/aliasing access -- confirm. */
484 static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
485 _loadbe_i32(
void const *
__P) {
489 return (
int)__builtin_bswap32(((
const struct __loadu_i32*)
__P)->
__v);
/* _storebe_i32: converts __D to unsigned int, reverses its byte order with
 * __builtin_bswap32(), and stores the result into the __v member of __P
 * viewed as a struct __storeu_i32 pointer. Returns nothing. Always inlined,
 * nodebug, compiled with __target__("movbe").
 * NOTE(review): the members/attributes of the local struct __storeu_i32 and
 * the closing braces are missing from this excerpt. */
502 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
503 _storebe_i32(
void *
__P,
int __D) {
504 struct __storeu_i32 {
507 ((
struct __storeu_i32*)
__P)->__v = __builtin_bswap32((
unsigned int)
__D);
/* _loadbe_i64: reads the __v member through __P viewed as a
 * const struct __loadu_i64 pointer, reverses its byte order with
 * __builtin_bswap64(), and returns the result as long long. Always inlined,
 * nodebug, compiled with __target__("movbe").
 * NOTE(review): the `unsigned long long __v;` line is presumably the sole
 * member of the local struct __loadu_i64, whose opening/closing lines are
 * missing from this excerpt -- confirm. */
520 static __inline__
long long __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
521 _loadbe_i64(
void const *
__P) {
523 unsigned long long __v;
525 return (
long long)__builtin_bswap64(((
const struct __loadu_i64*)
__P)->
__v);
/* _storebe_i64: converts __D to unsigned long long, reverses its byte order
 * with __builtin_bswap64(), and stores the result into the __v member of
 * __P viewed as a struct __storeu_i64 pointer. Returns nothing. Always
 * inlined, nodebug, compiled with __target__("movbe").
 * The local struct __storeu_i64 declares a single visible member,
 * `unsigned long long __v`.
 * NOTE(review): the struct's closing line (and any attributes on it) and the
 * function's closing brace are missing from this excerpt. */
538 static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
539 _storebe_i64(
void *
__P,
long long __D) {
540 struct __storeu_i64 {
541 unsigned long long __v;
543 ((
struct __storeu_i64*)
__P)->__v = __builtin_bswap64((
unsigned long long)
__D);
548 #if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
553 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
557 #if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
564 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
568 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
572 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
576 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
583 #if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
587 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
591 #if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
595 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
599 #if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
603 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
604 defined(__MOVDIR64B__)
608 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
612 #if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
616 #if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
620 #if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
623 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
627 #if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
632 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
633 defined(__AMX_INT8__) || defined(__AMX_BF16__)
637 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
641 #if !defined(__SCE__) || __has_feature(modules) || \
642 defined(__AVX512VP2INTERSECT__)
646 #if !defined(__SCE__) || __has_feature(modules) || \
647 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
651 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
655 #if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
659 #if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
663 #if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Shorthand attribute list: force inlining and omit debug info.
 * NOTE(review): presumably applied to the _Interlocked*_HLE* helpers that
 * follow (their declaration lines are not visible in this excerpt); the
 * macro is #undef'd again at the end of the section. */
665 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
672 #if defined(__i386__) || defined(__x86_64__)
/* _InterlockedExchange_HLEAcquire: emits a `lock xchg` that swaps _Value
 * (read/write register operand, "+r") with the long at _Target (read/write
 * memory operand, "+m"), preceded by the raw byte 0xf2 -- the encoding of
 * the XACQUIRE hint prefix. The {a|b} groups select AT&T vs Intel operand
 * order; the "memory" clobber stops the compiler from caching memory values
 * across the asm.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably the swapped-out old value held in
 * _Value is returned -- confirm. */
674 _InterlockedExchange_HLEAcquire(
long volatile *_Target,
long _Value) {
675 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
676 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
/* _InterlockedExchange_HLERelease: emits a `lock xchg` that swaps _Value
 * (read/write register operand, "+r") with the long at _Target (read/write
 * memory operand, "+m"), preceded by the raw byte 0xf3 -- the encoding of
 * the XRELEASE hint prefix. The {a|b} groups select AT&T vs Intel operand
 * order; the "memory" clobber stops the compiler from caching memory values
 * across the asm.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably the swapped-out old value held in
 * _Value is returned -- confirm. */
680 _InterlockedExchange_HLERelease(
long volatile *_Target,
long _Value) {
681 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
682 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
686 #if defined(__x86_64__)
/* _InterlockedExchange64_HLEAcquire: __int64 variant (inside an
 * `#if defined(__x86_64__)` region). Emits a `lock xchg` that swaps _Value
 * ("+r", read/write register) with the object at _Target ("+m", read/write
 * memory), preceded by the raw byte 0xf2 -- the XACQUIRE hint prefix
 * encoding. "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably the old value in _Value is
 * returned -- confirm. */
688 _InterlockedExchange64_HLEAcquire(__int64
volatile *_Target, __int64 _Value) {
689 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
690 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
/* _InterlockedExchange64_HLERelease: __int64 variant. Emits a `lock xchg`
 * that swaps _Value ("+r", read/write register) with the object at _Target
 * ("+m", read/write memory), preceded by the raw byte 0xf3 -- the XRELEASE
 * hint prefix encoding. "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably the old value in _Value is
 * returned -- confirm. */
694 _InterlockedExchange64_HLERelease(__int64
volatile *_Target, __int64 _Value) {
695 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
696 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
703 #if defined(__i386__) || defined(__x86_64__)
/* _InterlockedCompareExchange_HLEAcquire: emits `lock cmpxchg` with the raw
 * byte 0xf2 (XACQUIRE hint prefix encoding) in front.
 * Operands: _Comparand is a read/write operand pinned to the accumulator
 * ("+a"), the long at _Destination is read/write memory ("+m"), and
 * _Exchange is a register input ("r"). Per the CMPXCHG instruction, the
 * accumulator is compared with the destination; on a match the destination
 * receives _Exchange, otherwise the accumulator receives the destination's
 * value. "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably _Comparand (now holding the
 * observed value) is returned -- confirm. */
705 _InterlockedCompareExchange_HLEAcquire(
long volatile *_Destination,
706 long _Exchange,
long _Comparand) {
707 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
708 :
"+a" (_Comparand),
"+m" (*_Destination)
709 :
"r" (_Exchange) :
"memory");
/* _InterlockedCompareExchange_HLERelease: emits `lock cmpxchg` with the raw
 * byte 0xf3 (XRELEASE hint prefix encoding) in front.
 * Operands: _Comparand is a read/write operand pinned to the accumulator
 * ("+a"), the long at _Destination is read/write memory ("+m"), and
 * _Exchange is a register input ("r"). Per the CMPXCHG instruction, the
 * accumulator is compared with the destination; on a match the destination
 * receives _Exchange, otherwise the accumulator receives the destination's
 * value. "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably _Comparand (now holding the
 * observed value) is returned -- confirm. */
713 _InterlockedCompareExchange_HLERelease(
long volatile *_Destination,
714 long _Exchange,
long _Comparand) {
715 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
716 :
"+a" (_Comparand),
"+m" (*_Destination)
717 :
"r" (_Exchange) :
"memory");
721 #if defined(__x86_64__)
/* _InterlockedCompareExchange64_HLEAcquire: __int64 variant (inside an
 * `#if defined(__x86_64__)` region). Emits `lock cmpxchg` with the raw byte
 * 0xf2 (XACQUIRE hint prefix encoding) in front.
 * Operands: _Comparand in the accumulator ("+a", read/write), the object at
 * _Destination as read/write memory ("+m"), _Exchange as a register input
 * ("r"); "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably _Comparand (now holding the
 * observed value) is returned -- confirm. */
723 _InterlockedCompareExchange64_HLEAcquire(__int64
volatile *_Destination,
724 __int64 _Exchange, __int64 _Comparand) {
725 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
726 :
"+a" (_Comparand),
"+m" (*_Destination)
727 :
"r" (_Exchange) :
"memory");
/* _InterlockedCompareExchange64_HLERelease: __int64 variant. Emits
 * `lock cmpxchg` with the raw byte 0xf3 (XRELEASE hint prefix encoding) in
 * front.
 * Operands: _Comparand in the accumulator ("+a", read/write), the object at
 * _Destination as read/write memory ("+m"), _Exchange as a register input
 * ("r"); "memory" is listed as a clobber.
 * NOTE(review): the return-type/attribute line and the return statement are
 * missing from this excerpt; presumably _Comparand (now holding the
 * observed value) is returned -- confirm. */
731 _InterlockedCompareExchange64_HLERelease(__int64
volatile *_Destination,
732 __int64 _Exchange, __int64 _Comparand) {
733 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
734 :
"+a" (_Comparand),
"+m" (*_Destination)
735 :
"r" (_Exchange) :
"memory");
743 #undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS
static __inline__ uint32_t volatile uint32_t * __p
static __inline__ void short __D
struct __storeu_i16 *__P __v
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid"))) _rdpid_u32(void)
Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
__inline unsigned int unsigned int unsigned int * __P