|
34 | 34 | #ifdef TRY_POPCNT_FAST
|
35 | 35 |
|
36 | 36 | /*
|
37 |
| - * Returns true if the CPU supports the instructions required for the AVX-512 |
38 |
| - * pg_popcount() implementation. |
| 37 | + * Does CPUID say there's support for XSAVE instructions? |
39 | 38 | */
|
40 |
| -bool |
41 |
| -pg_popcount_avx512_available(void) |
| 39 | +static inline bool |
| 40 | +xsave_available(void) |
42 | 41 | {
|
43 | 42 | unsigned int exx[4] = {0, 0, 0, 0};
|
44 | 43 |
|
45 |
| - /* Does CPUID say there's support for AVX-512 popcount instructions? */ |
46 |
| -#if defined(HAVE__GET_CPUID_COUNT) |
47 |
| - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
48 |
| -#elif defined(HAVE__CPUIDEX) |
49 |
| - __cpuidex(exx, 7, 0); |
50 |
| -#else |
51 |
| -#error cpuid instruction not available |
52 |
| -#endif |
53 |
| - if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */ |
54 |
| - return false; |
55 |
| - |
56 |
| - /* Does CPUID say there's support for AVX-512 byte and word instructions? */ |
57 |
| - memset(exx, 0, sizeof(exx)); |
58 |
| -#if defined(HAVE__GET_CPUID_COUNT) |
59 |
| - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
60 |
| -#elif defined(HAVE__CPUIDEX) |
61 |
| - __cpuidex(exx, 7, 0); |
62 |
| -#else |
63 |
| -#error cpuid instruction not available |
64 |
| -#endif |
65 |
| - if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */ |
66 |
| - return false; |
67 |
| - |
68 |
| - /* Does CPUID say there's support for XSAVE instructions? */ |
69 |
| - memset(exx, 0, sizeof(exx)); |
70 | 44 | #if defined(HAVE__GET_CPUID)
|
71 | 45 | __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
72 | 46 | #elif defined(HAVE__CPUID)
|
73 | 47 | __cpuid(exx, 1);
|
74 | 48 | #else
|
75 | 49 | #error cpuid instruction not available
|
76 | 50 | #endif
|
77 |
| - if ((exx[2] & (1 << 26)) == 0) /* xsave */ |
78 |
| - return false; |
| 51 | + return (exx[2] & (1 << 27)) != 0; /* osxsave */ |
| 52 | +} |
79 | 53 |
|
/*
 * Ask XGETBV whether the ZMM registers are enabled.
 *
 * NB: This may only be called once xsave_available() is known to return
 * true; making that check is the caller's job.
 *
 * When the build has no XSAVE intrinsics, this always reports false.
 */
static inline bool
zmm_regs_available(void)
{
#ifndef HAVE_XSAVE_INTRINSICS
	return false;
#else
	/* every bit in the 0xe6 mask has to be set, not just some of them */
	return (_xgetbv(0) & 0xe6) == 0xe6;
#endif
}
|
87 | 69 |
|
| 70 | +/* |
| 71 | + * Does CPUID say there's support for AVX-512 popcount and byte-and-word |
| 72 | + * instructions? |
| 73 | + */ |
| 74 | +static inline bool |
| 75 | +avx512_popcnt_available(void) |
| 76 | +{ |
| 77 | + unsigned int exx[4] = {0, 0, 0, 0}; |
| 78 | + |
| 79 | +#if defined(HAVE__GET_CPUID_COUNT) |
| 80 | + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
| 81 | +#elif defined(HAVE__CPUIDEX) |
| 82 | + __cpuidex(exx, 7, 0); |
| 83 | +#else |
| 84 | +#error cpuid instruction not available |
| 85 | +#endif |
| 86 | + return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ |
| 87 | + (exx[1] & (1 << 30)) != 0; /* avx512-bw */ |
| 88 | +} |
| 89 | + |
/*
 * Returns true if the CPU supports the instructions required for the AVX-512
 * pg_popcount() implementation.
 *
 * The checks run in a fixed order and stop at the first failure:
 * xsave_available() is consulted before zmm_regs_available(), and the
 * AVX-512 CPUID feature bits are examined last.
 */
bool
pg_popcount_avx512_available(void)
{
	if (!xsave_available())
		return false;

	if (!zmm_regs_available())
		return false;

	return avx512_popcnt_available();
}
| 101 | + |
88 | 102 | #endif /* TRY_POPCNT_FAST */
|
0 commit comments