Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 598e011

Browse files
Fix code for probing availability of AVX-512.
This commit fixes a few things:

* Instead of checking for CPU support of the "xsave" extension, we need to check for OS support of XGETBV instructions via the "osxsave" flag.
* We must check that additional XCR0 bits are set to be sure the ZMM registers are fully enabled.
* We should use the recommended ordering of steps. Specifically, we need to check that the ZMM registers are enabled prior to checking for AVX-512 via CPUID.

In passing, split this code into separate functions to improve readability.

Reported-by: Andrew Kane
Reviewed-by: Akash Shankaran, Raghuveer Devulapalli
Discussion: https://postgr.es/m/20240418024459.GA3385227%40nathanxps13
1 parent bb3ca23 commit 598e011

File tree

1 file changed

+47
-33
lines changed

1 file changed

+47
-33
lines changed

src/port/pg_popcount_avx512_choose.c

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,55 +34,69 @@
3434
#ifdef TRY_POPCNT_FAST
3535

3636
/*
37-
* Returns true if the CPU supports the instructions required for the AVX-512
38-
* pg_popcount() implementation.
37+
* Does CPUID say there's support for XSAVE instructions?
3938
*/
40-
bool
41-
pg_popcount_avx512_available(void)
39+
static inline bool
40+
xsave_available(void)
4241
{
4342
unsigned int exx[4] = {0, 0, 0, 0};
4443

45-
/* Does CPUID say there's support for AVX-512 popcount instructions? */
46-
#if defined(HAVE__GET_CPUID_COUNT)
47-
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
48-
#elif defined(HAVE__CPUIDEX)
49-
__cpuidex(exx, 7, 0);
50-
#else
51-
#error cpuid instruction not available
52-
#endif
53-
if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */
54-
return false;
55-
56-
/* Does CPUID say there's support for AVX-512 byte and word instructions? */
57-
memset(exx, 0, sizeof(exx));
58-
#if defined(HAVE__GET_CPUID_COUNT)
59-
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
60-
#elif defined(HAVE__CPUIDEX)
61-
__cpuidex(exx, 7, 0);
62-
#else
63-
#error cpuid instruction not available
64-
#endif
65-
if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */
66-
return false;
67-
68-
/* Does CPUID say there's support for XSAVE instructions? */
69-
memset(exx, 0, sizeof(exx));
7044
#if defined(HAVE__GET_CPUID)
7145
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
7246
#elif defined(HAVE__CPUID)
7347
__cpuid(exx, 1);
7448
#else
7549
#error cpuid instruction not available
7650
#endif
77-
if ((exx[2] & (1 << 26)) == 0) /* xsave */
78-
return false;
51+
return (exx[2] & (1 << 27)) != 0; /* osxsave */
52+
}
7953

80-
/* Does XGETBV say the ZMM registers are enabled? */
54+
/*
55+
* Does XGETBV say the ZMM registers are enabled?
56+
*
57+
* NB: Caller is responsible for verifying that xsave_available() returns true
58+
* before calling this.
59+
*/
60+
static inline bool
61+
zmm_regs_available(void)
62+
{
8163
#ifdef HAVE_XSAVE_INTRINSICS
82-
return (_xgetbv(0) & 0xe0) != 0;
64+
return (_xgetbv(0) & 0xe6) == 0xe6;
8365
#else
8466
return false;
8567
#endif
8668
}
8769

70+
/*
71+
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
72+
* instructions?
73+
*/
74+
static inline bool
75+
avx512_popcnt_available(void)
76+
{
77+
unsigned int exx[4] = {0, 0, 0, 0};
78+
79+
#if defined(HAVE__GET_CPUID_COUNT)
80+
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
81+
#elif defined(HAVE__CPUIDEX)
82+
__cpuidex(exx, 7, 0);
83+
#else
84+
#error cpuid instruction not available
85+
#endif
86+
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
87+
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
88+
}
89+
90+
/*
91+
* Returns true if the CPU supports the instructions required for the AVX-512
92+
* pg_popcount() implementation.
93+
*/
94+
bool
95+
pg_popcount_avx512_available(void)
96+
{
97+
return xsave_available() &&
98+
zmm_regs_available() &&
99+
avx512_popcnt_available();
100+
}
101+
88102
#endif /* TRY_POPCNT_FAST */

0 commit comments

Comments
 (0)