From 9a6ccb54c1052aa7fd2cd8d486e703c8da848d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Oct 2024 20:15:28 +0200 Subject: [PATCH 01/78] support simple SIMD detection --- Include/internal/pycore_cpuinfo.h | 26 ++ Makefile.pre.in | 2 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/cpuinfo.c | 110 ++++++++ configure | 395 +++++++++++++++++++++++++++++ configure.ac | 30 +++ pyconfig.h.in | 24 ++ 8 files changed, 595 insertions(+) create mode 100644 Include/internal/pycore_cpuinfo.h create mode 100644 Python/cpuinfo.c diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h new file mode 100644 index 00000000000000..d4e9428dfb49dd --- /dev/null +++ b/Include/internal/pycore_cpuinfo.h @@ -0,0 +1,26 @@ +#ifndef Py_INTERNAL_CPUINFO_H +#define Py_INTERNAL_CPUINFO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include + +typedef struct { + bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; + bool done; +} cpu_simd_flags; + +extern void +detect_cpu_simd_features(cpu_simd_flags *flags); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_INTERNAL_CPUINFO_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 07c8a4d20142db..f3640921a501b6 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -434,6 +434,7 @@ PYTHON_OBJS= \ Python/context.o \ Python/critical_section.o \ Python/crossinterp.o \ + Python/cpuinfo.o \ Python/dynamic_annotations.o \ Python/errors.o \ Python/flowgraph.o \ @@ -1191,6 +1192,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_complexobject.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_cpuinfo.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ 
$(srcdir)/Include/internal/pycore_descrobject.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 3b33c6bf6bb91d..989c82e396128c 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -225,6 +225,7 @@ + @@ -584,6 +585,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index ee2930b10439a9..d60294818c8fb8 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -597,6 +597,9 @@ Include\internal + + Include\cpython + Include\internal @@ -1304,6 +1307,9 @@ Python + + Source Files + Python diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c new file mode 100644 index 00000000000000..2eaafe1380b418 --- /dev/null +++ b/Python/cpuinfo.c @@ -0,0 +1,110 @@ +/* + * Naive CPU SIMD features detection. + * + * See Modules/black2module.c. + */ + +#include "Python.h" +#include "pycore_cpuinfo.h" + +#include + +#if defined(__x86_64__) && defined(__GNUC__) +#include +#elif defined(_M_X64) +#include +#endif + +// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). +// However, since autoconf incorrectly assumes so when compiling a universal2 +// binary, we disable all AVX-related instructions. 
+#if defined(__APPLE__) && defined(__arm64__) +# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#endif + +#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 +#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 +#define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 +#define ECX1_SSE4_1 (1 << 19) // sse4.1, ECX, page 1, bit 19 +#define ECX1_SSE4_2 (1 << 20) // sse4.2, ECX, page 1, bit 20 +#define ECX1_AVX (1 << 28) // avx, ECX, page 1, bit 28 +#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 +#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 + +void +detect_cpu_simd_features(cpu_simd_flags *flags) +{ + if (flags->done) { + return; + } + + int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; + int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +#if defined(__x86_64__) && defined(__GNUC__) + __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); + __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); +#elif defined(_M_X64) + int info1[4] = {0}; + __cpuidex(info1, 1, 0); + eax1 = info1[0]; + ebx1 = info1[1]; + ecx1 = info1[2]; + edx1 = info1[3]; + + int info7[4] = {0}; + __cpuidex(info7, 7, 0); + eax7 = info7[0]; + ebx7 = info7[1]; + ecx7 = info7[2]; + edx7 = info7[3]; +#else + // use (void) expressions to avoid warnings + (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; + (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; +#endif + +#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + flags->sse = (edx1 & EDX1_SSE) != 0; +#else + flags->sse = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + flags->sse2 = (edx1 & EDX1_SSE2) != 0; +#else + flags->sse2 = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + flags->sse3 = (ecx1 & ECX1_SSE3) != 0; + #else +#endif + flags->sse3 = false; +#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; +#else + flags->sse41 = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + flags->sse42 = 
(ecx1 & ECX1_SSE4_2) != 0; +#else + flags->sse42 = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + flags->avx = (ecx1 & ECX1_AVX) != 0; +#else + flags->avx = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + flags->avx2 = (ebx7 & EBX7_AVX2) != 0; +#else + flags->avx2 = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; +#else + flags->avx512vbmi = false; +#endif + + flags->done = true; +} diff --git a/configure b/configure index 0cc73e4e66552d..8899ad2eb4f5de 100755 --- a/configure +++ b/configure @@ -30617,6 +30617,401 @@ fi printf "%s\n" "$py_cv_module__blake2" >&6; } + + +# Detection of suported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# Detection for more instruction sets can be added. By default, we detect +# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 +printf %s "checking whether C compiler accepts -msse... " >&6; } +if test ${ax_cv_check_cflags___msse+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse=yes +else $as_nop + ax_cv_check_cflags___msse=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } +if test "x$ax_cv_check_cflags___msse" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 +printf %s "checking whether C compiler accepts -msse2... " >&6; } +if test ${ax_cv_check_cflags___msse2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse2=yes +else $as_nop + ax_cv_check_cflags___msse2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } +if test "x$ax_cv_check_cflags___msse2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 +printf %s "checking whether C compiler accepts -msse3... " >&6; } +if test ${ax_cv_check_cflags___msse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse3=yes +else $as_nop + ax_cv_check_cflags___msse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } +if test "x$ax_cv_check_cflags___msse3" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags___msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_2=yes +else $as_nop + ax_cv_check_cflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } +if test "x$ax_cv_check_cflags___msse4_2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags___msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_2=yes +else $as_nop + ax_cv_check_cflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } +if test "x$ax_cv_check_cflags___msse4_2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +printf %s "checking whether C compiler accepts -mavx... " >&6; } +if test ${ax_cv_check_cflags___mavx+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx=yes +else $as_nop + ax_cv_check_cflags___mavx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } +if test "x$ax_cv_check_cflags___mavx" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags___mavx2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx2=yes +else $as_nop + ax_cv_check_cflags___mavx2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } +if test "x$ax_cv_check_cflags___mavx2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + +fi + LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' case "$ac_sys_system" in Linux*) diff --git a/configure.ac b/configure.ac index 1864e94ace9243..dae39e0e5d1edb 100644 --- a/configure.ac +++ b/configure.ac @@ -7789,6 +7789,36 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) +dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, DEFINE_CONSTANT_SUFFIX) +AC_DEFUN([PY_SIMD_DETECT], [ + AS_VAR_PUSHDEF([py_var], [[ac_cv_simd_]m4_tolower($1)]) + AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], + [[CAN_COMPILE_SIMD_]m4_toupper($1)[_INSTRUCTIONS]], [$3])]) + AC_MSG_CHECKING([checking SIMD instruction set]) + AX_CHECK_COMPILE_FLAG([$2], + [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], + [], []) + AS_VAR_POPDEF([py_var]) + AS_VAR_POPDEF([py_define]) +]) + +# Detection of suported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# Detection for more instruction sets can be added. By default, we detect +# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. 
+if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + PY_SIMD_DETECT([SSE], [-msse]) + PY_SIMD_DETECT([SSE2], [-msse2]) + PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSE4.1], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) + PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX2], [-mavx2]) + PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) +fi + LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' case "$ac_sys_system" in Linux*) diff --git a/pyconfig.h.in b/pyconfig.h.in index 7f02603e26f5d0..123a4cc40936ae 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -32,6 +32,30 @@ /* The Android API level. */ #undef ANDROID_API_LEVEL +/* Define if '-mavx2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 From f4e4f99720166179bc1627830e26c1c3664887d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Oct 2024 21:22:01 +0200 Subject: [PATCH 02/78] add _Py prefix --- Include/internal/pycore_cpuinfo.h | 2 +- Python/cpuinfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index d4e9428dfb49dd..1c8a040d664ddf 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -17,7 +17,7 @@ typedef struct { } cpu_simd_flags; extern void -detect_cpu_simd_features(cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2eaafe1380b418..aa2361373688be 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -34,7 +34,7 @@ #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 void -detect_cpu_simd_features(cpu_simd_flags *flags) +_Py_detect_cpu_simd_features(cpu_simd_flags *flags) { if (flags->done) { return; From 5006686633e9dc61ad607f4adf523605c0dcdcd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 00:25:48 +0200 Subject: [PATCH 03/78] Use `_py` prefix --- Include/internal/pycore_cpuinfo.h | 4 ++-- Python/cpuinfo.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 1c8a040d664ddf..27b4bc0fad8638 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -14,10 +14,10 @@ extern "C" { typedef struct { bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; bool 
done; -} cpu_simd_flags; +} _py_cpu_simd_flags; extern void -_Py_detect_cpu_simd_features(cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index aa2361373688be..28ad48ab52bd73 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -10,9 +10,9 @@ #include #if defined(__x86_64__) && defined(__GNUC__) -#include +# include #elif defined(_M_X64) -#include +# include #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -34,7 +34,7 @@ #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 void -_Py_detect_cpu_simd_features(cpu_simd_flags *flags) +_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags) { if (flags->done) { return; From 3c0b4f1c8182416594c68ea70867bd8ad6cdb3d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:16:39 +0200 Subject: [PATCH 04/78] make the interface friendlier for future adjustments --- Include/internal/pycore_cpuinfo.h | 22 +++-- Python/cpuinfo.c | 154 +++++++++++++++++++----------- 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 27b4bc0fad8638..c6ac446c2fc135 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -9,15 +9,25 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include - typedef struct { - bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; - bool done; -} _py_cpu_simd_flags; + /* Streaming SIMD Extensions */ + uint8_t sse: 1; + uint8_t sse2: 1; + uint8_t sse3: 1; + uint8_t sse41: 1; // SSE4.1 + uint8_t sse42: 1; // SSE4.2 + + /* Advanced Vector Extensions */ + uint8_t avx: 1; + uint8_t avx2: 1; + uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions + + uint8_t done; // indicate whether the structure was filled or 
not +} py_cpu_simd_flags; +/* Detect the available SIMD features on this machine. */ extern void -_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 28ad48ab52bd73..d1799264642b71 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,18 +1,25 @@ /* - * Naive CPU SIMD features detection. + * Python CPU SIMD features detection. * - * See Modules/black2module.c. + * See https://en.wikipedia.org/wiki/CPUID for details. */ #include "Python.h" #include "pycore_cpuinfo.h" -#include +#define CPUID_REG(ARG) ARG +/* + * For simplicity, we only enable SIMD instructions for Intel CPUs, + * even though we could support ARM NEON and POWER. + */ #if defined(__x86_64__) && defined(__GNUC__) # include #elif defined(_M_X64) # include +#else +# undef CPUID_REG +# define CPUID_REG(ARG) Py_UNUSED(ARG) #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -24,6 +31,15 @@ # undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS #endif +/* + * The macros below describe masks to apply on CPUID output registers. + * + * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where + * + * - REGISTER is either EBX, ECX or EDX, + * - PAGE is either 1 or 7 depending, and + * - FEATURE is an SIMD instruction set. + */ #define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 #define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 #define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 @@ -33,78 +49,106 @@ #define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 -void -_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags) -{ - if (flags->done) { - return; - } +#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 
0 : 1 - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +/* + * Indicate whether the CPUID input EAX=1 may be needed to + * detect SIMD basic features (e.g., SSE). + */ +#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) +# define MAY_DETECT_CPUID_SIMD_FEATURES +#endif + +/* + * Indicate whether the CPUID input EAX=7 may be needed to + * detect SIMD extended features (e.g., AVX2 or AVX-512). + */ +#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) +# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES +#endif + +static inline void +get_cpuid_info(int32_t level /* input eax */, + int32_t count /* input ecx */, + int32_t *CPUID_REG(eax), + int32_t *CPUID_REG(ebx), + int32_t *CPUID_REG(ecx), + int32_t *CPUID_REG(edx)) +{ #if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0]; - ebx1 = info1[1]; - ecx1 = info1[2]; - edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0]; - ebx7 = info7[1]; - ecx7 = info7[2]; - edx7 = info7[3]; -#else - // use (void) expressions to avoid warnings - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; + int32_t info[4] = {0}; + __cpuidex(info, level, count); + *eax = info[0]; + *ebx = info[1]; + *ecx = info[2]; + *edx = info[3]; #endif +} +/* Processor Info and Feature Bits (EAX=1, ECX=0). 
*/ +static inline void +detect_cpu_simd_features(py_cpu_simd_flags *flags) +{ + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = (edx1 & EDX1_SSE) != 0; -#else - flags->sse = false; + flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = (edx1 & EDX1_SSE2) != 0; -#else - flags->sse2 = false; + flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = (ecx1 & ECX1_SSE3) != 0; - #else + flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3); #endif - flags->sse3 = false; #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; -#else - flags->sse41 = false; + flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0; -#else - flags->sse42 = false; + flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2); #endif #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = (ecx1 & ECX1_AVX) != 0; -#else - flags->avx = false; + flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX); #endif +} + +/* Extended feature bits (EAX=7, ECX=0). 
*/ +static inline void +detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) +{ + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = (ebx7 & EBX7_AVX2) != 0; -#else - flags->avx2 = false; + flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2); #endif #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; -#else - flags->avx512vbmi = false; + flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI); #endif +} - flags->done = true; +void +_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) +{ + if (flags->done) { + return; + } +#ifdef MAY_DETECT_CPUID_SIMD_FEATURES + detect_cpu_simd_features(flags); +#else + flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0; + flags->avx = 0; +#endif +#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES + detect_cpu_simd_extended_features(flags); +#else + flags->avx2 = flags->avx512vbmi = 0; +#endif + flags->done = 1; } From 01ed21af7c750dfd4d94549cf90b957bf822a471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:36:50 +0200 Subject: [PATCH 05/78] Allow `cpu_simd_flags` to be merged. --- Include/internal/pycore_cpuinfo.h | 8 ++++++++ Python/cpuinfo.c | 20 +++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index c6ac446c2fc135..418c3e7d3fb107 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -29,6 +29,14 @@ typedef struct { extern void _Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); +/* + * Apply a bitwise-OR on all flags in 'out' using those in 'src', + * unconditionally updating 'out' (i.e. out->done is ignored) and + * setting 'out->done' to 1. 
+ */ +extern void +_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, const py_cpu_simd_flags *src); + #ifdef __cplusplus } #endif diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d1799264642b71..121ba59380e667 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -119,7 +119,7 @@ detect_cpu_simd_features(py_cpu_simd_flags *flags) #endif } -/* Extended feature bits (EAX=7, ECX=0). */ +/* Extended Feature Bits (EAX=7, ECX=0). */ static inline void detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) { @@ -152,3 +152,21 @@ _Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) #endif flags->done = 1; } + +void +_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, + const py_cpu_simd_flags *src) +{ +#define UPDATE(FLAG) out->FLAG |= src->FLAG + UPDATE(sse); + UPDATE(sse2); + UPDATE(sse3); + UPDATE(sse41); + UPDATE(sse42); + + UPDATE(avx); + UPDATE(avx2); + UPDATE(avx512vbmi); +#undef UPDATE + out->done = 1; +} From 969a619c82d56741168e31ccfeb8c659a60f074f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:39:31 +0200 Subject: [PATCH 06/78] update comments --- Python/cpuinfo.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 121ba59380e667..2cd98b3a17fbf4 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -7,7 +7,10 @@ #include "Python.h" #include "pycore_cpuinfo.h" -#define CPUID_REG(ARG) ARG +/* Macro to mark a CPUID register function parameter as being used. */ +#define CPUID_REG(PARAM) PARAM +/* Macro to check a CPUID register bit: 1 if REGISTER has a MASK bit set, else 0. */ +#define CPUID_CHECK_REG(REGISTER, MASK) (((REGISTER) & (MASK)) == 0 ? 
0 : 1) /* * For simplicity, we only enable SIMD instructions for Intel CPUs, @@ -19,7 +22,7 @@ # include #else # undef CPUID_REG -# define CPUID_REG(ARG) Py_UNUSED(ARG) +# define CPUID_REG(PARAM) Py_UNUSED(PARAM) #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -38,18 +41,16 @@ * * - REGISTER is either EBX, ECX or EDX, * - PAGE is either 1 or 7 depending, and - * - FEATURE is an SIMD instruction set. + * - FEATURE is a SIMD feature (with one or more specialized instructions). */ -#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 -#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 -#define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 -#define ECX1_SSE4_1 (1 << 19) // sse4.1, ECX, page 1, bit 19 -#define ECX1_SSE4_2 (1 << 20) // sse4.2, ECX, page 1, bit 20 -#define ECX1_AVX (1 << 28) // avx, ECX, page 1, bit 28 -#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 -#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 - -#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 
0 : 1 +#define EDX1_SSE (1 << 25) +#define EDX1_SSE2 (1 << 26) +#define ECX1_SSE3 (1 << 9) +#define ECX1_SSE4_1 (1 << 19) +#define ECX1_SSE4_2 (1 << 20) +#define ECX1_AVX (1 << 28) +#define EBX7_AVX2 (1 << 5) +#define ECX7_AVX512_VBMI (1 << 1) /* * Indicate whether the CPUID input EAX=1 may be needed to @@ -100,22 +101,22 @@ detect_cpu_simd_features(py_cpu_simd_flags *flags) int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE); + flags->sse = CPUID_CHECK_REG(edx, EDX1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2); + flags->sse2 = CPUID_CHECK_REG(edx, EDX1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3); + flags->sse3 = CPUID_CHECK_REG(ecx, ECX1_SSE3); #endif #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1); + flags->sse41 = CPUID_CHECK_REG(ecx, ECX1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2); + flags->sse42 = CPUID_CHECK_REG(ecx, ECX1_SSE4_2); #endif #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX); + flags->avx = CPUID_CHECK_REG(ecx, ECX1_AVX); #endif } @@ -126,10 +127,10 @@ detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2); + flags->avx2 = CPUID_CHECK_REG(ebx, EBX7_AVX2); #endif #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI); + flags->avx512vbmi = CPUID_CHECK_REG(ecx, ECX7_AVX512_VBMI); #endif } From 5a5acc202b830470ceda896e7de59cc8d2050766 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 13:58:19 +0200 Subject: [PATCH 07/78] fix typo --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dae39e0e5d1edb..3867e30ae90414 100644 --- a/configure.ac +++ b/configure.ac @@ -7812,7 +7812,7 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) PY_SIMD_DETECT([AVX], [-mavx]) PY_SIMD_DETECT([AVX2], [-mavx2]) From ac1b1657939edf880e927a3858267d278a83879d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 15:03:34 +0200 Subject: [PATCH 08/78] fix configure script --- configure | 18 +++++++++--------- configure.ac | 13 ++++++++----- pyconfig.h.in | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/configure b/configure index 8899ad2eb4f5de..e749010ccfe815 100755 --- a/configure +++ b/configure @@ -30775,15 +30775,15 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse4.2... " >&6; } -if test ${ax_cv_check_cflags___msse4_2+y} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 +printf %s "checking whether C compiler accepts -msse4.1... 
" >&6; } +if test ${ax_cv_check_cflags___msse4_1+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.2" + CFLAGS="$CFLAGS -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -30797,16 +30797,16 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_2=yes + ax_cv_check_cflags___msse4_1=yes else $as_nop - ax_cv_check_cflags___msse4_2=no + ax_cv_check_cflags___msse4_1=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } -if test "x$ax_cv_check_cflags___msse4_2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } +if test "x$ax_cv_check_cflags___msse4_1" = xyes then : printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index 3867e30ae90414..707cda71c9c903 100644 --- a/configure.ac +++ b/configure.ac @@ -7789,11 +7789,14 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) -dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, DEFINE_CONSTANT_SUFFIX) +dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, NORMALIZED_NAME) AC_DEFUN([PY_SIMD_DETECT], [ - AS_VAR_PUSHDEF([py_var], [[ac_cv_simd_]m4_tolower($1)]) + AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], + [[ac_cv_can_compile_simd_]m4_tolower([$1])], + [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[CAN_COMPILE_SIMD_]m4_toupper($1)[_INSTRUCTIONS]], [$3])]) + [[CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) 
AC_MSG_CHECKING([checking SIMD instruction set]) AX_CHECK_COMPILE_FLAG([$2], [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], @@ -7812,8 +7815,8 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.1], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) - PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) PY_SIMD_DETECT([AVX], [-mavx]) PY_SIMD_DETECT([AVX2], [-mavx2]) PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) diff --git a/pyconfig.h.in b/pyconfig.h.in index 123a4cc40936ae..b5ad1b310f3e5d 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -47,7 +47,7 @@ /* Define if '-msse3' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS -/* Define if '-msse4.2' is a valid compiler flag. */ +/* Define if '-msse4.1' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS /* Define if '-msse4.2' is a valid compiler flag. */ From 6f304f2bea99c1a1102bf84dcd582148167acc1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 15:47:41 +0200 Subject: [PATCH 09/78] fix bit detection --- Python/cpuinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2cd98b3a17fbf4..40423d577b4221 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,8 +9,8 @@ /* Macro to mark a CPUID register function parameter as being used. */ #define CPUID_REG(PARAM) PARAM -/* Macro to check a CPUID register bit. */ -#define CPUID_CHECK_REG(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 0 : 1 +/* Macro to check one or more CPUID register bits. */ +#define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 
1 : 0) /* * For simplicity, we only enable SIMD instructions for Intel CPUs, From f3bd0275f8d5c80a68f196aa4fcbeaa9a5eae721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:47:24 +0200 Subject: [PATCH 10/78] Harden detection of AVX instructions. --- Include/internal/pycore_cpuinfo.h | 117 +++- Python/cpuinfo.c | 568 +++++++++++++--- configure | 1046 ++++++++++++++++++++++++++++- configure.ac | 39 +- pyconfig.h.in | 63 ++ 5 files changed, 1722 insertions(+), 111 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 418c3e7d3fb107..145da8c9d2d2ae 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -9,33 +9,114 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -typedef struct { +#include <stdint.h> // uint8_t + +/* Macro indicating that the member is a CPUID bit. */ +#define _Py_SIMD_FEAT uint8_t +/* Macro indicating that the member is a XCR0 bit. */ +#define _Py_SIMD_XCR0_BIT uint8_t + +typedef struct py_simd_features { /* Streaming SIMD Extensions */ - uint8_t sse: 1; - uint8_t sse2: 1; - uint8_t sse3: 1; - uint8_t sse41: 1; // SSE4.1 - uint8_t sse42: 1; // SSE4.2 + _Py_SIMD_FEAT sse: 1; + _Py_SIMD_FEAT sse2: 1; + _Py_SIMD_FEAT sse3: 1; + _Py_SIMD_FEAT ssse3: 1; // Supplemental SSE3 instructions + _Py_SIMD_FEAT sse41: 1; // SSE4.1 + _Py_SIMD_FEAT sse42: 1; // SSE4.2 /* Advanced Vector Extensions */ - uint8_t avx: 1; - uint8_t avx2: 1; - uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions + _Py_SIMD_FEAT avx: 1; + _Py_SIMD_FEAT avx_ifma: 1; + _Py_SIMD_FEAT avx_ne_convert: 1; - uint8_t done; // indicate whether the structure was filled or not -} py_cpu_simd_flags; + _Py_SIMD_FEAT avx_vnni: 1; + _Py_SIMD_FEAT avx_vnni_int8: 1; + _Py_SIMD_FEAT avx_vnni_int16: 1; -/* Detect the available SIMD features on this machine. 
*/ -extern void -_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); + /* Advanced Vector Extensions 2. */ + _Py_SIMD_FEAT avx2: 1; + + /* + * AVX-512 instruction set are grouped by the processor generation + * that implements them (see https://en.wikipedia.org/wiki/AVX-512). + * + * We do not include GFNI, VPCLMULQDQ and VAES instructions since + * they are not exactly AVX-512 per se, nor do we include BF16 or + * FP16 since they operate on bfloat16 and binary16 (half-float). + */ + _Py_SIMD_FEAT avx512_f: 1; + _Py_SIMD_FEAT avx512_cd: 1; + + _Py_SIMD_FEAT avx512_er: 1; + _Py_SIMD_FEAT avx512_pf: 1; + + _Py_SIMD_FEAT avx512_4fmaps: 1; + _Py_SIMD_FEAT avx512_4vnniw: 1; + + _Py_SIMD_FEAT avx512_vpopcntdq: 1; + + _Py_SIMD_FEAT avx512_vl: 1; + _Py_SIMD_FEAT avx512_dq: 1; + _Py_SIMD_FEAT avx512_bw: 1; + + _Py_SIMD_FEAT avx512_ifma: 1; + + _Py_SIMD_FEAT avx512_vbmi: 1; + + _Py_SIMD_FEAT avx512_vnni: 1; + + _Py_SIMD_FEAT avx512_vbmi2: 1; + _Py_SIMD_FEAT avx512_bitalg: 1; + + _Py_SIMD_FEAT avx512_vp2intersect: 1; + + _Py_SIMD_FEAT os_xsave: 1; // XSAVE is supported + + /* XCR0 register bits */ + _Py_SIMD_XCR0_BIT xcr0_sse: 1; + + /* + * On some Intel CPUs, it is possible for the CPU to support AVX2 + * instructions even though the underlying OS does not know about + * AVX. In particular, only (SSE) XMM registers will be saved and + * restored on context-switch, but not (AVX) YMM registers. + */ + _Py_SIMD_XCR0_BIT xcr0_avx: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; + + /* + * We want to align the bit-fields correctly so the bitsize of + * 'done' must be chosen so that the sum of all bit fields is + * a multiple of 8. + * + * Whenever a field is added or removed above, update the + * following number (35) and adjust the bitsize of 'done'. 
+ */ + uint8_t done: 5; // set if the structure was filled +} py_simd_features; /* - * Apply a bitwise-OR on all flags in 'out' using those in 'src', - * unconditionally updating 'out' (i.e. out->done is ignored) and - * setting 'out->done' to 1. + * Explicitly initialize all members to zero to guarantee that + * we never have an un-initialized attribute at runtime which + * could lead to an illegal instruction error. */ extern void -_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, const py_cpu_simd_flags *src); +_Py_disable_simd_features(py_simd_features *flags); + +/* +* Apply a bitwise-OR on all flags in 'out' using those in 'src', +* unconditionally updating 'out' (i.e. out->done is ignored) and +* setting 'out->done' to 1. +*/ +extern void +_Py_update_simd_features(py_simd_features *out, const py_simd_features *src); + +/* Detect the available SIMD features on this machine. */ +extern void +_Py_detect_simd_features(py_simd_features *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 40423d577b4221..5ab068fa4af0e9 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,9 +4,18 @@ * See https://en.wikipedia.org/wiki/CPUID for details. */ +/* + * In order to properly maintain this file, the following rules should + * be observed and enforced if possible: + * + * - Defining the SIMD_*_INSTRUCTIONS_DETECTION_GUARD macros should + */ + #include "Python.h" #include "pycore_cpuinfo.h" +#include <stdint.h> // UINT32_C() + /* Macro to mark a CPUID register function parameter as being used. */ #define CPUID_REG(PARAM) PARAM /* Macro to check one or more CPUID register bits. */ @@ -17,70 +26,164 @@ * even though we could support ARM NEON and POWER. */ #if defined(__x86_64__) && defined(__GNUC__) -# include <cpuid.h> +# include <cpuid.h> // __cpuid_count() #elif defined(_M_X64) -# include <intrin.h> +# include <intrin.h> // __cpuidex() #else # undef CPUID_REG # define CPUID_REG(PARAM) Py_UNUSED(PARAM) #endif -// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). 
-// However, since autoconf incorrectly assumes so when compiling a universal2 -// binary, we disable all AVX-related instructions. -#if defined(__APPLE__) && defined(__arm64__) -# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS -# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS -# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any SSE instructions detection code. */ +# define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX instructions detection code. */ +# define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX-2 instructions detection code. 
*/ +# define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX-512 instructions detection code. */ +# define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif + +// On macOS, checking the XCR0 register is NOT a guaranteed way +// to ensure the usability of AVX-512. As such, we disable the +// entire set of AVX-512 instructions. +// +// See https://stackoverflow.com/a/72523150/9579194. +// +// Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be +// compiled on x86_64). However, since autoconf incorrectly assumes so +// when compiling a universal2 binary, we disable AVX for such builds. 
+#if defined(__APPLE__) +# undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +# if defined(__arm64__) +# undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +# undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +# endif +#endif + +#if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# define SHOULD_DETECT_SIMD_FEATURES_L1 +#endif + +#if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# define SHOULD_DETECT_SIMD_FEATURES_L7 +# define SHOULD_DETECT_SIMD_FEATURES_L7S0 +#endif + +#if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=1. */ +# define SHOULD_DETECT_SIMD_FEATURES_L7 +# define SHOULD_DETECT_SIMD_FEATURES_L7S1 #endif /* * The macros below describe masks to apply on CPUID output registers. * - * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where + * Each macro is of the form _L[S]_, + * where <> (resp. []) denotes a required (resp. optional) group and: * - * - REGISTER is either EBX, ECX or EDX, - * - PAGE is either 1 or 7 depending, and + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. 
*/ -#define EDX1_SSE (1 << 25) -#define EDX1_SSE2 (1 << 26) -#define ECX1_SSE3 (1 << 9) -#define ECX1_SSE4_1 (1 << 19) -#define ECX1_SSE4_2 (1 << 20) -#define ECX1_AVX (1 << 28) -#define EBX7_AVX2 (1 << 5) -#define ECX7_AVX512_VBMI (1 << 1) -/* - * Indicate whether the CPUID input EAX=1 may be needed to - * detect SIMD basic features (e.g., SSE). - */ -#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) -# define MAY_DETECT_CPUID_SIMD_FEATURES -#endif +/* CPUID (LEAF=1, SUBLEAF=0) */ +#define ECX_L1_SSE3 (UINT32_C(1) << 0) +#define ECX_L1_SSSE3 (UINT32_C(1) << 9) +#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) +#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) +#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) +#define ECX_L1_AVX (UINT32_C(1) << 28) -/* - * Indicate whether the CPUID input EAX=7 may be needed to - * detect SIMD extended features (e.g., AVX2 or AVX-512). 
- */ -#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) -# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES -#endif +#define EDX_L1_SSE (UINT32_C(1) << 25) +#define EDX_L1_SSE2 (UINT32_C(1) << 26) + +/* CPUID (LEAF=7, SUBLEAF=0) */ +#define EBX_L7_AVX2 (UINT32_C(1) << 5) +#define EBX_L7_AVX512_F (UINT32_C(1) << 16) +#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) +#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) +#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) +#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) +#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) +#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) +#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) + +#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) +#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) +#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) +#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) +#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) + +#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) +#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) +#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) + +/* CPUID (LEAF=7, SUBLEAF=1) */ +#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) +#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) + +#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) +#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) +#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) static inline void -get_cpuid_info(int32_t level /* input eax */, - int32_t count /* input ecx */, - int32_t *CPUID_REG(eax), - int32_t *CPUID_REG(ebx), - int32_t *CPUID_REG(ecx), - int32_t *CPUID_REG(edx)) +get_cpuid_info(uint32_t level /* input eax */, + uint32_t count /* input ecx */, + uint32_t *CPUID_REG(eax), + uint32_t *CPUID_REG(ebx), + uint32_t *CPUID_REG(ecx), + uint32_t *CPUID_REG(edx)) { #if defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); @@ -94,80 +197,387 @@ get_cpuid_info(int32_t level /* input eax */, #endif } -/* Processor Info and Feature Bits 
(EAX=1, ECX=0). */ +/* XSAVE State Components. */ +#define XCR0_SSE (UINT32_C(1) << 1) +#define XCR0_AVX (UINT32_C(1) << 2) +#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) +#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) +#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) + +static inline uint64_t +get_xgetbv(uint32_t index) +{ +#if defined(__x86_64__) && defined(__GNUC__) + uint32_t eax = 0, edx = 0; + __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return ((uint64_t)edx << 32) | eax; +#elif defined(_M_X64) + return (uint64_t)_xgetbv(index); +#else + (void) index; + return 0; +#endif +} + +/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ +static inline uint32_t +detect_cpuid_maxleaf(void) +{ + uint32_t maxlevel = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); + return maxlevel; +} + +/* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_cpu_simd_features(py_cpu_simd_flags *flags) +detect_simd_features(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = CPUID_CHECK_REG(edx, EDX1_SSE); + flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = CPUID_CHECK_REG(edx, EDX1_SSE2); + flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = CPUID_CHECK_REG(ecx, ECX1_SSE3); + flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); +#endif +#ifdef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = CPUID_CHECK_REG(ecx, ECX1_SSE4_1); + flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = CPUID_CHECK_REG(ecx, ECX1_SSE4_2); + flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif +#endif + +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = CPUID_CHECK_REG(ecx, ECX1_AVX); + flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); + flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif #endif } -/* Extended Feature Bits (EAX=7, ECX=0). */ +/* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) +detect_simd_extended_features_ecx_0(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = CPUID_CHECK_REG(ebx, EBX7_AVX2); + flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); +#endif +#endif + +#ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif + +#ifdef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); +#endif + #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = CPUID_CHECK_REG(ecx, ECX7_AVX512_VBMI); + flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + flags->avx512_vbmi2 = 
CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); +#endif #endif } -void -_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) +/* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ +static inline void +detect_simd_extended_features_ecx_1(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - if (flags->done) { - return; - } -#ifdef MAY_DETECT_CPUID_SIMD_FEATURES - detect_cpu_simd_features(flags); -#else - flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0; - flags->avx = 0; + // Keep the ordering and newlines as they are declared in the structure. +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES - detect_cpu_simd_extended_features(flags); -#else - flags->avx2 = flags->avx512vbmi = 0; + +#ifdef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); +#endif +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); +#endif +#endif +} + +static inline void +detect_simd_xsave_state(py_simd_features *flags) +{ + uint64_t xcr0 = flags->os_xsave ? 
get_xgetbv(0) : 0; + flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); + + flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); + + flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); + flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); + flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +} + +static inline void +finalize_simd_features(py_simd_features *flags) +{ + assert(flags->done == 0); + // Here, any flag that may depend on others should be correctly set + // at runtime to avoid illegal instruction errors. flags->done = 1; } +/* + * Return 0 if flags are compatible and correctly set and -1 otherwise. + * + * If this function returns -1, 'flags' should disable all SIMD features + * to avoid encountering a possible illegal instruction error at runtime. + */ +static inline int +validate_simd_features(const py_simd_features *flags) +{ + if (flags->done != 1) { + return -1; + } + + // AVX-512/F is required to support any other AVX-512 instruction set + uint8_t avx512_require_f = ( + flags->avx512_cd || flags->avx512_er || flags->avx512_pf || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || + flags->avx512_vbmi || + flags->avx512_4fmaps || flags->avx512_4vnniw || + flags->avx512_vpopcntdq || + flags->avx512_vnni || flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vp2intersect + ); + if (!flags->avx512_f && avx512_require_f) { + return -1; + } + + return 0; +} + +void +_Py_disable_simd_features(py_simd_features *flags) +{ + // Keep the ordering and newlines as they are declared in the structure. 
+#define ZERO(FLAG) flags->FLAG = 0 + ZERO(sse); + ZERO(sse2); + ZERO(sse3); + ZERO(ssse3); + ZERO(sse41); + ZERO(sse42); + + ZERO(avx); + ZERO(avx_ifma); + ZERO(avx_ne_convert); + + ZERO(avx_vnni); + ZERO(avx_vnni_int8); + ZERO(avx_vnni_int16); + + ZERO(avx2); + + ZERO(avx512_f); + ZERO(avx512_cd); + + ZERO(avx512_er); + ZERO(avx512_pf); + + ZERO(avx512_4fmaps); + ZERO(avx512_4vnniw); + + ZERO(avx512_vpopcntdq); + + ZERO(avx512_vl); + ZERO(avx512_dq); + ZERO(avx512_bw); + + ZERO(avx512_ifma); + + ZERO(avx512_vbmi); + + ZERO(avx512_vnni); + + ZERO(avx512_vbmi2); + ZERO(avx512_bitalg); + + ZERO(avx512_vp2intersect); + + ZERO(os_xsave); + + ZERO(xcr0_sse); + ZERO(xcr0_avx); + ZERO(xcr0_avx512_opmask); + ZERO(xcr0_avx512_zmm_hi256); + ZERO(xcr0_avx512_hi16_zmm); +#undef ZERO +} + void -_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, - const py_cpu_simd_flags *src) +_Py_update_simd_features(py_simd_features *out, + const py_simd_features *src) { + // Keep the ordering and newlines as they are declared in the structure. 
#define UPDATE(FLAG) out->FLAG |= src->FLAG UPDATE(sse); UPDATE(sse2); UPDATE(sse3); + UPDATE(ssse3); UPDATE(sse41); UPDATE(sse42); UPDATE(avx); + UPDATE(avx_ifma); + UPDATE(avx_ne_convert); + + UPDATE(avx_vnni); + UPDATE(avx_vnni_int8); + UPDATE(avx_vnni_int16); + UPDATE(avx2); - UPDATE(avx512vbmi); + + UPDATE(avx512_f); + UPDATE(avx512_cd); + + UPDATE(avx512_er); + UPDATE(avx512_pf); + + UPDATE(avx512_4fmaps); + UPDATE(avx512_4vnniw); + + UPDATE(avx512_vpopcntdq); + + UPDATE(avx512_vl); + UPDATE(avx512_dq); + UPDATE(avx512_bw); + + UPDATE(avx512_ifma); + + UPDATE(avx512_vbmi); + + UPDATE(avx512_vnni); + + UPDATE(avx512_vbmi2); + UPDATE(avx512_bitalg); + + UPDATE(avx512_vp2intersect); + + UPDATE(os_xsave); + + UPDATE(xcr0_sse); + UPDATE(xcr0_avx); + UPDATE(xcr0_avx512_opmask); + UPDATE(xcr0_avx512_zmm_hi256); + UPDATE(xcr0_avx512_hi16_zmm); #undef UPDATE out->done = 1; } + +void +_Py_detect_simd_features(py_simd_features *flags) +{ + if (flags->done) { + return; + } + _Py_disable_simd_features(flags); + uint32_t maxleaf = detect_cpuid_maxleaf(); + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; +#ifdef SHOULD_DETECT_SIMD_FEATURES_L1 + if (maxleaf >= 1) { + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + detect_simd_features(flags, eax, ebx, ecx, edx); + if (flags->os_xsave) { + detect_simd_xsave_state(flags); + } + } +#else + (void) maxleaf; + (void) eax; (void) ebx; (void) ecx; (void) edx; +#endif +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7 + if (maxleaf >= 7) { +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + detect_simd_extended_features_ecx_0(flags, eax, ebx, ecx, edx); +#endif +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S1 + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + detect_simd_extended_features_ecx_1(flags, eax, ebx, ecx, edx); +#endif + } +#else + (void) maxleaf; + (void) eax; (void) ebx; (void) ecx; (void) edx; 
+#endif + finalize_simd_features(flags); + if (validate_simd_features(flags) < 0) { + _Py_disable_simd_features(flags); + } +} diff --git a/configure b/configure index e749010ccfe815..2bcec7f82ce042 100755 --- a/configure +++ b/configure @@ -30623,9 +30623,11 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# Detection for more instruction sets can be added. By default, we detect -# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +# See py_simd_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations +# for AVX-512 instructions. if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + # SSE @@ -30773,6 +30775,54 @@ fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 +printf %s "checking whether C compiler accepts -mssse3... " >&6; } +if test ${ax_cv_check_cflags___mssse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mssse3=yes +else $as_nop + ax_cv_check_cflags___mssse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } +if test "x$ax_cv_check_cflags___mssse3" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 @@ -30866,6 +30916,7 @@ fi + # AVX @@ -30917,6 +30968,248 @@ fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 +printf %s "checking whether C compiler accepts -mavxifma... " >&6; } +if test ${ax_cv_check_cflags___mavxifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxifma=yes +else $as_nop + ax_cv_check_cflags___mavxifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } +if test "x$ax_cv_check_cflags___mavxifma" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 +printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } +if test ${ax_cv_check_cflags___mavxneconvert+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxneconvert" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxneconvert=yes +else $as_nop + ax_cv_check_cflags___mavxneconvert=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags___mavxneconvert" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 +printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } +if test ${ax_cv_check_cflags___mavxvnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnni=yes +else $as_nop + ax_cv_check_cflags___mavxvnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } +if test "x$ax_cv_check_cflags___mavxvnni" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint8" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint8=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint16+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint16" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint16=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint16=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # AVX 2 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 @@ -30962,20 +31255,21 @@ fi + # { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 -printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi+y} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 +printf %s "checking whether C compiler accepts -mavx512f... " >&6; } +if test ${ax_cv_check_cflags___mavx512f+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi" + CFLAGS="$CFLAGS -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -30989,19 +31283,747 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi=yes + ax_cv_check_cflags___mavx512f=yes else $as_nop - ax_cv_check_cflags___mavx512vbmi=no + ax_cv_check_cflags___mavx512f=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } +if test "x$ax_cv_check_cflags___mavx512f" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 +printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } +if test ${ax_cv_check_cflags___mavx512cd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512cd" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512cd=yes +else $as_nop + ax_cv_check_cflags___mavx512cd=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } +if test "x$ax_cv_check_cflags___mavx512cd" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 +printf %s "checking whether C compiler accepts -mavx512er... " >&6; } +if test ${ax_cv_check_cflags___mavx512er+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512er" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512er=yes +else $as_nop + ax_cv_check_cflags___mavx512er=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } +if test "x$ax_cv_check_cflags___mavx512er" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 +printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } +if test ${ax_cv_check_cflags___mavx512pf+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512pf" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512pf=yes +else $as_nop + ax_cv_check_cflags___mavx512pf=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } +if test "x$ax_cv_check_cflags___mavx512pf" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 +printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } +if test ${ax_cv_check_cflags___mavx5124fmaps+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124fmaps" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124fmaps=yes +else $as_nop + ax_cv_check_cflags___mavx5124fmaps=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 +printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } +if test ${ax_cv_check_cflags___mavx5124vnniw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124vnniw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124vnniw=yes +else $as_nop + ax_cv_check_cflags___mavx5124vnniw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 +printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } +if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vpopcntdq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vpopcntdq=yes +else $as_nop + ax_cv_check_cflags___mavx512vpopcntdq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 +printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } +if test ${ax_cv_check_cflags___mavx512vl+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vl=yes +else $as_nop + ax_cv_check_cflags___mavx512vl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } +if test "x$ax_cv_check_cflags___mavx512vl" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 +printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } +if test ${ax_cv_check_cflags___mavx512dq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512dq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512dq=yes +else $as_nop + ax_cv_check_cflags___mavx512dq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } +if test "x$ax_cv_check_cflags___mavx512dq" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 +printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } +if test ${ax_cv_check_cflags___mavx512bw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bw=yes +else $as_nop + ax_cv_check_cflags___mavx512bw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } +if test "x$ax_cv_check_cflags___mavx512bw" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 +printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } +if test ${ax_cv_check_cflags___mavx512ifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512ifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512ifma=yes +else $as_nop + ax_cv_check_cflags___mavx512ifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags___mavx512ifma" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 +printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } +if test ${ax_cv_check_cflags___mavx512vnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vnni=yes +else $as_nop + ax_cv_check_cflags___mavx512vnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags___mavx512vnni" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi2=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 +printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } +if test ${ax_cv_check_cflags___mavx512bitalg+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bitalg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bitalg=yes +else $as_nop + ax_cv_check_cflags___mavx512bitalg=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 +printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } +if test ${ax_cv_check_cflags___mavx512vp2intersect+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vp2intersect" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vp2intersect=yes +else $as_nop + ax_cv_check_cflags___mavx512vp2intersect=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : diff --git a/configure.ac b/configure.ac index 707cda71c9c903..74a8e785c229bf 100644 --- a/configure.ac +++ b/configure.ac @@ -7809,17 +7809,52 @@ AC_DEFUN([PY_SIMD_DETECT], [ # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# Detection for more instruction sets can be added. By default, we detect -# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +# See py_simd_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations +# for AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + # SSE PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSSE3], [-mssse3]) PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) + # AVX PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) + PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) + # + PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) + PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) + PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) + # AVX 2 PY_SIMD_DETECT([AVX2], [-mavx2]) + # + PY_SIMD_DETECT([AVX512_F], [-mavx512f]) + PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) + PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) + # + PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) + PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) + # + PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) + # + PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) + PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) + PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) + # + PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) + # PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) + # + PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) + # + PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) + PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) + # + PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' diff --git a/pyconfig.h.in b/pyconfig.h.in index b5ad1b310f3e5d..625c9798d6272b 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -35,12 +35,72 @@ /* Define if '-mavx2' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +/* Define if '-mavx5124fmaps' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + /* Define if '-mavx512vbmi' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + /* Define if '-mavx' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +/* Define if '-mavxneconvert' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + /* Define if '-msse2' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS @@ -56,6 +116,9 @@ /* Define if '-msse' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +/* Define if '-mssse3' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 From 16b2aed47d05e5de32e90e9ad42cc789b54a2ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:54:23 +0200 Subject: [PATCH 11/78] do not guard the parsing of `os_xsave` --- Python/cpuinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 5ab068fa4af0e9..8d97edc71b45ca 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -259,9 +259,10 @@ detect_simd_features(py_simd_features *flags, #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); - flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif #endif + + flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ From 5018fa930f576f8ec8cb7ec10a8fc65b4e1712cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:14:59 +0200 Subject: [PATCH 12/78] Remove old comment. 
--- Python/cpuinfo.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8d97edc71b45ca..92a0c0e3c64b02 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,13 +4,6 @@ * See https://en.wikipedia.org/wiki/CPUID for details. */ -/* - * In order to properly maintain this file, the following rules should - * be observed and enforced if possible: - * - * - Defining the SIMD_*_INSTRUCTIONS_DETECTION_GUARD macros should - */ - #include "Python.h" #include "pycore_cpuinfo.h" From e75806594785f673e216fa23863c9e9fac243457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:27:28 +0200 Subject: [PATCH 13/78] Update cpuinfo.c comments --- Python/cpuinfo.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 92a0c0e3c64b02..853404d00e56e3 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -10,14 +10,12 @@ #include // UINT32_C() /* Macro to mark a CPUID register function parameter as being used. */ -#define CPUID_REG(PARAM) PARAM +#define CPUID_REG(PARAM) PARAM /* Macro to check one or more CPUID register bits. */ #define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) -/* - * For simplicity, we only enable SIMD instructions for Intel CPUs, - * even though we could support ARM NEON and POWER. - */ +// For simplicity, we only enable SIMD instructions for Intel CPUs, +// even though we could support ARM NEON and POWER. 
#if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() #elif defined(_M_X64) @@ -33,7 +31,7 @@ || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any SSE instructions detection code. */ # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif @@ -44,13 +42,13 @@ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX instructions detection code. */ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif #if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX-2 instructions detection code. */ # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif @@ -71,7 +69,7 @@ || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX-512 instructions detection code. */ # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -84,7 +82,7 @@ // // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be // compiled on x86_64). However, since autoconf incorrectly assumes so -// when compiling a universal2 binary, we disable AVX for such builds. +// when compiling a universal2 binary, we disable AVX on such builds. 
#if defined(__APPLE__) # undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD # if defined(__arm64__) @@ -181,7 +179,7 @@ get_cpuid_info(uint32_t level /* input eax */, #if defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(_M_X64) - int32_t info[4] = {0}; + uint32_t info[4] = {0}; __cpuidex(info, level, count); *eax = info[0]; *ebx = info[1]; @@ -247,13 +245,13 @@ detect_simd_features(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif -#endif +#endif // !SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif -#endif +#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } @@ -329,7 +327,7 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif -#endif +#endif // !SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). 
*/ @@ -357,7 +355,7 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif -#endif +#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void @@ -552,7 +550,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif +#endif // !SHOULD_DETECT_SIMD_FEATURES_L1 #ifdef SHOULD_DETECT_SIMD_FEATURES_L7 if (maxleaf >= 7) { #ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 @@ -569,7 +567,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif +#endif // !SHOULD_DETECT_SIMD_FEATURES_L7 finalize_simd_features(flags); if (validate_simd_features(flags) < 0) { _Py_disable_simd_features(flags); From 731be816b460b017b38b90937066c6d19e9e5422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:33:32 +0200 Subject: [PATCH 14/78] Update pycore_cpuinfo.h comments --- Include/internal/pycore_cpuinfo.h | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 145da8c9d2d2ae..ad4966e8f8637a 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -76,25 +76,20 @@ typedef struct py_simd_features { /* XCR0 register bits */ _Py_SIMD_XCR0_BIT xcr0_sse: 1; - /* - * On some Intel CPUs, it is possible for the CPU to support AVX2 - * instructions even though the underlying OS does not know about - * AVX. In particular, only (SSE) XMM registers will be saved and - * restored on context-switch, but not (AVX) YMM registers. - */ + // On some Intel CPUs, it is possible for the CPU to support AVX2 + // instructions even though the underlying OS does not know about + // AVX. 
In particular, only (SSE) XMM registers will be saved and + // restored on context-switch, but not (AVX) YMM registers. _Py_SIMD_XCR0_BIT xcr0_avx: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; - /* - * We want to align the bit-fields correctly so the bitsize of - * 'done' must be chosen so that the sum of all bit fields is - * a multiple of 8. - * - * Whenever a field is added or removed above, update the - * following number (35) and adjust the bitsize of 'done'. - */ + // We want the structure to be aligned correctly, namely + // its size in bits must be a multiple of 8. + // + // Whenever a field is added or removed above, update the + // number of fields (35) and adjust the bitsize of 'done'. uint8_t done: 5; // set if the structure was filled } py_simd_features; @@ -107,10 +102,15 @@ extern void _Py_disable_simd_features(py_simd_features *flags); /* -* Apply a bitwise-OR on all flags in 'out' using those in 'src', -* unconditionally updating 'out' (i.e. out->done is ignored) and -* setting 'out->done' to 1. -*/ + * Apply a bitwise-OR on all flags in 'out' using those in 'src', + * unconditionally updating 'out' (i.e. 'out->done' is ignored). + * + * This also sets 'out->done' to 1 at the end. + * + * Note that the caller is responsible to ensure that the flags set to 1 + * must not lead to illegal instruction errors if the corresponding SIMD + * instruction(s) are used. 
+ */ extern void _Py_update_simd_features(py_simd_features *out, const py_simd_features *src); From 7947715b3d38a7d4e46065de3087323e1f6917db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:38:29 +0200 Subject: [PATCH 15/78] fix lint --- Include/internal/pycore_cpuinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index ad4966e8f8637a..fbe37fb6a3b936 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -105,7 +105,7 @@ _Py_disable_simd_features(py_simd_features *flags); * Apply a bitwise-OR on all flags in 'out' using those in 'src', * unconditionally updating 'out' (i.e. 'out->done' is ignored). * - * This also sets 'out->done' to 1 at the end. + * This also sets 'out->done' to 1 at the end. * * Note that the caller is responsible to ensure that the flags set to 1 * must not lead to illegal instruction errors if the corresponding SIMD From 7a17cbbe2f03e931115e3f1904b3b497a0b48e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:43:17 +0200 Subject: [PATCH 16/78] I really shouldn't use a Web UI --- Include/internal/pycore_cpuinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index fbe37fb6a3b936..92cdff2c3f55f1 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -87,7 +87,7 @@ typedef struct py_simd_features { // We want the structure to be aligned correctly, namely // its size in bits must be a multiple of 8. - // + // // Whenever a field is added or removed above, update the // number of fields (35) and adjust the bitsize of 'done'. 
uint8_t done: 5; // set if the structure was filled From 76f67b1c527b2b188af24b7ccac4fd3f2f63adca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:58:55 +0200 Subject: [PATCH 17/78] Fix _xgetbv() on Windows builds. --- Python/cpuinfo.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 853404d00e56e3..72101a15272bcc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -17,9 +17,10 @@ // For simplicity, we only enable SIMD instructions for Intel CPUs, // even though we could support ARM NEON and POWER. #if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() #elif defined(_M_X64) -# include // __cpuidex() +# include // _xgetbv() +# include // __cpuidex() #else # undef CPUID_REG # define CPUID_REG(PARAM) Py_UNUSED(PARAM) @@ -202,7 +203,7 @@ get_xgetbv(uint32_t index) uint32_t eax = 0, edx = 0; __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); return ((uint64_t)edx << 32) | eax; -#elif defined (_MSC_VER) +#elif defined(_M_X64) return (uint64_t)_xgetbv(index); #else (void) index; From 0b49a505205a57f966fc6d40112d89dc8f5f963c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:40:28 +0200 Subject: [PATCH 18/78] fix comment --- Python/cpuinfo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 72101a15272bcc..2c309149fc8102 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -246,13 +246,13 @@ detect_simd_features(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif -#endif // !SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef 
CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif -#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } @@ -328,7 +328,7 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif -#endif // !SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ @@ -356,7 +356,7 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif -#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void @@ -551,7 +551,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // !SHOULD_DETECT_SIMD_FEATURES_L1 +#endif // SHOULD_DETECT_SIMD_FEATURES_L1 #ifdef SHOULD_DETECT_SIMD_FEATURES_L7 if (maxleaf >= 7) { #ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 @@ -568,7 +568,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // !SHOULD_DETECT_SIMD_FEATURES_L7 +#endif // SHOULD_DETECT_SIMD_FEATURES_L7 finalize_simd_features(flags); if (validate_simd_features(flags) < 0) { _Py_disable_simd_features(flags); From 9fd6152c0cf1b54ad737d2ea1460413e96278da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:53:01 +0200 Subject: [PATCH 19/78] harden detection of CPU features --- Include/internal/pycore_cpuinfo.h | 162 ++++++---- Python/cpuinfo.c | 505 +++++++++++++++--------------- configure.ac | 1 - 3 files 
changed, 354 insertions(+), 314 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 92cdff2c3f55f1..fe934fa13a70b1 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -11,112 +11,138 @@ extern "C" { #include // uint8_t -/* Macro indicating that the member is a CPUID bit. */ -#define _Py_SIMD_FEAT uint8_t -/* Macro indicating that the member is a XCR0 bit. */ -#define _Py_SIMD_XCR0_BIT uint8_t - -typedef struct py_simd_features { - /* Streaming SIMD Extensions */ - _Py_SIMD_FEAT sse: 1; - _Py_SIMD_FEAT sse2: 1; - _Py_SIMD_FEAT sse3: 1; - _Py_SIMD_FEAT ssse3: 1; // Supplemental SSE3 instructions - _Py_SIMD_FEAT sse41: 1; // SSE4.1 - _Py_SIMD_FEAT sse42: 1; // SSE4.2 - - /* Advanced Vector Extensions */ - _Py_SIMD_FEAT avx: 1; - _Py_SIMD_FEAT avx_ifma: 1; - _Py_SIMD_FEAT avx_ne_convert: 1; - - _Py_SIMD_FEAT avx_vnni: 1; - _Py_SIMD_FEAT avx_vnni_int8: 1; - _Py_SIMD_FEAT avx_vnni_int16: 1; - - /* Advanced Vector Extensions 2. */ - _Py_SIMD_FEAT avx2: 1; - +/* Declare a member of 'py_cpuid_features' storing a CPUID bit. */ +#define _Py_CPUID_DECL_FEAT(X) uint8_t X:1 +/* Declare a member of 'py_cpuid_features' storing a XCR0 bit. 
*/ +#define _Py_CPUID_DECL_XCR0(X) uint8_t X:1 + +typedef struct py_cpuid_features { + // --- Streaming SIMD Extensions ------------------------------------------ + _Py_CPUID_DECL_FEAT(sse); + _Py_CPUID_DECL_FEAT(sse2); + _Py_CPUID_DECL_FEAT(sse3); + _Py_CPUID_DECL_FEAT(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FEAT(sse41); // SSE4.1 + _Py_CPUID_DECL_FEAT(sse42); // SSE4.2 + + // --- Advanced Vector Extensions ----------------------------------------- + _Py_CPUID_DECL_FEAT(avx); + _Py_CPUID_DECL_FEAT(avx_ifma); + _Py_CPUID_DECL_FEAT(avx_ne_convert); + + _Py_CPUID_DECL_FEAT(avx_vnni); + _Py_CPUID_DECL_FEAT(avx_vnni_int8); + _Py_CPUID_DECL_FEAT(avx_vnni_int16); + + // --- Advanced Vector Extensions 2 --------------------------------------- + _Py_CPUID_DECL_FEAT(avx2); + + // --- Advanced Vector Extensions (512-bit) ------------------------------- /* + * * AVX-512 instruction set are grouped by the processor generation * that implements them (see https://en.wikipedia.org/wiki/AVX-512). * * We do not include GFNI, VPCLMULQDQ and VAES instructions since * they are not exactly AVX-512 per se, nor do we include BF16 or * FP16 since they operate on bfloat16 and binary16 (half-float). + * + * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for + * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). 
*/ - _Py_SIMD_FEAT avx512_f: 1; - _Py_SIMD_FEAT avx512_cd: 1; - - _Py_SIMD_FEAT avx512_er: 1; - _Py_SIMD_FEAT avx512_pf: 1; + _Py_CPUID_DECL_FEAT(avx512_f); + _Py_CPUID_DECL_FEAT(avx512_cd); - _Py_SIMD_FEAT avx512_4fmaps: 1; - _Py_SIMD_FEAT avx512_4vnniw: 1; + _Py_CPUID_DECL_FEAT(avx512_er); + _Py_CPUID_DECL_FEAT(avx512_pf); - _Py_SIMD_FEAT avx512_vpopcntdq: 1; + _Py_CPUID_DECL_FEAT(avx512_4fmaps); + _Py_CPUID_DECL_FEAT(avx512_4vnniw); - _Py_SIMD_FEAT avx512_vl: 1; - _Py_SIMD_FEAT avx512_dq: 1; - _Py_SIMD_FEAT avx512_bw: 1; + _Py_CPUID_DECL_FEAT(avx512_vpopcntdq); - _Py_SIMD_FEAT avx512_ifma: 1; + _Py_CPUID_DECL_FEAT(avx512_vl); + _Py_CPUID_DECL_FEAT(avx512_dq); + _Py_CPUID_DECL_FEAT(avx512_bw); - _Py_SIMD_FEAT avx512_vbmi: 1; + _Py_CPUID_DECL_FEAT(avx512_ifma); + _Py_CPUID_DECL_FEAT(avx512_vbmi); - _Py_SIMD_FEAT avx512_vnni: 1; + _Py_CPUID_DECL_FEAT(avx512_vnni); - _Py_SIMD_FEAT avx512_vbmi2: 1; - _Py_SIMD_FEAT avx512_bitalg: 1; + _Py_CPUID_DECL_FEAT(avx512_vbmi2); + _Py_CPUID_DECL_FEAT(avx512_bitalg); - _Py_SIMD_FEAT avx512_vp2intersect: 1; + _Py_CPUID_DECL_FEAT(avx512_vp2intersect); - _Py_SIMD_FEAT os_xsave: 1; // XSAVE is supported + // --- Instructions ------------------------------------------------------- + _Py_CPUID_DECL_FEAT(cmov); + _Py_CPUID_DECL_FEAT(fma); + _Py_CPUID_DECL_FEAT(popcnt); + _Py_CPUID_DECL_FEAT(pclmulqdq); - /* XCR0 register bits */ - _Py_SIMD_XCR0_BIT xcr0_sse: 1; + _Py_CPUID_DECL_FEAT(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FEAT(os_xsave); // XSAVE is enabled by the OS + // --- XCR0 register bits ------------------------------------------------- + _Py_CPUID_DECL_XCR0(xcr0_sse); // On some Intel CPUs, it is possible for the CPU to support AVX2 // instructions even though the underlying OS does not know about // AVX. In particular, only (SSE) XMM registers will be saved and // restored on context-switch, but not (AVX) YMM registers. 
- _Py_SIMD_XCR0_BIT xcr0_avx: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; - - // We want the structure to be aligned correctly, namely - // its size in bits must be a multiple of 8. - // + _Py_CPUID_DECL_XCR0(xcr0_avx); + _Py_CPUID_DECL_XCR0(xcr0_avx512_opmask); + _Py_CPUID_DECL_XCR0(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_XCR0(xcr0_avx512_hi16_zmm); + // Whenever a field is added or removed above, update the - // number of fields (35) and adjust the bitsize of 'done'. - uint8_t done: 5; // set if the structure was filled -} py_simd_features; + // number of fields (40) and adjust the bitsize of 'ready' + // so that the size of this structure is a multiple of 8. + uint8_t ready; // set if the structure is ready for usage +} py_cpuid_features; /* * Explicitly initialize all members to zero to guarantee that * we never have an un-initialized attribute at runtime which * could lead to an illegal instruction error. + * + * This does not mark 'flags' as being ready yet. */ extern void -_Py_disable_simd_features(py_simd_features *flags); +_Py_cpuid_disable_features(py_cpuid_features *flags); /* - * Apply a bitwise-OR on all flags in 'out' using those in 'src', - * unconditionally updating 'out' (i.e. 'out->done' is ignored). + * Check whether the structure is ready and flags are inter-compatible, + * returning 1 on success and 0 otherwise. * - * This also sets 'out->done' to 1 at the end. + * The caller should disable all CPUID detected features if the check + * fails to avoid encountering runtime illegal instruction errors. + */ +extern int +_Py_cpuid_check_features(const py_cpuid_features *flags); + +/* + * Return 1 if all expected flags are set in 'actual', 0 otherwise. * - * Note that the caller is responsible to ensure that the flags set to 1 - * must not lead to illegal instruction errors if the corresponding SIMD - * instruction(s) are used. 
+ * If 'actual' or 'expect' are not ready yet, this also returns 0. */ -extern void -_Py_update_simd_features(py_simd_features *out, const py_simd_features *src); +extern int +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); + + +/* + * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + */ +extern int +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); -/* Detect the available SIMD features on this machine. */ +/* Detect the available features on this machine. */ extern void -_Py_detect_simd_features(py_simd_features *flags); +_Py_cpuid_detect_features(py_cpuid_features *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2c309149fc8102..dddacc3d0286ef 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,23 +9,33 @@ #include // UINT32_C() -/* Macro to mark a CPUID register function parameter as being used. */ -#define CPUID_REG(PARAM) PARAM -/* Macro to check one or more CPUID register bits. */ +/* CPUID input and output registers are 32-bit unsigned integers */ +#define CPUID_REG uint32_t +/* Check one or more CPUID register bits. */ #define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) -// For simplicity, we only enable SIMD instructions for Intel CPUs, -// even though we could support ARM NEON and POWER. +// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. +// In the future, we should carefully enable support for ARM NEON and POWER +// as well as AMD. 
#if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() +# define HAS_CPUID_SUPPORT +# define HAS_XGETBV_SUPPORT #elif defined(_M_X64) -# include // _xgetbv() -# include // __cpuidex() +# include // _xgetbv() +# define HAS_XGETBV_SUPPORT +# include // __cpuidex() +# define HAS_CPUID_SUPPORT #else -# undef CPUID_REG -# define CPUID_REG(PARAM) Py_UNUSED(PARAM) +# undef HAS_CPUID_SUPPORT +# undef HAS_XGETBV_SUPPORT #endif +// Below, we declare macros for guarding the detection of SSE, AVX/AVX2 +// and AVX-512 instructions. If the compiler does not even recognize the +// corresponding flags or if we are not on an 64-bit platform we do not +// even try to inspect the output of CPUID for those specific features. +#ifdef HAS_CPUID_SUPPORT #if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -33,7 +43,6 @@ || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any SSE instructions detection code. */ # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif @@ -44,13 +53,11 @@ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX instructions detection code. */ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif #if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX-2 instructions detection code. */ # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif @@ -71,44 +78,46 @@ || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX-512 instructions detection code. 
*/ # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif +#endif // HAS_CPUID_SUPPORT // On macOS, checking the XCR0 register is NOT a guaranteed way // to ensure the usability of AVX-512. As such, we disable the // entire set of AVX-512 instructions. // // See https://stackoverflow.com/a/72523150/9579194. -// -// Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be -// compiled on x86_64). However, since autoconf incorrectly assumes so -// when compiling a universal2 binary, we disable AVX on such builds. #if defined(__APPLE__) # undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -# if defined(__arm64__) + // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be + // compiled on x86_64). However, since autoconf incorrectly assumes so + // when compiling a universal2 binary, we disable SIMD on such builds. +# if defined(__aarch64__) || defined(__arm64__) # undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD # undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD # endif #endif +// Below, we declare macros indicating how CPUID can be called at runtime, +// so that we only call CPUID with specific inputs when needed. + #if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ -# define SHOULD_DETECT_SIMD_FEATURES_L1 +# define SHOULD_PARSE_CPUID_L1 #endif #if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ -# define SHOULD_DETECT_SIMD_FEATURES_L7 -# define SHOULD_DETECT_SIMD_FEATURES_L7S0 +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S0 #endif #if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=1. 
*/ -# define SHOULD_DETECT_SIMD_FEATURES_L7 -# define SHOULD_DETECT_SIMD_FEATURES_L7S1 +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S1 #endif /* @@ -129,84 +138,89 @@ * Note 2: The SUBLEAF is also referred to as the 'count'. */ -/* CPUID (LEAF=1, SUBLEAF=0) */ -#define ECX_L1_SSE3 (UINT32_C(1) << 0) -#define ECX_L1_SSSE3 (UINT32_C(1) << 9) -#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) -#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) -#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) -#define ECX_L1_AVX (UINT32_C(1) << 28) - -#define EDX_L1_SSE (UINT32_C(1) << 25) -#define EDX_L1_SSE2 (UINT32_C(1) << 26) - -/* CPUID (LEAF=7, SUBLEAF=0) */ -#define EBX_L7_AVX2 (UINT32_C(1) << 5) -#define EBX_L7_AVX512_F (UINT32_C(1) << 16) -#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) -#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) -#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) -#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) -#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) -#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) -#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) - -#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) -#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) -#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) -#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) -#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) - -#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) -#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) -#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) - -/* CPUID (LEAF=7, SUBLEAF=1) */ -#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) -#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) - -#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) -#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) -#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define ECX_L1_SSE3 (UINT32_C(1) << 0) // 0x00000001 +#define ECX_L1_PCLMULQDQ (UINT32_C(1) << 1) // 0x00000002 +#define ECX_L1_SSSE3 (UINT32_C(1) << 9) // 0x00000200 +#define ECX_L1_FMA (UINT32_C(1) << 12) // 
0x00001000 +#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) // 0x00080000 +#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) // 0x00100000 +#define ECX_L1_POPCNT (UINT32_C(1) << 23) // 0x00800000 +#define ECX_L1_XSAVE (UINT32_C(1) << 26) // 0x04000000 +#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) // 0x08000000 +#define ECX_L1_AVX (UINT32_C(1) << 28) // 0x10000000 +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define EDX_L1_CMOV (UINT32_C(1) << 15) // 0x00008000 +#define EDX_L1_SSE (UINT32_C(1) << 25) // 0x02000000 +#define EDX_L1_SSE2 (UINT32_C(1) << 26) // 0x04000000 +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define EBX_L7_AVX2 (UINT32_C(1) << 5) // 0x00000020 +#define EBX_L7_AVX512_F (UINT32_C(1) << 16) // 0x00010000 +#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) // 0x00020000 +#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) // 0x00200000 +#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) // 0x04000000 +#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) // 0x08000000 +#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) // 0x10000000 +#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) // 0x40000000 +#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) // 0x80000000 +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) // 0x00000002 +#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) // 0x00000040 +#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) // 0x00000800 +#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) // 0x00001000 +#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) // 0x00004000 +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) // 0x00000004 +#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) // 0x00000008 +#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) // 0x00000100 +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) // 0x00000010 +#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) // 0x00800000 +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) // 0x00000010 +#define 
EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) // 0x00000020 +#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) // 0x00000400 +/* + * Call __cpuid_count() or equivalent and get + * its EAX, EBX, ECX and EDX output registers. + * + * If CPUID is not supported, registers are set to 0. + */ static inline void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, - uint32_t *CPUID_REG(eax), - uint32_t *CPUID_REG(ebx), - uint32_t *CPUID_REG(ecx), - uint32_t *CPUID_REG(edx)) + CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) { -#if defined(__x86_64__) && defined(__GNUC__) + *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); -#elif defined(_M_X64) +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); - *eax = info[0]; - *ebx = info[1]; - *ecx = info[2]; - *edx = info[3]; + *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; #endif } -/* XSAVE State Components. 
*/ -#define XCR0_SSE (UINT32_C(1) << 1) -#define XCR0_AVX (UINT32_C(1) << 2) -#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) -#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) -#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) +/* XSAVE state components (XCR0 control register) */ +#define XCR0_SSE (UINT32_C(1) << 1) // 0x00000002 +#define XCR0_AVX (UINT32_C(1) << 2) // 0x00000004 +#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) // 0x00000020 +#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) // 0x00000040 +#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) // 0x00000080 static inline uint64_t get_xgetbv(uint32_t index) { -#if defined(__x86_64__) && defined(__GNUC__) + assert(index == 0); // only XCR0 is supported for now +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); return ((uint64_t)edx << 32) | eax; -#elif defined(_M_X64) +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); #else - (void) index; + (void)index; return 0; #endif } @@ -215,16 +229,14 @@ get_xgetbv(uint32_t index) static inline uint32_t detect_cpuid_maxleaf(void) { - uint32_t maxlevel = 0, ebx = 0, ecx = 0, edx = 0; + CPUID_REG maxlevel = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); return maxlevel; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_simd_features(py_simd_features *flags, - uint32_t eax, uint32_t ebx, - uint32_t ecx, uint32_t edx) +detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD @@ -254,21 +266,29 @@ detect_simd_features(py_simd_features *flags, #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef HAS_CPUID_SUPPORT + flags->cmov = CPUID_CHECK_REG(edx, EDX_L1_CMOV); + flags->fma = CPUID_CHECK_REG(ecx, ECX_L1_FMA); + flags->popcnt = CPUID_CHECK_REG(ecx, ECX_L1_POPCNT); + flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); + + flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_simd_extended_features_ecx_0(py_simd_features *flags, - uint8_t eax, uint8_t ebx, - uint8_t ecx, uint8_t edx) +detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, + CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { + (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif -#endif +#endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS @@ -309,7 +329,6 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif - #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif @@ -333,10 +352,13 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, /* Extended Feature Bits (LEAF=7, SUBLEAF=1). 
*/ static inline void -detect_simd_extended_features_ecx_1(py_simd_features *flags, - uint8_t eax, uint8_t ebx, - uint8_t ecx, uint8_t edx) +detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, + CPUID_REG eax, + CPUID_REG ebx, + CPUID_REG ecx, + CPUID_REG edx) { + (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -360,51 +382,51 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, } static inline void -detect_simd_xsave_state(py_simd_features *flags) +detect_cpuid_xsave_state(py_cpuid_features *flags) { + // Keep the ordering and newlines as they are declared in the structure. +#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->os_xsave ? get_xgetbv(0) : 0; flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +#endif } static inline void -finalize_simd_features(py_simd_features *flags) +cpuid_features_finalize(py_cpuid_features *flags) { - assert(flags->done == 0); + assert(flags->ready == 0); + // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. - flags->done = 1; + + flags->ready = 1; } -/* - * Return 0 if flags are compatible and correctly set and -1 otherwise. - * - * If this function returns -1, 'flags' should disable all SIMD features - * to avoid encountering a possible illegal instruction error at runtime. 
- */ static inline int -validate_simd_features(const py_simd_features *flags) +cpuid_features_validate(const py_cpuid_features *flags) { - if (flags->done != 1) { + if (flags->ready != 1) { return -1; } // AVX-512/F is required to support any other AVX-512 instruction set uint8_t avx512_require_f = ( - flags->avx512_cd || flags->avx512_er || flags->avx512_pf || - flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || - flags->avx512_ifma || - flags->avx512_vbmi || + // newlines are placed according to processor generations + flags->avx512_cd || + flags->avx512_er || flags->avx512_pf || flags->avx512_4fmaps || flags->avx512_4vnniw || flags->avx512_vpopcntdq || - flags->avx512_vnni || flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || flags->avx512_vbmi || + flags->avx512_vnni || + flags->avx512_vbmi2 || flags->avx512_bitalg || flags->avx512_vp2intersect ); + if (!flags->avx512_f && !avx512_require_f) { return -1; } @@ -412,165 +434,158 @@ validate_simd_features(const py_simd_features *flags) return 0; } -void -_Py_disable_simd_features(py_simd_features *flags) +int +_Py_cpuid_check_features(const py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. 
-#define ZERO(FLAG) flags->FLAG = 0 - ZERO(sse); - ZERO(sse2); - ZERO(sse3); - ZERO(ssse3); - ZERO(sse41); - ZERO(sse42); - - ZERO(avx); - ZERO(avx_ifma); - ZERO(avx_ne_convert); - - ZERO(avx_vnni); - ZERO(avx_vnni_int8); - ZERO(avx_vnni_int16); - - ZERO(avx2); - - ZERO(avx512_f); - ZERO(avx512_cd); - - ZERO(avx512_er); - ZERO(avx512_pf); - - ZERO(avx512_4fmaps); - ZERO(avx512_4vnniw); - - ZERO(avx512_vpopcntdq); - - ZERO(avx512_vl); - ZERO(avx512_dq); - ZERO(avx512_bw); - - ZERO(avx512_ifma); - - ZERO(avx512_vbmi); - - ZERO(avx512_vnni); - - ZERO(avx512_vbmi2); - ZERO(avx512_bitalg); - - ZERO(avx512_vp2intersect); - - ZERO(os_xsave); - - ZERO(xcr0_sse); - ZERO(xcr0_avx); - ZERO(xcr0_avx512_opmask); - ZERO(xcr0_avx512_zmm_hi256); - ZERO(xcr0_avx512_hi16_zmm); -#undef ZERO + return cpuid_features_validate(flags) < 0 ? 0 : 1; } +/* + * Apply a 1-parameter macro MACRO(FLAG) on all members + * of a 'py_cpuid_features' object ('ready' is omitted). + */ +#define CPUID_APPLY_MACRO(MACRO) \ + do { \ + MACRO(sse); \ + MACRO(sse2); \ + MACRO(sse3); \ + MACRO(ssse3); \ + MACRO(sse41); \ + MACRO(sse42); \ + \ + MACRO(avx); \ + MACRO(avx_ifma); \ + MACRO(avx_ne_convert); \ + \ + MACRO(avx_vnni); \ + MACRO(avx_vnni_int8); \ + MACRO(avx_vnni_int16); \ + \ + MACRO(avx2); \ + \ + MACRO(avx512_f); \ + MACRO(avx512_cd); \ + \ + MACRO(avx512_er); \ + MACRO(avx512_pf); \ + \ + MACRO(avx512_4fmaps); \ + MACRO(avx512_4vnniw); \ + \ + MACRO(avx512_vpopcntdq); \ + \ + MACRO(avx512_vl); \ + MACRO(avx512_dq); \ + MACRO(avx512_bw); \ + \ + MACRO(avx512_ifma); \ + MACRO(avx512_vbmi); \ + \ + MACRO(avx512_vnni); \ + \ + MACRO(avx512_vbmi2); \ + MACRO(avx512_bitalg); \ + \ + MACRO(avx512_vp2intersect); \ + \ + MACRO(cmov); \ + MACRO(fma); \ + MACRO(popcnt); \ + MACRO(pclmulqdq); \ + \ + MACRO(xsave); \ + MACRO(os_xsave); \ + \ + MACRO(xcr0_sse); \ + MACRO(xcr0_avx); \ + MACRO(xcr0_avx512_opmask); \ + MACRO(xcr0_avx512_zmm_hi256); \ + MACRO(xcr0_avx512_hi16_zmm); \ + } while (0) + void 
-_Py_update_simd_features(py_simd_features *out, - const py_simd_features *src) +_Py_cpuid_disable_features(py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. -#define UPDATE(FLAG) out->FLAG |= src->FLAG - UPDATE(sse); - UPDATE(sse2); - UPDATE(sse3); - UPDATE(ssse3); - UPDATE(sse41); - UPDATE(sse42); - - UPDATE(avx); - UPDATE(avx_ifma); - UPDATE(avx_ne_convert); - - UPDATE(avx_vnni); - UPDATE(avx_vnni_int8); - UPDATE(avx_vnni_int16); - - UPDATE(avx2); - - UPDATE(avx512_f); - UPDATE(avx512_cd); - - UPDATE(avx512_er); - UPDATE(avx512_pf); - - UPDATE(avx512_4fmaps); - UPDATE(avx512_4vnniw); - - UPDATE(avx512_vpopcntdq); - - UPDATE(avx512_vl); - UPDATE(avx512_dq); - UPDATE(avx512_bw); - - UPDATE(avx512_ifma); - - UPDATE(avx512_vbmi); - - UPDATE(avx512_vnni); - - UPDATE(avx512_vbmi2); - UPDATE(avx512_bitalg); - - UPDATE(avx512_vp2intersect); +#define CPUID_DISABLE(FLAG) flags->FLAG = 0 + CPUID_APPLY_MACRO(CPUID_DISABLE); +#undef CPUID_DISABLE +} - UPDATE(os_xsave); +int +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ +#define CPUID_CHECK_FEATURE(FLAG) \ + do { \ + if (expect->FLAG && !actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_CHECK_FEATURE); +#undef CPUID_CHECK_FEATURE + return 1; +} - UPDATE(xcr0_sse); - UPDATE(xcr0_avx); - UPDATE(xcr0_avx512_opmask); - UPDATE(xcr0_avx512_zmm_hi256); - UPDATE(xcr0_avx512_hi16_zmm); -#undef UPDATE - out->done = 1; +int +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ +#define CPUID_MATCH_FEATURE(FLAG) \ + do { \ + if (expect->FLAG != actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_MATCH_FEATURE); +#undef CPUID_MATCH_FEATURE + return 1; } +#undef CPUID_APPLY_MACRO + void -_Py_detect_simd_features(py_simd_features *flags) +_Py_cpuid_detect_features(py_cpuid_features *flags) { - if (flags->done) { + if (flags->ready) { return; } - 
_Py_disable_simd_features(flags); + _Py_cpuid_disable_features(flags); +#ifdef HAS_CPUID_SUPPORT uint32_t maxleaf = detect_cpuid_maxleaf(); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; -#ifdef SHOULD_DETECT_SIMD_FEATURES_L1 + (void)maxleaf; // to suppress unused warnings + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + +#ifdef SHOULD_PARSE_CPUID_L1 if (maxleaf >= 1) { eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); - detect_simd_features(flags, eax, ebx, ecx, edx); + detect_cpuid_features(flags, ecx, edx); if (flags->os_xsave) { - detect_simd_xsave_state(flags); + detect_cpuid_xsave_state(flags); } } -#else - (void) maxleaf; - (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // SHOULD_DETECT_SIMD_FEATURES_L1 -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7 +#endif // SHOULD_PARSE_CPUID_L1 + +#ifdef SHOULD_PARSE_CPUID_L7 if (maxleaf >= 7) { -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 +#ifdef SHOULD_PARSE_CPUID_L7S0 eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); - detect_simd_extended_features_ecx_0(flags, eax, ebx, ecx, edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); #endif -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S1 +#ifdef SHOULD_PARSE_CPUID_L7S1 eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - detect_simd_extended_features_ecx_1(flags, eax, ebx, ecx, edx); + detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); #endif } -#else - (void) maxleaf; - (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // SHOULD_DETECT_SIMD_FEATURES_L7 - finalize_simd_features(flags); - if (validate_simd_features(flags) < 0) { - _Py_disable_simd_features(flags); +#endif // SHOULD_PARSE_CPUID_L7 + cpuid_features_finalize(flags); + if (cpuid_features_validate(flags) < 0) { + _Py_cpuid_disable_features(flags); } +#else + flags->ready = 1; +#endif // HAS_CPUID_SUPPORT } diff --git 
a/configure.ac b/configure.ac index 74a8e785c229bf..84a39e0d402804 100644 --- a/configure.ac +++ b/configure.ac @@ -7846,7 +7846,6 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) # PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) - # PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) # PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) From 97a0fc542cbedd813f84c6f65bcddad14be0a8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 11 Oct 2024 12:10:25 +0200 Subject: [PATCH 20/78] update configure --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index 2bcec7f82ce042..08940431c680f1 100755 --- a/configure +++ b/configure @@ -31788,7 +31788,6 @@ fi - # From 5f2884d38e43e04a7c044df6bd185c73d6d5af98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Oct 2024 10:29:57 +0200 Subject: [PATCH 21/78] update comments --- Include/internal/pycore_cpuinfo.h | 90 +++++++++++++++---------------- Python/cpuinfo.c | 6 +++ 2 files changed, 49 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index fe934fa13a70b1..779601f947111f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -11,35 +11,31 @@ extern "C" { #include // uint8_t -/* Declare a member of 'py_cpuid_features' storing a CPUID bit. */ -#define _Py_CPUID_DECL_FEAT(X) uint8_t X:1 -/* Declare a member of 'py_cpuid_features' storing a XCR0 bit. */ -#define _Py_CPUID_DECL_XCR0(X) uint8_t X:1 - typedef struct py_cpuid_features { + /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. 
*/ +#define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ - _Py_CPUID_DECL_FEAT(sse); - _Py_CPUID_DECL_FEAT(sse2); - _Py_CPUID_DECL_FEAT(sse3); - _Py_CPUID_DECL_FEAT(ssse3); // Supplemental SSE3 instructions - _Py_CPUID_DECL_FEAT(sse41); // SSE4.1 - _Py_CPUID_DECL_FEAT(sse42); // SSE4.2 + _Py_CPUID_DECL_FLAG(sse); + _Py_CPUID_DECL_FLAG(sse2); + _Py_CPUID_DECL_FLAG(sse3); + _Py_CPUID_DECL_FLAG(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FLAG(sse41); // SSE4.1 + _Py_CPUID_DECL_FLAG(sse42); // SSE4.2 // --- Advanced Vector Extensions ----------------------------------------- - _Py_CPUID_DECL_FEAT(avx); - _Py_CPUID_DECL_FEAT(avx_ifma); - _Py_CPUID_DECL_FEAT(avx_ne_convert); + _Py_CPUID_DECL_FLAG(avx); + _Py_CPUID_DECL_FLAG(avx_ifma); + _Py_CPUID_DECL_FLAG(avx_ne_convert); - _Py_CPUID_DECL_FEAT(avx_vnni); - _Py_CPUID_DECL_FEAT(avx_vnni_int8); - _Py_CPUID_DECL_FEAT(avx_vnni_int16); + _Py_CPUID_DECL_FLAG(avx_vnni); + _Py_CPUID_DECL_FLAG(avx_vnni_int8); + _Py_CPUID_DECL_FLAG(avx_vnni_int16); // --- Advanced Vector Extensions 2 --------------------------------------- - _Py_CPUID_DECL_FEAT(avx2); + _Py_CPUID_DECL_FLAG(avx2); // --- Advanced Vector Extensions (512-bit) ------------------------------- /* - * * AVX-512 instruction set are grouped by the processor generation * that implements them (see https://en.wikipedia.org/wiki/AVX-512). * @@ -50,51 +46,51 @@ typedef struct py_cpuid_features { * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). 
*/ - _Py_CPUID_DECL_FEAT(avx512_f); - _Py_CPUID_DECL_FEAT(avx512_cd); + _Py_CPUID_DECL_FLAG(avx512_f); + _Py_CPUID_DECL_FLAG(avx512_cd); - _Py_CPUID_DECL_FEAT(avx512_er); - _Py_CPUID_DECL_FEAT(avx512_pf); + _Py_CPUID_DECL_FLAG(avx512_er); + _Py_CPUID_DECL_FLAG(avx512_pf); - _Py_CPUID_DECL_FEAT(avx512_4fmaps); - _Py_CPUID_DECL_FEAT(avx512_4vnniw); + _Py_CPUID_DECL_FLAG(avx512_4fmaps); + _Py_CPUID_DECL_FLAG(avx512_4vnniw); - _Py_CPUID_DECL_FEAT(avx512_vpopcntdq); + _Py_CPUID_DECL_FLAG(avx512_vpopcntdq); - _Py_CPUID_DECL_FEAT(avx512_vl); - _Py_CPUID_DECL_FEAT(avx512_dq); - _Py_CPUID_DECL_FEAT(avx512_bw); + _Py_CPUID_DECL_FLAG(avx512_vl); + _Py_CPUID_DECL_FLAG(avx512_dq); + _Py_CPUID_DECL_FLAG(avx512_bw); - _Py_CPUID_DECL_FEAT(avx512_ifma); - _Py_CPUID_DECL_FEAT(avx512_vbmi); + _Py_CPUID_DECL_FLAG(avx512_ifma); + _Py_CPUID_DECL_FLAG(avx512_vbmi); - _Py_CPUID_DECL_FEAT(avx512_vnni); + _Py_CPUID_DECL_FLAG(avx512_vnni); - _Py_CPUID_DECL_FEAT(avx512_vbmi2); - _Py_CPUID_DECL_FEAT(avx512_bitalg); + _Py_CPUID_DECL_FLAG(avx512_vbmi2); + _Py_CPUID_DECL_FLAG(avx512_bitalg); - _Py_CPUID_DECL_FEAT(avx512_vp2intersect); + _Py_CPUID_DECL_FLAG(avx512_vp2intersect); // --- Instructions ------------------------------------------------------- - _Py_CPUID_DECL_FEAT(cmov); - _Py_CPUID_DECL_FEAT(fma); - _Py_CPUID_DECL_FEAT(popcnt); - _Py_CPUID_DECL_FEAT(pclmulqdq); + _Py_CPUID_DECL_FLAG(cmov); + _Py_CPUID_DECL_FLAG(fma); + _Py_CPUID_DECL_FLAG(popcnt); + _Py_CPUID_DECL_FLAG(pclmulqdq); - _Py_CPUID_DECL_FEAT(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FEAT(os_xsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(os_xsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- - _Py_CPUID_DECL_XCR0(xcr0_sse); + _Py_CPUID_DECL_FLAG(xcr0_sse); // On some Intel CPUs, it is possible for the CPU to support AVX2 // instructions even though the underlying OS does not know 
about // AVX. In particular, only (SSE) XMM registers will be saved and // restored on context-switch, but not (AVX) YMM registers. - _Py_CPUID_DECL_XCR0(xcr0_avx); - _Py_CPUID_DECL_XCR0(xcr0_avx512_opmask); - _Py_CPUID_DECL_XCR0(xcr0_avx512_zmm_hi256); - _Py_CPUID_DECL_XCR0(xcr0_avx512_hi16_zmm); - + _Py_CPUID_DECL_FLAG(xcr0_avx); + _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); + _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); +#undef _Py_CPUID_DECL_FLAG // Whenever a field is added or removed above, update the // number of fields (40) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index dddacc3d0286ef..edfc4e8b5be7ae 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -515,6 +515,9 @@ int _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { + if (!actual->ready || !expect->ready) { + return 0; + } #define CPUID_CHECK_FEATURE(FLAG) \ do { \ if (expect->FLAG && !actual->FLAG) { \ @@ -530,6 +533,9 @@ int _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { + if (!actual->ready || !expect->ready) { + return 0; + } #define CPUID_MATCH_FEATURE(FLAG) \ do { \ if (expect->FLAG != actual->FLAG) { \ From 7c3b74ede4f8ebf018d1b0e466ad9dda56d2ca4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Oct 2024 12:46:21 +0200 Subject: [PATCH 22/78] update Makefile --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index f3640921a501b6..019389c4ba9d07 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -432,9 +432,9 @@ PYTHON_OBJS= \ Python/codegen.o \ Python/compile.o \ Python/context.o \ + Python/cpuinfo.o \ Python/critical_section.o \ Python/crossinterp.o \ - Python/cpuinfo.o \ Python/dynamic_annotations.o \ Python/errors.o \ 
Python/flowgraph.o \ From 130d0991558bd802de5af7b1b56ad2871ce9ce9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:49:48 +0200 Subject: [PATCH 23/78] address Erlend's review --- Python/cpuinfo.c | 118 ++++++++++++++++---------------- configure | 58 ++++++++-------- configure.ac | 4 +- pyconfig.h.in | 174 +++++++++++++++++++++++------------------------ 4 files changed, 177 insertions(+), 177 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index edfc4e8b5be7ae..7139c0e632bdee 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -36,47 +36,47 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || 
defined(Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || 
defined(Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -240,28 +240,28 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif -#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif -#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); #endif -#ifdef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif -#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif -#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif #endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, 
ECX_L1_AVX); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -285,66 +285,66 @@ detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif #endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); #endif -#ifdef 
CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS flags->avx512_vbmi2 = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD @@ -361,21 +361,21 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -530,7 +530,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, +_Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { diff --git a/configure b/configure index 08940431c680f1..3795da56390fee 100755 --- a/configure +++ b/configure @@ -30667,7 +30667,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } if test "x$ax_cv_check_cflags___msse" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30715,7 +30715,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } if test "x$ax_cv_check_cflags___msse2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30763,7 +30763,7 @@ printf "%s\n" 
"$ax_cv_check_cflags___msse3" >&6; } if test "x$ax_cv_check_cflags___msse3" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30811,7 +30811,7 @@ printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } if test "x$ax_cv_check_cflags___mssse3" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30859,7 +30859,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } if test "x$ax_cv_check_cflags___msse4_1" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30907,7 +30907,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } if test "x$ax_cv_check_cflags___msse4_2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30956,7 +30956,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } if test "x$ax_cv_check_cflags___mavx" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31004,7 +31004,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } if test "x$ax_cv_check_cflags___mavxifma" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31052,7 +31052,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } if test "x$ax_cv_check_cflags___mavxneconvert" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h 
+printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31101,7 +31101,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } if test "x$ax_cv_check_cflags___mavxvnni" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31149,7 +31149,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31197,7 +31197,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31246,7 +31246,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } if test "x$ax_cv_check_cflags___mavx2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31295,7 +31295,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } if test "x$ax_cv_check_cflags___mavx512f" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31343,7 +31343,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } if test "x$ax_cv_check_cflags___mavx512cd" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ 
-31391,7 +31391,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } if test "x$ax_cv_check_cflags___mavx512er" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31439,7 +31439,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } if test "x$ax_cv_check_cflags___mavx512pf" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31488,7 +31488,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31536,7 +31536,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31585,7 +31585,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31634,7 +31634,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } if test "x$ax_cv_check_cflags___mavx512vl" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31682,7 +31682,7 @@ printf "%s\n" 
"$ax_cv_check_cflags___mavx512dq" >&6; } if test "x$ax_cv_check_cflags___mavx512dq" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31730,7 +31730,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } if test "x$ax_cv_check_cflags___mavx512bw" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31779,7 +31779,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } if test "x$ax_cv_check_cflags___mavx512ifma" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31827,7 +31827,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31876,7 +31876,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } if test "x$ax_cv_check_cflags___mavx512vnni" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31925,7 +31925,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31973,7 +31973,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } if test 
"x$ax_cv_check_cflags___mavx512bitalg" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -32022,7 +32022,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : diff --git a/configure.ac b/configure.ac index 84a39e0d402804..e371958e9848bf 100644 --- a/configure.ac +++ b/configure.ac @@ -7795,8 +7795,8 @@ AC_DEFUN([PY_SIMD_DETECT], [ [[ac_cv_can_compile_simd_]m4_tolower([$1])], [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], - [[CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) AC_MSG_CHECKING([checking SIMD instruction set]) AX_CHECK_COMPILE_FLAG([$2], [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], diff --git a/pyconfig.h.in b/pyconfig.h.in index 625c9798d6272b..9d503115e8ffe7 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -32,93 +32,6 @@ /* The Android API level. */ #undef ANDROID_API_LEVEL -/* Define if '-mavx2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - -/* Define if '-mavx5124fmaps' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS - -/* Define if '-mavx5124vnniw' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS - -/* Define if '-mavx512bitalg' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS - -/* Define if '-mavx512bw' is a valid compiler flag. 
*/ -#undef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS - -/* Define if '-mavx512cd' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS - -/* Define if '-mavx512dq' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS - -/* Define if '-mavx512er' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS - -/* Define if '-mavx512f' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS - -/* Define if '-mavx512ifma' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS - -/* Define if '-mavx512pf' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS - -/* Define if '-mavx512vbmi2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS - -/* Define if '-mavx512vbmi' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - -/* Define if '-mavx512vl' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS - -/* Define if '-mavx512vnni' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS - -/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS - -/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS - -/* Define if '-mavxifma' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS - -/* Define if '-mavx' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - -/* Define if '-mavxneconvert' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS - -/* Define if '-mavxvnni' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS - -/* Define if '-mavxvnniint16' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS - -/* Define if '-mavxvnniint8' is a valid compiler flag. 
*/ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS - -/* Define if '-msse2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - -/* Define if '-msse3' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - -/* Define if '-msse4.1' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - -/* Define if '-msse4.2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - -/* Define if '-msse' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - -/* Define if '-mssse3' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS - /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 @@ -1763,6 +1676,93 @@ /* PEP 11 Support tier (1, 2, 3 or 0 for unsupported) */ #undef PY_SUPPORT_TIER +/* Define if '-mavx2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. 
*/ +#undef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. 
*/ +#undef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG From cd575f0f744b17d6c5c35765c68d729692670fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:54:19 +0200 Subject: [PATCH 24/78] lint & comment fixups --- Python/cpuinfo.c | 2 +- configure | 13 +++++++------ configure.ac | 13 +++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 7139c0e632bdee..07e37bbc97fcfc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -530,7 +530,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, +_Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { diff --git a/configure b/configure index 3795da56390fee..12035e2197876d 100755 --- a/configure +++ b/configure @@ -30619,13 +30619,13 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } -# Detection of suported SIMD instruction sets for CPython. Since +# Detection of supported SIMD instruction sets for CPython. Since # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_simd_features in pycore_cpuinfo.h for how to order fields -# and where to put blank lines to separate processor generations -# for AVX-512 instructions. +# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then # SSE @@ -31206,7 +31206,7 @@ fi - # AVX 2 + # AVX-2 @@ -31255,7 +31255,7 @@ fi - # + # AVX-512 @@ -31352,6 +31352,7 @@ fi + # diff --git a/configure.ac b/configure.ac index e371958e9848bf..3218a771811a66 100644 --- a/configure.ac +++ b/configure.ac @@ -7805,13 +7805,13 @@ AC_DEFUN([PY_SIMD_DETECT], [ AS_VAR_POPDEF([py_define]) ]) -# Detection of suported SIMD instruction sets for CPython. Since +# Detection of supported SIMD instruction sets for CPython. Since # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_simd_features in pycore_cpuinfo.h for how to order fields -# and where to put blank lines to separate processor generations -# for AVX-512 instructions. +# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then # SSE PY_SIMD_DETECT([SSE], [-msse]) @@ -7828,11 +7828,12 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) - # AVX 2 + # AVX-2 PY_SIMD_DETECT([AVX2], [-mavx2]) - # + # AVX-512 PY_SIMD_DETECT([AVX512_F], [-mavx512f]) PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + # PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) # From 2b597a43437c288d7c7782ad186723c7919863d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 17:55:54 +0100 Subject: [PATCH 25/78] Update docs --- Include/internal/pycore_cpuinfo.h | 29 ++++++++++++++++++++++++++--- Python/cpuinfo.c | 6 ------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 779601f947111f..e6047778399227 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -1,3 +1,15 @@ +/* + * Interface for detecting the different CPUID flags in an opaque manner. + * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. + * + * If a module requires to support SIMD instructions, it should determine + * the compiler flags and the instruction sets required for the instrinsics + * to work. + * + * For the headers and expected CPUID bits needed by Intel intrinics, see + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. + */ + #ifndef Py_INTERNAL_CPUINFO_H #define Py_INTERNAL_CPUINFO_H @@ -44,7 +56,7 @@ typedef struct py_cpuid_features { * FP16 since they operate on bfloat16 and binary16 (half-float). 
* * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for - * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). + * the suffix meanings (for instance 'f' stands for 'Foundation'). */ _Py_CPUID_DECL_FLAG(avx512_f); _Py_CPUID_DECL_FLAG(avx512_cd); @@ -103,6 +115,8 @@ typedef struct py_cpuid_features { * could lead to an illegal instruction error. * * This does not mark 'flags' as being ready yet. + * + * Note: This function does not set any exception and thus never fails. */ extern void _Py_cpuid_disable_features(py_cpuid_features *flags); @@ -113,6 +127,8 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * * The caller should disable all CPUID detected features if the check * fails to avoid encountering runtime illegal instruction errors. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_check_features(const py_cpuid_features *flags); @@ -121,22 +137,29 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * Return 1 if all expected flags are set in 'actual', 0 otherwise. * * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect); - /* * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. * * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect); -/* Detect the available features on this machine. */ +/* + * Detect the available features on this machine, storing the result in 'flags'. + * + * Note: This function does not set any exception and thus never fails. 
+ */ extern void _Py_cpuid_detect_features(py_cpuid_features *flags); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 07e37bbc97fcfc..c7e4248b182f3e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,9 +1,3 @@ -/* - * Python CPU SIMD features detection. - * - * See https://en.wikipedia.org/wiki/CPUID for details. - */ - #include "Python.h" #include "pycore_cpuinfo.h" From 78be5307e87005946f9cdae48d06c404ffdbd308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 18:00:39 +0100 Subject: [PATCH 26/78] Fix typo --- Include/internal/pycore_cpuinfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index e6047778399227..f64edac7d9232a 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -3,10 +3,10 @@ * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. * * If a module requires to support SIMD instructions, it should determine - * the compiler flags and the instruction sets required for the instrinsics + * the compiler flags and the instruction sets required for the intrinsics * to work. * - * For the headers and expected CPUID bits needed by Intel intrinics, see + * For the headers and expected CPUID bits needed by Intel intrinsics, see * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. 
*/ From cbb7b533da80aa3928906ac168b08b7cf0e58ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:45:33 +0100 Subject: [PATCH 27/78] re-export functions for extension modules --- Include/internal/pycore_cpuinfo.h | 13 +++++++------ Python/cpuinfo.c | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index f64edac7d9232a..7a06a9c5c67001 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -21,7 +21,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include <stdint.h> // uint8_t +#include "Python.h" + typedef struct py_cpuid_features { /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. */ @@ -118,7 +119,7 @@ typedef struct py_cpuid_features { * * Note: This function does not set any exception and thus never fails. */ -extern void +PyAPI_FUNC(void) _Py_cpuid_disable_features(py_cpuid_features *flags); /* @@ -130,7 +131,7 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_check_features(const py_cpuid_features *flags); /* @@ -140,7 +141,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect); @@ -151,7 +152,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect); @@ -160,7 +161,7 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, * * Note: This function does not set any exception and thus never fails.
*/ -extern void +PyAPI_FUNC(void) _Py_cpuid_detect_features(py_cpuid_features *flags); #ifdef __cplusplus diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index c7e4248b182f3e..3219ece67d414a 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,4 +1,3 @@ -#include "Python.h" #include "pycore_cpuinfo.h" #include <stdint.h> // UINT32_C() From 21d8ca8fb77ae7cbe3fd8638199752daf5cdfdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:22:59 +0100 Subject: [PATCH 28/78] rename os_xsave to osxsave for future automatism --- Include/internal/pycore_cpuinfo.h | 2 +- Python/cpuinfo.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 7a06a9c5c67001..d7baaeced60f9f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -91,7 +91,7 @@ typedef struct py_cpuid_features { _Py_CPUID_DECL_FLAG(pclmulqdq); _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FLAG(os_xsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- _Py_CPUID_DECL_FLAG(xcr0_sse); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3219ece67d414a..de19ebe053f74b 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -266,7 +266,7 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); - flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); + flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif } @@ -379,7 +379,7 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT - uint64_t xcr0 = flags->os_xsave ?
get_xgetbv(0) : 0; + uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); @@ -487,7 +487,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) MACRO(pclmulqdq); \ \ MACRO(xsave); \ - MACRO(os_xsave); \ + MACRO(osxsave); \ \ MACRO(xcr0_sse); \ MACRO(xcr0_avx); \ @@ -560,7 +560,7 @@ _Py_cpuid_detect_features(py_cpuid_features *flags) eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - if (flags->os_xsave) { + if (flags->osxsave) { detect_cpuid_xsave_state(flags); } } From 1f9dbb4b9de0dfa024261fb7cc65889634cefd26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:12:00 +0100 Subject: [PATCH 29/78] remember `maxleaf` and make detection more readable --- Include/internal/pycore_cpuinfo.h | 1 + Python/cpuinfo.c | 97 +++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index d7baaeced60f9f..8d4a260c18a187 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -25,6 +25,7 @@ extern "C" { typedef struct py_cpuid_features { + uint32_t maxleaf; /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. 
*/ #define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index de19ebe053f74b..d093d2a75d131e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -222,9 +222,9 @@ get_xgetbv(uint32_t index) static inline uint32_t detect_cpuid_maxleaf(void) { - CPUID_REG maxlevel = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); - return maxlevel; + CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); + return maxleaf; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ @@ -392,6 +392,7 @@ static inline void cpuid_features_finalize(py_cpuid_features *flags) { assert(flags->ready == 0); + assert(flags->maxleaf >= 0); // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. @@ -499,6 +500,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) void _Py_cpuid_disable_features(py_cpuid_features *flags) { + flags->maxleaf = 0; #define CPUID_DISABLE(FLAG) flags->FLAG = 0 CPUID_APPLY_MACRO(CPUID_DISABLE); #undef CPUID_DISABLE @@ -511,6 +513,9 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, if (!actual->ready || !expect->ready) { return 0; } + if (actual->maxleaf < expect->maxleaf) { + return 0; + } #define CPUID_CHECK_FEATURE(FLAG) \ do { \ if (expect->FLAG && !actual->FLAG) { \ @@ -529,6 +534,9 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, if (!actual->ready || !expect->ready) { return 0; } + if (actual->maxleaf != expect->maxleaf) { + return 0; + } #define CPUID_MATCH_FEATURE(FLAG) \ do { \ if (expect->FLAG != actual->FLAG) { \ @@ -542,49 +550,76 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, #undef CPUID_APPLY_MACRO -void -_Py_cpuid_detect_features(py_cpuid_features *flags) -{ - if (flags->ready) { - return; - } - _Py_cpuid_disable_features(flags); 
-#ifdef HAS_CPUID_SUPPORT - uint32_t maxleaf = detect_cpuid_maxleaf(); - (void)maxleaf; // to suppress unused warnings - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; - (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings - #ifdef SHOULD_PARSE_CPUID_L1 - if (maxleaf >= 1) { - eax = 0, ebx = 0, ecx = 0, edx = 0; +static inline void +cpuid_detect_l1_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 1) { + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); if (flags->osxsave) { detect_cpuid_xsave_state(flags); } } -#endif // SHOULD_PARSE_CPUID_L1 +} +#else +#define cpuid_detect_l1_features(FLAGS) +#endif -#ifdef SHOULD_PARSE_CPUID_L7 - if (maxleaf >= 7) { #ifdef SHOULD_PARSE_CPUID_L7S0 - eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); - detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +static inline void +cpuid_detect_l7s0_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s0_features(FLAGS) #endif + #ifdef SHOULD_PARSE_CPUID_L7S1 - eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); +static inline void +cpuid_detect_l7s1_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7 +static inline void +cpuid_detect_l7_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 7) { + cpuid_detect_l7s0_features(flags); + cpuid_detect_l7s1_features(flags); + } +} +#else +#define cpuid_detect_l7_features(FLAGS) 
#endif + +void +_Py_cpuid_detect_features(py_cpuid_features *flags) +{ + if (flags->ready) { + return; } -#endif // SHOULD_PARSE_CPUID_L7 + _Py_cpuid_disable_features(flags); +#ifndef HAS_CPUID_SUPPORT + flags->ready = 1; +#else + flags->maxleaf = detect_cpuid_maxleaf(); + cpuid_detect_l1_features(flags); + cpuid_detect_l7_features(flags); cpuid_features_finalize(flags); if (cpuid_features_validate(flags) < 0) { _Py_cpuid_disable_features(flags); } -#else - flags->ready = 1; -#endif // HAS_CPUID_SUPPORT +#endif // !HAS_CPUID_SUPPORT } From 553aa7c0460b9bce6e271c034122c765fcdce1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:13:32 +0100 Subject: [PATCH 30/78] use enumeration for flags --- Include/internal/pycore_cpuinfo.h | 178 ++++++++++++++++++++++++++++++ Python/cpuinfo.c | 84 ++------------ 2 files changed, 186 insertions(+), 76 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 8d4a260c18a187..eecc73736c5f44 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -23,6 +23,184 @@ extern "C" { #include "Python.h" +/* + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'.
+ * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture. + */ +// fmt: off +typedef enum py_cpuid_feature_mask { +/*[python input] +# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}} +data = { + (1, 0, 'ECX'): { + 'SSE3': 0, + 'PCLMULQDQ': 1, + 'SSSE3': 9, + 'FMA': 12, + 'SSE4_1': 19, + 'SSE4_2': 20, + 'POPCNT': 23, + 'XSAVE': 26, + 'OSXSAVE': 27, + 'AVX': 28, + }, + (1, 0, 'EDX'): { + 'CMOV': 15, + 'SSE': 25, + 'SSE2': 26, + }, + (7, 0, 'EBX'): { + 'AVX2': 5, + 'AVX512_F': 16, + 'AVX512_DQ': 17, + 'AVX512_IFMA': 21, + 'AVX512_PF': 26, + 'AVX512_ER': 27, + 'AVX512_CD': 28, + 'AVX512_BW': 30, + 'AVX512_VL': 31, + }, + (7, 0, 'ECX'): { + 'AVX512_VBMI': 1, + 'AVX512_VBMI2': 6, + 'AVX512_VNNI': 11, + 'AVX512_BITALG': 12, + 'AVX512_VPOPCNTDQ': 14, + }, + (7, 0, 'EDX'): { + 'AVX512_4VNNIW': 2, + 'AVX512_4FMAPS': 3, + 'AVX512_VP2INTERSECT': 8, + }, + (7, 1, 'EAX'): { + 'AVX_VNNI': 4, + 'AVX_IFMA': 23, + }, + (7, 1, 'EDX'): { + 'AVX_VNNI_INT8': 4, + 'AVX_NE_CONVERT': 5, + 'AVX_VNNI_INT16': 10, + }, +} + +def get_member_name(leaf, subleaf, registry, name): + node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}' + return f'Py_CPUID_MASK_{registry}_{node}_{name}' + +def get_member_mask(bit): + val = format(1 << bit, '008x') + return f'= 0x{val},' + +# BUG(picnixz): Clinic does not like when commented lines have empty lines. +# so we use '::' for now to indicate an empty line. +# :: +# The enumeration is rendered as follows: +# :: +# <NAME> = 0x<MASK>, // bit = BIT +# ^ ^ ^ ^ ^ ^ ^ +# :: +# where ^ indicates a column that is a multiple of 4, <MASK> has +# exactly 8 characters and <BIT> has at most 2 characters. + +INDENT = ' ' * 4 +# BUG(picnixz): Clinic does not like when '/' and '*' are put together.
+COMMENT = '/' + '* ' + +def next_block(w): + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + +NAMESIZE = next_block(max( + len(get_member_name(*group, name)) + for group, values in data.items() + for name in values +)) +MASKSIZE = 8 + next_block(len('= 0x,')) + +for group, values in data.items(): + title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group) + print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='') + for name, bit in values.items(): + assert name, f"invalid entry in {group}" + key = get_member_name(*group, name) + assert 0 <= bit < 32, f"invalid bit value for {name!r}" + val = get_member_mask(bit) + + member_name = key.ljust(NAMESIZE) + member_mask = val.ljust(MASKSIZE) + + print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='') +[python start generated code]*/ + /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ + Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + 
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 + Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ + Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/ +} py_cpuid_feature_mask; +// fmt: on + +/* XSAVE state components (XCR0 control register) */ +typedef enum py_xsave_feature_mask { + Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +} py_xsave_feature_mask; typedef struct py_cpuid_features { uint32_t maxleaf; diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d093d2a75d131e..02ddc0dfafc0b5 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,11 +1,11 @@ #include "pycore_cpuinfo.h" -#include <stdint.h> // UINT32_C() -
/* CPUID input and output registers are 32-bit unsigned integers */ #define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ -#define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) +#define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) +#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT)) +#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT)) // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER @@ -113,67 +113,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -/* - * The macros below describe masks to apply on CPUID output registers. - * - * Each macro is of the form <REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, - * where <> (resp. []) denotes a required (resp. optional) group and: - * - * - REGISTER is EAX, EBX, ECX or EDX, - * - LEAF is the initial value of the EAX register (1 or 7), - * - SUBLEAF is the initial value of the ECX register (omitted if 0), and - * - FEATURE is a SIMD feature (with one or more specialized instructions). - * - * For maintainability, the flags are ordered by registers, leafs, subleafs, - * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. - * - * Note 1: The LEAF is also called the 'page' or the 'level'. - * Note 2: The SUBLEAF is also referred to as the 'count'.
- */ - -/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ -#define ECX_L1_SSE3 (UINT32_C(1) << 0) // 0x00000001 -#define ECX_L1_PCLMULQDQ (UINT32_C(1) << 1) // 0x00000002 -#define ECX_L1_SSSE3 (UINT32_C(1) << 9) // 0x00000200 -#define ECX_L1_FMA (UINT32_C(1) << 12) // 0x00001000 -#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) // 0x00080000 -#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) // 0x00100000 -#define ECX_L1_POPCNT (UINT32_C(1) << 23) // 0x00800000 -#define ECX_L1_XSAVE (UINT32_C(1) << 26) // 0x04000000 -#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) // 0x08000000 -#define ECX_L1_AVX (UINT32_C(1) << 28) // 0x10000000 -/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ -#define EDX_L1_CMOV (UINT32_C(1) << 15) // 0x00008000 -#define EDX_L1_SSE (UINT32_C(1) << 25) // 0x02000000 -#define EDX_L1_SSE2 (UINT32_C(1) << 26) // 0x04000000 -/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ -#define EBX_L7_AVX2 (UINT32_C(1) << 5) // 0x00000020 -#define EBX_L7_AVX512_F (UINT32_C(1) << 16) // 0x00010000 -#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) // 0x00020000 -#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) // 0x00200000 -#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) // 0x04000000 -#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) // 0x08000000 -#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) // 0x10000000 -#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) // 0x40000000 -#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) // 0x80000000 -/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ -#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) // 0x00000002 -#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) // 0x00000040 -#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) // 0x00000800 -#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) // 0x00001000 -#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) // 0x00004000 -/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ -#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) // 0x00000004 -#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) // 0x00000008 -#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) // 0x00000100 -/* CPUID (LEAF=7, 
SUBLEAF=1) [EAX] */ -#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) // 0x00000010 -#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) // 0x00800000 -/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ -#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) // 0x00000010 -#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) // 0x00000020 -#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) // 0x00000400 - /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. @@ -195,13 +134,6 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -/* XSAVE state components (XCR0 control register) */ -#define XCR0_SSE (UINT32_C(1) << 1) // 0x00000002 -#define XCR0_AVX (UINT32_C(1) << 2) // 0x00000004 -#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) // 0x00000020 -#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) // 0x00000040 -#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) // 0x00000080 - static inline uint64_t get_xgetbv(uint32_t index) { @@ -380,11 +312,11 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? 
get_xgetbv(0) : 0; - flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); - flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); - flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); + flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); + flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); + flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); + flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); + flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); #endif } From 39d2ba4de59cf2bf42398592bcfe14c3b1894edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:42:41 +0100 Subject: [PATCH 31/78] fix warnings --- Python/cpuinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 02ddc0dfafc0b5..595d4e075c848c 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -324,7 +324,6 @@ static inline void cpuid_features_finalize(py_cpuid_features *flags) { assert(flags->ready == 0); - assert(flags->maxleaf >= 0); // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. 
From d6a3523b2ef80de1a096939666b644a0a4b9b334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 16:08:58 +0100 Subject: [PATCH 32/78] remove un-necessary comment and newline continuation --- Python/cpuinfo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 595d4e075c848c..7181cc019d4a1c 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -49,8 +49,7 @@ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - // macros above should be sorted in alphabetical order +#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif From 3cb79f6b94ceeac57b1016e7592149cea35edb0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:08:13 +0100 Subject: [PATCH 33/78] regen configure --- configure | 493 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 290 insertions(+), 203 deletions(-) diff --git a/configure b/configure index 8a1a8afbc41eaa..1f9e74df81b7e9 100755 --- a/configure +++ b/configure @@ -32083,8 +32083,8 @@ printf %s "checking whether C compiler accepts -msse... 
" >&6; } if test ${ax_cv_check_cflags___msse+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32101,11 +32101,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse=yes -else $as_nop - ax_cv_check_cflags___msse=no +else case e in #( + e) ax_cv_check_cflags___msse=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } @@ -32114,8 +32116,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32131,8 +32134,8 @@ printf %s "checking whether C compiler accepts -msse2... " >&6; } if test ${ax_cv_check_cflags___msse2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32149,11 +32152,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse2=yes -else $as_nop - ax_cv_check_cflags___msse2=no +else case e in #( + e) ax_cv_check_cflags___msse2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } @@ -32162,8 +32167,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32179,8 +32185,8 @@ printf %s "checking whether C compiler accepts -msse3... 
" >&6; } if test ${ax_cv_check_cflags___msse3+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32197,11 +32203,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse3=yes -else $as_nop - ax_cv_check_cflags___msse3=no +else case e in #( + e) ax_cv_check_cflags___msse3=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } @@ -32210,8 +32218,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32227,8 +32236,8 @@ printf %s "checking whether C compiler accepts -mssse3... " >&6; } if test ${ax_cv_check_cflags___mssse3+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mssse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32245,11 +32254,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mssse3=yes -else $as_nop - ax_cv_check_cflags___mssse3=no +else case e in #( + e) ax_cv_check_cflags___mssse3=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } @@ -32258,8 +32269,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32275,8 +32287,8 @@ printf %s "checking whether C compiler accepts -msse4.1... 
" >&6; } if test ${ax_cv_check_cflags___msse4_1+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32293,11 +32305,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse4_1=yes -else $as_nop - ax_cv_check_cflags___msse4_1=no +else case e in #( + e) ax_cv_check_cflags___msse4_1=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } @@ -32306,8 +32320,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32323,8 +32338,8 @@ printf %s "checking whether C compiler accepts -msse4.2... " >&6; } if test ${ax_cv_check_cflags___msse4_2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse4.2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32341,11 +32356,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse4_2=yes -else $as_nop - ax_cv_check_cflags___msse4_2=no +else case e in #( + e) ax_cv_check_cflags___msse4_2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } @@ -32354,8 +32371,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32372,8 +32390,8 @@ printf %s "checking whether C compiler accepts -mavx... 
" >&6; } if test ${ax_cv_check_cflags___mavx+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32390,11 +32408,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx=yes -else $as_nop - ax_cv_check_cflags___mavx=no +else case e in #( + e) ax_cv_check_cflags___mavx=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } @@ -32403,8 +32423,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32420,8 +32441,8 @@ printf %s "checking whether C compiler accepts -mavxifma... " >&6; } if test ${ax_cv_check_cflags___mavxifma+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32438,11 +32459,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxifma=yes -else $as_nop - ax_cv_check_cflags___mavxifma=no +else case e in #( + e) ax_cv_check_cflags___mavxifma=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } @@ -32451,8 +32474,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32468,8 +32492,8 @@ printf %s "checking whether C compiler accepts -mavxneconvert... 
" >&6; } if test ${ax_cv_check_cflags___mavxneconvert+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxneconvert" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32486,11 +32510,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxneconvert=yes -else $as_nop - ax_cv_check_cflags___mavxneconvert=no +else case e in #( + e) ax_cv_check_cflags___mavxneconvert=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } @@ -32499,8 +32525,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32517,8 +32544,8 @@ printf %s "checking whether C compiler accepts -mavxvnni... 
" >&6; } if test ${ax_cv_check_cflags___mavxvnni+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32535,11 +32562,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnni=yes -else $as_nop - ax_cv_check_cflags___mavxvnni=no +else case e in #( + e) ax_cv_check_cflags___mavxvnni=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } @@ -32548,8 +32577,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32565,8 +32595,8 @@ printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint8+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnniint8" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32583,11 +32613,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnniint8=yes -else $as_nop - ax_cv_check_cflags___mavxvnniint8=no +else case e in #( + e) ax_cv_check_cflags___mavxvnniint8=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } @@ -32596,8 +32628,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32613,8 +32646,8 @@ printf %s "checking whether C compiler 
accepts -mavxvnniint16... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint16+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnniint16" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32631,11 +32664,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnniint16=yes -else $as_nop - ax_cv_check_cflags___mavxvnniint16=no +else case e in #( + e) ax_cv_check_cflags___mavxvnniint16=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } @@ -32644,8 +32679,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32662,8 +32698,8 @@ printf %s "checking whether C compiler accepts -mavx2... 
" >&6; } if test ${ax_cv_check_cflags___mavx2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32680,11 +32716,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx2=yes -else $as_nop - ax_cv_check_cflags___mavx2=no +else case e in #( + e) ax_cv_check_cflags___mavx2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } @@ -32693,8 +32731,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32711,8 +32750,8 @@ printf %s "checking whether C compiler accepts -mavx512f... " >&6; } if test ${ax_cv_check_cflags___mavx512f+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32729,11 +32768,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512f=yes -else $as_nop - ax_cv_check_cflags___mavx512f=no +else case e in #( + e) ax_cv_check_cflags___mavx512f=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } @@ -32742,8 +32783,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32759,8 +32801,8 @@ printf %s "checking whether C compiler accepts -mavx512cd... 
" >&6; } if test ${ax_cv_check_cflags___mavx512cd+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512cd" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32777,11 +32819,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512cd=yes -else $as_nop - ax_cv_check_cflags___mavx512cd=no +else case e in #( + e) ax_cv_check_cflags___mavx512cd=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } @@ -32790,8 +32834,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32808,8 +32853,8 @@ printf %s "checking whether C compiler accepts -mavx512er... " >&6; } if test ${ax_cv_check_cflags___mavx512er+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512er" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32826,11 +32871,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512er=yes -else $as_nop - ax_cv_check_cflags___mavx512er=no +else case e in #( + e) ax_cv_check_cflags___mavx512er=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } @@ -32839,8 +32886,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32856,8 +32904,8 @@ printf %s "checking whether C compiler accepts 
-mavx512pf... " >&6; } if test ${ax_cv_check_cflags___mavx512pf+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512pf" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32874,11 +32922,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512pf=yes -else $as_nop - ax_cv_check_cflags___mavx512pf=no +else case e in #( + e) ax_cv_check_cflags___mavx512pf=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } @@ -32887,8 +32937,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32905,8 +32956,8 @@ printf %s "checking whether C compiler accepts -mavx5124fmaps... 
" >&6; } if test ${ax_cv_check_cflags___mavx5124fmaps+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx5124fmaps" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32923,11 +32974,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx5124fmaps=yes -else $as_nop - ax_cv_check_cflags___mavx5124fmaps=no +else case e in #( + e) ax_cv_check_cflags___mavx5124fmaps=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } @@ -32936,8 +32989,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32953,8 +33007,8 @@ printf %s "checking whether C compiler accepts -mavx5124vnniw... 
" >&6; } if test ${ax_cv_check_cflags___mavx5124vnniw+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx5124vnniw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32971,11 +33025,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx5124vnniw=yes -else $as_nop - ax_cv_check_cflags___mavx5124vnniw=no +else case e in #( + e) ax_cv_check_cflags___mavx5124vnniw=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } @@ -32984,8 +33040,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33002,8 +33059,8 @@ printf %s "checking whether C compiler accepts -mavx512vpopcntdq... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vpopcntdq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33020,11 +33077,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vpopcntdq=yes -else $as_nop - ax_cv_check_cflags___mavx512vpopcntdq=no +else case e in #( + e) ax_cv_check_cflags___mavx512vpopcntdq=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } @@ -33033,8 +33092,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33051,8 +33111,8 @@ printf %s "checking whether C compiler accepts -mavx512vl... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vl+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vl" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33069,11 +33129,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vl=yes -else $as_nop - ax_cv_check_cflags___mavx512vl=no +else case e in #( + e) ax_cv_check_cflags___mavx512vl=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } @@ -33082,8 +33144,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33099,8 +33162,8 @@ printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } if test ${ax_cv_check_cflags___mavx512dq+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512dq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33117,11 +33180,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512dq=yes -else $as_nop - ax_cv_check_cflags___mavx512dq=no +else case e in #( + e) ax_cv_check_cflags___mavx512dq=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } @@ -33130,8 +33195,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33147,8 +33213,8 @@ printf %s "checking whether C compiler accepts 
-mavx512bw... " >&6; } if test ${ax_cv_check_cflags___mavx512bw+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512bw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33165,11 +33231,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512bw=yes -else $as_nop - ax_cv_check_cflags___mavx512bw=no +else case e in #( + e) ax_cv_check_cflags___mavx512bw=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } @@ -33178,8 +33246,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33196,8 +33265,8 @@ printf %s "checking whether C compiler accepts -mavx512ifma... 
" >&6; } if test ${ax_cv_check_cflags___mavx512ifma+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512ifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33214,11 +33283,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512ifma=yes -else $as_nop - ax_cv_check_cflags___mavx512ifma=no +else case e in #( + e) ax_cv_check_cflags___mavx512ifma=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } @@ -33227,8 +33298,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33244,8 +33316,8 @@ printf %s "checking whether C compiler accepts -mavx512vbmi... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vbmi" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33262,11 +33334,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vbmi=yes -else $as_nop - ax_cv_check_cflags___mavx512vbmi=no +else case e in #( + e) ax_cv_check_cflags___mavx512vbmi=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } @@ -33275,8 +33349,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33293,8 +33368,8 @@ printf %s "checking whether C compiler accepts -mavx512vnni... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vnni+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33311,11 +33386,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vnni=yes -else $as_nop - ax_cv_check_cflags___mavx512vnni=no +else case e in #( + e) ax_cv_check_cflags___mavx512vnni=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } @@ -33324,8 +33401,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33342,8 +33420,8 @@ printf %s "checking whether C compiler accepts -mavx512vbmi2... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vbmi2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33360,11 +33438,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vbmi2=yes -else $as_nop - ax_cv_check_cflags___mavx512vbmi2=no +else case e in #( + e) ax_cv_check_cflags___mavx512vbmi2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } @@ -33373,8 +33453,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33390,8 +33471,8 @@ printf %s "checking whether C compiler accepts -mavx512bitalg... 
" >&6; } if test ${ax_cv_check_cflags___mavx512bitalg+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512bitalg" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33408,11 +33489,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512bitalg=yes -else $as_nop - ax_cv_check_cflags___mavx512bitalg=no +else case e in #( + e) ax_cv_check_cflags___mavx512bitalg=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } @@ -33421,8 +33504,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33439,8 +33523,8 @@ printf %s "checking whether C compiler accepts -mavx512vp2intersect... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vp2intersect+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vp2intersect" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33457,11 +33541,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vp2intersect=yes -else $as_nop - ax_cv_check_cflags___mavx512vp2intersect=no +else case e in #( + e) ax_cv_check_cflags___mavx512vp2intersect=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } @@ -33470,8 +33556,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi From e0a578caf066b74716af8f3b5b9aa47b03cf3e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:18:31 +0100 Subject: [PATCH 34/78] clinic now supports empty comment lines in Python blocks --- Include/internal/pycore_cpuinfo.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index eecc73736c5f44..3b504da9831cdd 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -107,14 +107,11 @@ def get_member_mask(bit): val = format(1 << bit, '008x') return f'= 0x{val},' -# BUG(picnixz): Clinic does not like when commented lines have empty lines. -# so we use '::' for now to indicate an empty line. 
-# :: # The enumeration is rendered as follows: -# :: +# # = 0x, // bit = BIT # ^ ^ ^ ^ ^ ^ ^ -# :: +# # where ^ indicates a column that is a multiple of 4, has # exactly 8 characters and has at most 2 characters. @@ -189,7 +186,7 @@ for group, values in data.items(): Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/ +/*[python end generated code: output=e53c5376296af250 input=4102387db46d5787]*/ } py_cpuid_feature_mask; // fmt: on From c12f9c74fc6b9e728fee3c34fa308444808c308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 29 Mar 2025 13:20:30 +0100 Subject: [PATCH 35/78] move cpuinfo enumerations to real invokable Python scripts --- Include/internal/pycore_cpuinfo.h | 180 +----------------- .../internal/pycore_cpuinfo_cpuid_features.h | 102 ++++++++++ .../internal/pycore_cpuinfo_xsave_features.h | 47 +++++ Makefile.pre.in | 2 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Tools/cpuinfo/__init__.py | 0 Tools/cpuinfo/_util.py | 18 ++ Tools/cpuinfo/cpuid_features_gen.py | 138 ++++++++++++++ Tools/cpuinfo/xsave_features_gen.py | 59 ++++++ 10 files changed, 378 insertions(+), 176 deletions(-) create mode 100644 Include/internal/pycore_cpuinfo_cpuid_features.h create mode 100644 Include/internal/pycore_cpuinfo_xsave_features.h create mode 100644 Tools/cpuinfo/__init__.py create mode 100644 Tools/cpuinfo/_util.py create mode 100644 Tools/cpuinfo/cpuid_features_gen.py create mode 100644 Tools/cpuinfo/xsave_features_gen.py diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 3b504da9831cdd..c427d8c1fd3585 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -1,4 +1,6 @@ /* + * 
@author Bénédikt Tran + * * Interface for detecting the different CPUID flags in an opaque manner. * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. * @@ -22,182 +24,8 @@ extern "C" { #endif #include "Python.h" - -/* - * The enumeration describes masks to apply on CPUID output registers. - * - * Member names are Py_CPUID_MASK__L[S]_, - * where <> (resp. []) denotes a required (resp. optional) group and: - * - * - REGISTER is EAX, EBX, ECX or EDX, - * - LEAF is the initial value of the EAX register (1 or 7), - * - SUBLEAF is the initial value of the ECX register (omitted if 0), and - * - FEATURE is a SIMD feature (with one or more specialized instructions). - * - * For maintainability, the flags are ordered by registers, leafs, subleafs, - * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. - * - * Note 1: The LEAF is also called the 'page' or the 'level'. - * Note 2: The SUBLEAF is also referred to as the 'count'. - * - * The LEAF value should only 1 or 7 as other values may have different - * meanings depending on the underlying architecture. 
- */ -// fmt: off -typedef enum py_cpuid_feature_mask { -/*[python input] -# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}} -data = { - (1, 0, 'ECX'): { - 'SSE3': 0, - 'PCLMULQDQ': 1, - 'SSSE3': 9, - 'FMA': 12, - 'SSE4_1': 19, - 'SSE4_2': 20, - 'POPCNT': 23, - 'XSAVE': 26, - 'OSXSAVE': 27, - 'AVX': 28, - }, - (1, 0, 'EDX'): { - 'CMOV': 15, - 'SSE': 25, - 'SSE2': 26, - }, - (7, 0, 'EBX'): { - 'AVX2': 5, - 'AVX512_F': 16, - 'AVX512_DQ': 17, - 'AVX512_IFMA': 21, - 'AVX512_PF': 26, - 'AVX512_ER': 27, - 'AVX512_CD': 28, - 'AVX512_BW': 30, - 'AVX512_VL': 31, - }, - (7, 0, 'ECX'): { - 'AVX512_VBMI': 1, - 'AVX512_VBMI2': 6, - 'AVX512_VNNI': 11, - 'AVX512_BITALG': 12, - 'AVX512_VPOPCNTDQ': 14, - }, - (7, 0, 'EDX'): { - 'AVX512_4VNNIW': 2, - 'AVX512_4FMAPS': 3, - 'AVX512_VP2INTERSECT': 8, - }, - (7, 1, 'EAX'): { - 'AVX_VNNI': 4, - 'AVX_IFMA': 23, - }, - (7, 1, 'EDX'): { - 'AVX_VNNI_INT8': 4, - 'AVX_NE_CONVERT': 5, - 'AVX_VNNI_INT16': 10, - }, -} - -def get_member_name(leaf, subleaf, registry, name): - node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}' - return f'Py_CPUID_MASK_{registry}_{node}_{name}' - -def get_member_mask(bit): - val = format(1 << bit, '008x') - return f'= 0x{val},' - -# The enumeration is rendered as follows: -# -# = 0x, // bit = BIT -# ^ ^ ^ ^ ^ ^ ^ -# -# where ^ indicates a column that is a multiple of 4, has -# exactly 8 characters and has at most 2 characters. - -INDENT = ' ' * 4 -# BUG(picnixz): Clinic does not like when '/' and '*' are put together. 
-COMMENT = '/' + '* ' - -def next_block(w): - """Compute the smallest multiple of 4 strictly larger than *w*.""" - return ((w + 3) & ~0x03) if (w % 4) else (w + 4) - -NAMESIZE = next_block(max( - len(get_member_name(*group, name)) - for group, values in data.items() - for name in values -)) -MASKSIZE = 8 + next_block(len('= 0x,')) - -for group, values in data.items(): - title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group) - print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='') - for name, bit in values.items(): - assert name, f"invalid entry in {group}" - key = get_member_name(*group, name) - assert 0 <= bit < 32, f"invalid bit value for {name!r}" - val = get_member_mask(bit) - - member_name = key.ljust(NAMESIZE) - member_mask = val.ljust(MASKSIZE) - - print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='') -[python start generated code]*/ - /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 - /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 - /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - 
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 - /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 - /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 - /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 - /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -/*[python end generated code: output=e53c5376296af250 input=4102387db46d5787]*/ -} py_cpuid_feature_mask; -// fmt: on - -/* XSAVE state components (XCR0 control register) */ -typedef enum py_xsave_feature_mask { - Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -} py_xsave_feature_mask; +#include "pycore_cpuinfo_cpuid_features.h" +#include "pycore_cpuinfo_xsave_features.h" typedef struct py_cpuid_features { uint32_t maxleaf; diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h new file 
mode 100644 index 00000000000000..a67a1472bfb85f --- /dev/null +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -0,0 +1,102 @@ +/* + * @author Bénédikt Tran + * @seealso Tools/cpuinfo/cpuid_features_gen.py + * + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. + * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture. 
+ */ + +#ifndef Py_INTERNAL_CPUINFO_CPUID_FEATURES_H +#define Py_INTERNAL_CPUINFO_CPUID_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +// fmt: off +/*[python input] +import importlib +import os +import sys + +ROOT = os.getcwd() +TOOL = os.path.join(ROOT, 'Tools/cpuinfo/cpuid_features_gen.py') +TOOL = os.path.realpath(TOOL) + +if not os.path.exists(TOOL): + raise FileNotFoundError(TOOL) + +sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) +module = importlib.import_module('cpuinfo.cpuid_features_gen') +print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) +[python start generated code]*/ +typedef enum py_cpuid_feature_mask { + /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ + Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + Py_CPUID_MASK_EBX_L7_AVX512_CD = 
0x10000000, // bit = 28 + Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ + Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +} py_cpuid_feature_mask; +/*[python end generated code: output=c4460242e465fa91 input=a07f431329efd11e]*/ +// fmt: on + +#endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h new file mode 100644 index 00000000000000..42097d43529deb --- /dev/null +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -0,0 +1,47 @@ +/* + * @author Bénédikt Tran + * @seealso Tools/cpuinfo/xsave_features_gen.py + * + * XSAVE state components (XCR0 control register) + */ +#ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H +#define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +// fmt: off +/*[python input] +import importlib +import os +import 
sys + +ROOT = os.getcwd() +TOOL = os.path.join(ROOT, 'Tools/cpuinfo/xsave_features_gen.py') +TOOL = os.path.realpath(TOOL) + +if not os.path.exists(TOOL): + raise FileNotFoundError(TOOL) + +sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) +module = importlib.import_module('cpuinfo.xsave_features_gen') +print(module.generate_xsave_features_enum("py_xsave_feature_mask")) +[python start generated code]*/ +typedef enum py_xsave_feature_mask { + Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +} py_xsave_feature_mask; +/*[python end generated code: output=9a476ed0abbc617b input=78e3d4ff6b796edb]*/ +// fmt: on + +#endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 0211ae1804afcf..f23f34c7774018 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1223,6 +1223,8 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_cpuinfo.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_xsave_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 24c8996e9ebc72..7a0ff28ad0fd59 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -228,6 +228,8 @@ + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 88845d289c2235..3a8b043f8b9f50 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -603,6 +603,12 @@ Include\cpython + + Include\cpython + + + 
Include\cpython + Include\internal diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py new file mode 100644 index 00000000000000..9aef599bd8f0e5 --- /dev/null +++ b/Tools/cpuinfo/_util.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +__all__ = ["next_block", "make_enum_member"] + + +def next_block(w: int) -> int: + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + + +_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) + + +def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: + member_name = key.ljust(name_maxsize) + member_mask = format(1 << bit, "008x") + member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) + return f"{member_name}{member_mask} // bit = {bit}" diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py new file mode 100644 index 00000000000000..da5dc005bd2bf7 --- /dev/null +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -0,0 +1,138 @@ +""" +Generate an enumeration describing masks to apply on CPUID output registers. + +Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +where <> (resp. []) denotes a required (resp. optional) group and: + +- REGISTER is EAX, EBX, ECX or EDX, +- LEAF is the initial value of the EAX register (1 or 7), +- SUBLEAF is the initial value of the ECX register (omitted if 0), and +- FEATURE is a SIMD feature (with one or more specialized instructions). + +For maintainability, the flags are ordered by registers, leafs, subleafs, +and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + +Note 1: The LEAF is also called the 'page' or the 'level'. +Note 2: The SUBLEAF is also referred to as the 'count'. + +The LEAF value should only be 1 or 7 as other values may have different +meanings depending on the underlying architecture. 
+""" + +from __future__ import annotations + +__all__ = ["generate_cpuid_features_enum"] + +from functools import partial +from io import StringIO +from typing import TYPE_CHECKING +from . import _util as util + +if TYPE_CHECKING: + from typing import Final, IO + + type Leaf = int + type SubLeaf = int + type Registry = str + type FeatureFamily = tuple[Leaf, SubLeaf, Registry] + + type Feature = str + type Bit = int + +CPUID_FEATURES: Final[dict[CPUIDFeatureFamily, dict[Feature, Bit]]] = { + (1, 0, "ECX"): { + "SSE3": 0, + "PCLMULQDQ": 1, + "SSSE3": 9, + "FMA": 12, + "SSE4_1": 19, + "SSE4_2": 20, + "POPCNT": 23, + "XSAVE": 26, + "OSXSAVE": 27, + "AVX": 28, + }, + (1, 0, "EDX"): { + "CMOV": 15, + "SSE": 25, + "SSE2": 26, + }, + (7, 0, "EBX"): { + "AVX2": 5, + "AVX512_F": 16, + "AVX512_DQ": 17, + "AVX512_IFMA": 21, + "AVX512_PF": 26, + "AVX512_ER": 27, + "AVX512_CD": 28, + "AVX512_BW": 30, + "AVX512_VL": 31, + }, + (7, 0, "ECX"): { + "AVX512_VBMI": 1, + "AVX512_VBMI2": 6, + "AVX512_VNNI": 11, + "AVX512_BITALG": 12, + "AVX512_VPOPCNTDQ": 14, + }, + (7, 0, "EDX"): { + "AVX512_4VNNIW": 2, + "AVX512_4FMAPS": 3, + "AVX512_VP2INTERSECT": 8, + }, + (7, 1, "EAX"): { + "AVX_VNNI": 4, + "AVX_IFMA": 23, + }, + (7, 1, "EDX"): { + "AVX_VNNI_INT8": 4, + "AVX_NE_CONVERT": 5, + "AVX_VNNI_INT16": 10, + }, +} + + +def get_member_name( + leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature +) -> str: + node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" + return f"Py_CPUID_MASK_{registry}_{node}_{name}" + + +NAMESIZE: Final[int] = util.next_block( + max( + len(get_member_name(*family, name)) + for family, values in CPUID_FEATURES.items() + for name in values + ) +) + + +def generate_cpuid_features_enum(enum_name: str) -> str: + # The enumeration is rendered as follows: + # + # <NAME> = 0x<MASK>, // bit = BIT + # ^ ^ ^ ^ ^ ^ ^ + # + # where ^ indicates a column that is a multiple of 4, <MASK> has + # exactly 8 characters and <BIT> has at most 2 characters. 
+ + output = StringIO() + write = partial(print, file=output) + indent = " " * 4 + + write(f"typedef enum {enum_name} {{") + for family, values in CPUID_FEATURES.items(): + leaf, subleaf, registry = family + title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" + write(indent, "/* ", title, " */", sep="") + for feature_name, bit in values.items(): + if not feature_name: + raise ValueError(f"invalid entry for {family}") + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_member_name(leaf, subleaf, registry, feature_name) + member_def = util.make_enum_member(key, bit, NAMESIZE) + write(indent, member_def, sep="") + write(f"}} {enum_name};") + return output.getvalue().rstrip("\n") diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py new file mode 100644 index 00000000000000..fdcbcd2b51af27 --- /dev/null +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -0,0 +1,59 @@ +""" +Generate enumeration for XSAVE state components (XCR0 control register). +""" + +from __future__ import annotations + +__all__ = ["generate_xsave_features_enum"] + +from functools import partial +from io import StringIO +from typing import TYPE_CHECKING +from . import _util as util + +if TYPE_CHECKING: + from typing import Final + + type Feature = str + type Bit = int + +XSAVE_FEATURES: Final[dict[Feature, Bit]] = { + "SSE": 1, + "AVX": 2, + "AVX512_OPMASK": 5, + "AVX512_ZMM_HI256": 6, + "AVX512_HI16_ZMM": 7, +} + + +def get_member_name(feature: Feature) -> str: + return f"Py_XSAVE_MASK_XCR0_{feature}" + + +NAMESIZE: Final[int] = util.next_block( + max(map(len, map(get_member_name, XSAVE_FEATURES))) +) + + +def generate_xsave_features_enum(enum_name: str) -> str: + # The enumeration is rendered as follows: + # + # <NAME> = 0x<MASK>, // bit = BIT + # ^ ^ ^ ^ ^ ^ ^ + # + # where ^ indicates a column that is a multiple of 4, <MASK> has + # exactly 8 characters and <BIT> has at most 2 characters. 
+ + output = StringIO() + write = partial(print, file=output) + indent = " " * 4 + + write(f"typedef enum {enum_name} {{") + for feature_name, bit in XSAVE_FEATURES.items(): + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_member_name(feature_name) + member_def = util.make_enum_member(key, bit, NAMESIZE) + write(indent, member_def, sep="") + write(f"}} {enum_name};") + return output.getvalue().rstrip("\n") From bd3589feb1adb6f0e8bd0387c784e8e3bcc99cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Apr 2025 13:07:28 +0200 Subject: [PATCH 36/78] add comments --- Include/internal/pycore_cpuinfo_cpuid_features.h | 6 +++--- Include/internal/pycore_cpuinfo_xsave_features.h | 10 ++++++---- Tools/cpuinfo/cpuid_features_gen.py | 12 +++++++++++- Tools/cpuinfo/xsave_features_gen.py | 9 +++++++++ 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index a67a1472bfb85f..b8c3eb38f0d0e4 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -42,14 +42,14 @@ import os import sys ROOT = os.getcwd() -TOOL = os.path.join(ROOT, 'Tools/cpuinfo/cpuid_features_gen.py') +TOOL = os.path.join(ROOT, "Tools/cpuinfo/cpuid_features_gen.py") TOOL = os.path.realpath(TOOL) if not os.path.exists(TOOL): raise FileNotFoundError(TOOL) sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module('cpuinfo.cpuid_features_gen') +module = importlib.import_module("cpuinfo.cpuid_features_gen") print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) [python start generated code]*/ typedef enum py_cpuid_feature_mask { @@ -96,7 +96,7 @@ typedef enum py_cpuid_feature_mask { Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 
Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 } py_cpuid_feature_mask; -/*[python end generated code: output=c4460242e465fa91 input=a07f431329efd11e]*/ +/*[python end generated code: output=c4460242e465fa91 input=61d2b5f1bc368b94]*/ // fmt: on #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index 42097d43529deb..e81e1ab76557df 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -2,7 +2,9 @@ * @author Bénédikt Tran * @seealso Tools/cpuinfo/xsave_features_gen.py * - * XSAVE state components (XCR0 control register) + * XSAVE state components (XCR0 control register). + * + * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. */ #ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H #define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H @@ -24,14 +26,14 @@ import os import sys ROOT = os.getcwd() -TOOL = os.path.join(ROOT, 'Tools/cpuinfo/xsave_features_gen.py') +TOOL = os.path.join(ROOT, "Tools/cpuinfo/xsave_features_gen.py") TOOL = os.path.realpath(TOOL) if not os.path.exists(TOOL): raise FileNotFoundError(TOOL) sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module('cpuinfo.xsave_features_gen') +module = importlib.import_module("cpuinfo.xsave_features_gen") print(module.generate_xsave_features_enum("py_xsave_feature_mask")) [python start generated code]*/ typedef enum py_xsave_feature_mask { @@ -41,7 +43,7 @@ typedef enum py_xsave_feature_mask { Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 } py_xsave_feature_mask; -/*[python end generated code: output=9a476ed0abbc617b input=78e3d4ff6b796edb]*/ +/*[python end generated code: output=9a476ed0abbc617b input=41f35058299c0118]*/ // fmt: on #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git 
a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index da5dc005bd2bf7..f23a68c141b696 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -17,6 +17,8 @@ The LEAF value should only 1 or 7 as other values may have different meanings depending on the underlying architecture. + +.. seealso:: Include/internal/pycore_cpuinfo_cpuid_features.h """ from __future__ import annotations @@ -39,7 +41,8 @@ type Feature = str type Bit = int -CPUID_FEATURES: Final[dict[CPUIDFeatureFamily, dict[Feature, Bit]]] = { +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, Bit]]] = { + # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits. (1, 0, "ECX"): { "SSE3": 0, "PCLMULQDQ": 1, @@ -57,6 +60,7 @@ "SSE": 25, "SSE2": 26, }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features. (7, 0, "EBX"): { "AVX2": 5, "AVX512_F": 16, @@ -80,6 +84,7 @@ "AVX512_4FMAPS": 3, "AVX512_VP2INTERSECT": 8, }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=1:_Extended_Features. (7, 1, "EAX"): { "AVX_VNNI": 4, "AVX_IFMA": 23, @@ -109,6 +114,11 @@ def get_member_name( def generate_cpuid_features_enum(enum_name: str) -> str: + """Used by Include/internal/pycore_cpuinfo_cpuid_features.h. + + The C enumeration is generated by this function and Argument Clinic. + """ + # The enumeration is rendered as follows: # # = 0x, // bit = BIT diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index fdcbcd2b51af27..bacb4e8b4344a8 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -1,5 +1,9 @@ """ Generate enumeration for XSAVE state components (XCR0 control register). + +See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. + +.. 
seealso:: Include/internal/pycore_cpuinfo_xsave_features.h """ from __future__ import annotations @@ -36,6 +40,11 @@ def get_member_name(feature: Feature) -> str: def generate_xsave_features_enum(enum_name: str) -> str: + """Used by Include/internal/pycore_cpuinfo_xsave_features.h. + + The C enumeration is generated by this function and Argument Clinic. + """ + # The enumeration is rendered as follows: # # = 0x, // bit = BIT From d213b67c423743b084901339bcd22d970599da50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:29:28 +0200 Subject: [PATCH 37/78] update C comments --- Include/internal/pycore_cpuinfo.h | 10 +++++++--- Python/cpuinfo.c | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index c427d8c1fd3585..57ad48efb038c0 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -29,7 +29,11 @@ extern "C" { typedef struct py_cpuid_features { uint32_t maxleaf; - /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. */ + /* + * Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. + * Whenever this macro is used, do not forget to update the number of + * fields and the bitsize of the 'ready' member (see structure end). 
+ */ #define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ _Py_CPUID_DECL_FLAG(sse); @@ -94,8 +98,8 @@ typedef struct py_cpuid_features { _Py_CPUID_DECL_FLAG(popcnt); _Py_CPUID_DECL_FLAG(pclmulqdq); - _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- _Py_CPUID_DECL_FLAG(xcr0_sse); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 7181cc019d4a1c..0f934d04d76446 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -162,6 +162,7 @@ detect_cpuid_maxleaf(void) static inline void detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS @@ -206,6 +207,7 @@ static inline void detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD @@ -282,6 +284,7 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 7); (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -309,6 +312,7 @@ static inline void detect_cpuid_xsave_state(py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. 
+ assert(flags->maxleaf >= 1); #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); @@ -501,6 +505,7 @@ cpuid_detect_l1_features(py_cpuid_features *flags) static inline void cpuid_detect_l7s0_features(py_cpuid_features *flags) { + assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); @@ -513,6 +518,7 @@ cpuid_detect_l7s0_features(py_cpuid_features *flags) static inline void cpuid_detect_l7s1_features(py_cpuid_features *flags) { + assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); From 19b7d86e374fa94529b110cb57089511c37c1971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:43:13 +0200 Subject: [PATCH 38/78] TMP: usage proof-of-concept --- Modules/blake2module.c | 132 +++++++++-------------------------------- Modules/hmacmodule.c | 67 +++------------------ 2 files changed, 35 insertions(+), 164 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 0b0642c1e04e5a..4a9f16c3007b23 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,29 +16,11 @@ #include "pyconfig.h" #include "Python.h" #include "hashlib.h" -#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" -// QUICK CPU AUTODETECTION -// -// See https://github.com/python/cpython/pull/119316 -- we only enable -// vectorized versions for Intel CPUs, even though HACL*'s "vec128" modules also -// run on ARM NEON. 
(We could enable them on POWER -- but I don't have access to -// a test machine to see if that speeds anything up.) -// -// Note that configure.ac and the rest of the build are written in such a way -// that if the configure script finds suitable flags to compile HACL's SIMD128 -// (resp. SIMD256) files, then Hacl_Hash_Blake2b_Simd128.c (resp. ...) will be -// pulled into the build automatically, and then only the CPU autodetection will -// need to be updated here. - -#if defined(__x86_64__) && defined(__GNUC__) -#include -#elif defined(_M_X64) -#include -#endif - #include // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't @@ -51,83 +33,6 @@ # undef HACL_CAN_COMPILE_SIMD256 #endif -// ECX -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) - -// EBX -#define EBX_AVX2 (1 << 5) - -// EDX -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - -// zero-initialized by default -typedef struct { - bool sse, sse2, sse3, sse41, sse42, cmov, avx, avx2; - bool done; -} cpu_flags; - -void detect_cpu_features(cpu_flags *flags) { - if (!flags->done) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = { 0 }; - int info7[4] = { 0 }; - __cpuidex(info1, 1, 0); - __cpuidex(info7, 7, 0); - eax1 = info1[0]; - ebx1 = info1[1]; - ecx1 = info1[2]; - edx1 = info1[3]; - eax7 = info7[0]; - ebx7 = info7[1]; - ecx7 = info7[2]; - edx7 = info7[3]; -#endif - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; - - - flags->avx = (ecx1 & ECX_AVX) != 0; - - flags->avx2 = (ebx7 & EBX_AVX2) != 0; - - flags->sse = (edx1 & EDX_SSE) != 0; - flags->sse2 = (edx1 & EDX_SSE2) != 0; - 
flags->cmov = (edx1 & EDX_CMOV) != 0; - - flags->sse3 = (ecx1 & ECX_SSE3) != 0; - /* ssse3 = (ecx1 & ECX_SSSE3) != 0; */ - flags->sse41 = (ecx1 & ECX_SSE4_1) != 0; - flags->sse42 = (ecx1 & ECX_SSE4_2) != 0; - - flags->done = true; - } -} - -#ifdef HACL_CAN_COMPILE_SIMD128 -static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something - // else. - return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; -} -#endif - -#ifdef HACL_CAN_COMPILE_SIMD256 -static inline bool has_simd256(cpu_flags *flags) { - return flags->avx && flags->avx2; -} -#endif - // Small mismatch between the variable names Python defines as part of configure // at the ones HACL* expects to be set in order to enable those headers. #define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 @@ -154,9 +59,31 @@ PyDoc_STRVAR(blake2mod__doc__, typedef struct { PyTypeObject* blake2b_type; PyTypeObject* blake2s_type; - cpu_flags flags; + + bool can_run_simd128; + bool can_run_simd256; } Blake2State; +static void +blake2_init_cpu_features(Blake2State *state) +{ + py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); +#if HACL_CAN_COMPILE_SIMD128 + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; +#else + state->can_run_simd128 = false; +#endif + +#if HACL_CAN_COMPILE_SIMD256 + state->can_run_simd256 = flags.avx && flags.avx2; +#else + state->can_run_simd256 = false; +#endif +} + static inline Blake2State* blake2_get_state(PyObject *module) { @@ -224,10 +151,7 @@ static int blake2_exec(PyObject *m) { Blake2State* st = blake2_get_state(m); - - // This is called at module initialization-time, and so appears to be as - // good a place as any to probe the CPU flags. 
- detect_cpu_features(&st->flags); + blake2_init_cpu_features(st); st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2b_type_spec, NULL); @@ -332,14 +256,14 @@ static inline blake2_impl type_to_impl(PyTypeObject *type) { #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { #ifdef HACL_CAN_COMPILE_SIMD256 - if (has_simd256(&st->flags)) + if (st->can_run_simd256) return Blake2b_256; else #endif return Blake2b; } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { #ifdef HACL_CAN_COMPILE_SIMD128 - if (has_simd128(&st->flags)) + if (st->can_run_simd128) return Blake2s_128; else #endif diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f75854c6ef5c91..4940f58a7c24f6 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,6 +17,7 @@ #endif #include "Python.h" +#include "pycore_cpuinfo.h" // py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1682,73 +1683,19 @@ hmacmodule_init_strings(hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = { 0 }; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = { 0 }; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & 
ECX_AVX) != 0; - bool avx2 = (ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if HACL_CAN_COMPILE_SIMD128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if HACL_CAN_COMPILE_SIMD256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } From d59d06d985387e2de7ce1f2af4a94c116c1108ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 22 Apr 2025 15:36:07 +0200 Subject: [PATCH 39/78] improve configure.ac --- configure | 552 +++++++++++++++++++++++++++++++-------------------- configure.ac | 131 ++++++------ 2 files changed, 410 insertions(+), 273 deletions(-) diff --git a/configure b/configure index 97e68a7bed7dba..9aa2528b0910ef 100755 --- a/configure +++ b/configure @@ -32122,14 +32122,14 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 
instructions. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then - # SSE +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 printf %s "checking whether C compiler accepts -msse... " >&6; } if test ${ax_cv_check_cflags___msse+y} then : @@ -32164,23 +32164,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } if test "x$ax_cv_check_cflags___msse" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + + +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 printf %s "checking whether C compiler accepts -msse2... 
" >&6; } if test ${ax_cv_check_cflags___msse2+y} then : @@ -32215,23 +32219,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } if test "x$ax_cv_check_cflags___msse2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 printf %s "checking whether C compiler accepts -msse3... " >&6; } if test ${ax_cv_check_cflags___msse3+y} then : @@ -32266,23 +32274,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } if test "x$ax_cv_check_cflags___msse3" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse3=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse3=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse3" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 printf %s "checking whether C compiler accepts -mssse3... 
" >&6; } if test ${ax_cv_check_cflags___mssse3+y} then : @@ -32317,23 +32329,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } if test "x$ax_cv_check_cflags___mssse3" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_ssse3=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_ssse3=no ;; esac fi + if test "x$ac_cv_can_compile_simd_ssse3" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 printf %s "checking whether C compiler accepts -msse4.1... " >&6; } if test ${ax_cv_check_cflags___msse4_1+y} then : @@ -32368,23 +32384,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } if test "x$ax_cv_check_cflags___msse4_1" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse4_1=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse4_1=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse4_1" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 printf %s "checking whether C compiler accepts -msse4.2... 
" >&6; } if test ${ax_cv_check_cflags___msse4_2+y} then : @@ -32419,24 +32439,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } if test "x$ax_cv_check_cflags___msse4_2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse4_2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse4_2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse4_2" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h - # AVX +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 + + # AVX + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 printf %s "checking whether C compiler accepts -mavx... " >&6; } if test ${ax_cv_check_cflags___mavx+y} then : @@ -32471,23 +32495,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } if test "x$ax_cv_check_cflags___mavx" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 printf %s "checking whether C compiler accepts -mavxifma... 
" >&6; } if test ${ax_cv_check_cflags___mavxifma+y} then : @@ -32522,23 +32550,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } if test "x$ax_cv_check_cflags___mavxifma" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_ifma=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_ifma=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_ifma" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } if test ${ax_cv_check_cflags___mavxneconvert+y} then : @@ -32573,24 +32605,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } if test "x$ax_cv_check_cflags___mavxneconvert" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_ne_convert=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_ne_convert=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_ne_convert" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + + +fi + - # + + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } if test ${ax_cv_check_cflags___mavxvnni+y} then : @@ -32625,23 +32661,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } if test "x$ax_cv_check_cflags___mavxvnni" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint8+y} then : @@ -32676,23 +32716,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni_int8=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni_int8=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni_int8" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint16+y} then : @@ -32727,24 +32771,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni_int16=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni_int16=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni_int16" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h - # AVX-2 +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 + + # AVX-2 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 printf %s "checking whether C compiler accepts -mavx2... 
" >&6; } if test ${ax_cv_check_cflags___mavx2+y} then : @@ -32779,24 +32827,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } if test "x$ax_cv_check_cflags___mavx2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +fi + - # AVX-512 + # AVX-512 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 printf %s "checking whether C compiler accepts -mavx512f... " >&6; } if test ${ax_cv_check_cflags___mavx512f+y} then : @@ -32831,23 +32883,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } if test "x$ax_cv_check_cflags___mavx512f" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_f=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_f=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_f" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 printf %s "checking whether C compiler accepts -mavx512cd... 
" >&6; } if test ${ax_cv_check_cflags___mavx512cd+y} then : @@ -32882,24 +32938,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } if test "x$ax_cv_check_cflags___mavx512cd" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_cd=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_cd=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_cd" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 printf %s "checking whether C compiler accepts -mavx512er... " >&6; } if test ${ax_cv_check_cflags___mavx512er+y} then : @@ -32934,23 +32994,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } if test "x$ax_cv_check_cflags___mavx512er" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_er=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_er=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_er" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 printf %s "checking whether C compiler accepts -mavx512pf... 
" >&6; } if test ${ax_cv_check_cflags___mavx512pf+y} then : @@ -32985,24 +33049,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } if test "x$ax_cv_check_cflags___mavx512pf" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_pf=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_pf=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_pf" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } if test ${ax_cv_check_cflags___mavx5124fmaps+y} then : @@ -33037,23 +33105,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_4fmaps=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_4fmaps=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_4fmaps" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + + +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } if test ${ax_cv_check_cflags___mavx5124vnniw+y} then : @@ -33088,24 +33160,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_4vnniw=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_4vnniw=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_4vnniw" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 printf %s "checking whether C compiler accepts -mavx512vpopcntdq... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} then : @@ -33140,24 +33216,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vpopcntdq=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vpopcntdq=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vpopcntdq" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } if test ${ax_cv_check_cflags___mavx512vl+y} then : @@ -33192,23 +33272,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } if test "x$ax_cv_check_cflags___mavx512vl" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vl=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vl=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vl" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } if test ${ax_cv_check_cflags___mavx512dq+y} then : @@ -33243,23 +33327,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } if test "x$ax_cv_check_cflags___mavx512dq" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_dq=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_dq=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_dq" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } if test ${ax_cv_check_cflags___mavx512bw+y} then : @@ -33294,24 +33382,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } if test "x$ax_cv_check_cflags___mavx512bw" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_bw=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_bw=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_bw" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + - # +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } if test ${ax_cv_check_cflags___mavx512ifma+y} then : @@ -33346,23 +33438,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } if test "x$ax_cv_check_cflags___mavx512ifma" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_ifma=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_ifma=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_ifma" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi+y} then : @@ -33397,24 +33493,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vbmi=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vbmi=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vbmi" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +fi - # + + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } if test ${ax_cv_check_cflags___mavx512vnni+y} then : @@ -33449,24 +33549,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } if test "x$ax_cv_check_cflags___mavx512vnni" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vnni=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vnni=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vnni" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } if test ${ax_cv_check_cflags___mavx512vbmi2+y} then : @@ -33501,23 +33605,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vbmi2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vbmi2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vbmi2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 printf %s "checking whether C compiler accepts -mavx512bitalg... 
" >&6; } if test ${ax_cv_check_cflags___mavx512bitalg+y} then : @@ -33552,24 +33660,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_bitalg=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_bitalg=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_bitalg" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 printf %s "checking whether C compiler accepts -mavx512vp2intersect... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vp2intersect+y} then : @@ -33604,14 +33716,20 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vp2intersect=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vp2intersect=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vp2intersect" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h + + +fi diff --git a/configure.ac b/configure.ac index f64e81ccdc5fab..ce3e48ed876bef 100644 --- a/configure.ac +++ b/configure.ac @@ -7854,20 +7854,37 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) -dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, NORMALIZED_NAME) +dnl PY_SIMD_DETECT(INSTRUCTION-SET-NAME, COMPILER-FLAG, [NORMALIZED-NAME]) +dnl ---------------------------------------------------------------------- +dnl +dnl Check if the compiler supports a given COMPILER-FLAG and define: +dnl +dnl ac_cv_can_compile_simd_ = yes +dnl #define Py_CAN_COMPILE_SIMD__INSTRUCTIONS 1 +dnl +dnl or +dnl +dnl ac_cv_can_compile_simd_ = no +dnl #undef Py_CAN_COMPILE_SIMD__INSTRUCTIONS +dnl +dnl where and are the lowercased and uppercased versions +dnl of NORMALIZED-NAME; by default, the latter is INSTRUCTION-SET-NAME. 
+dnl AC_DEFUN([PY_SIMD_DETECT], [ - AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], - [[ac_cv_can_compile_simd_]m4_tolower([$1])], - [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) - AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], - [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) - AC_MSG_CHECKING([checking SIMD instruction set]) - AX_CHECK_COMPILE_FLAG([$2], - [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], - [], []) - AS_VAR_POPDEF([py_var]) - AS_VAR_POPDEF([py_define]) + AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], + [[ac_cv_can_compile_simd_]m4_tolower([$1])], + [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) + AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) + AX_CHECK_COMPILE_FLAG([$2], + [AS_VAR_SET([py_var], [yes])], + [AS_VAR_SET([py_var], [no])]) + AS_VAR_IF([py_var], [yes], [ + AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.]) + ]) + AS_VAR_POPDEF([py_var]) + AS_VAR_POPDEF([py_define]) ]) # Detection of supported SIMD instruction sets for CPython. Since @@ -7877,49 +7894,51 @@ AC_DEFUN([PY_SIMD_DETECT], [ # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. 
-if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then - # SSE - PY_SIMD_DETECT([SSE], [-msse]) - PY_SIMD_DETECT([SSE2], [-msse2]) - PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSSE3], [-mssse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) - PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) - # AVX - PY_SIMD_DETECT([AVX], [-mavx]) - PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) - PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) - # - PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) - PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) - PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) - # AVX-2 - PY_SIMD_DETECT([AVX2], [-mavx2]) - # AVX-512 - PY_SIMD_DETECT([AVX512_F], [-mavx512f]) - PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) - # - PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) - PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) - # - PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) - PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) - # - PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) - # - PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) - PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) - PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) - # - PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) - PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) - # - PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) - # - PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) - PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) - # - PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE + PY_SIMD_DETECT([SSE], [-msse]) + PY_SIMD_DETECT([SSE2], [-msse2]) + PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSSE3], [-mssse3]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) + # AVX + PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) + 
PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) + # + PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) + PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) + PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) + # AVX-2 + PY_SIMD_DETECT([AVX2], [-mavx2]) + # AVX-512 + PY_SIMD_DETECT([AVX512_F], [-mavx512f]) + PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + # + PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) + PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) + # + PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) + PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) + # + PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) + # + PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) + PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) + PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) + # + PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) + PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) + # + PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) + # + PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) + PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) + # + PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' From d00da3e8bae6d8b3b273f64081bdba95f25b61d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:04:43 +0200 Subject: [PATCH 40/78] post-merge --- Modules/blake2module.c | 1 - configure | 582 ++++++++++++++++++----------------------- configure.ac | 4 +- pyconfig.h.in | 174 ++++++------ 4 files changed, 344 insertions(+), 417 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index d8ae379f1f6529..e222d6d2e5c298 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -58,7 +58,6 @@ PyDoc_STRVAR(blake2mod__doc__, typedef struct { PyTypeObject *blake2b_type; PyTypeObject *blake2s_type; - bool can_run_simd128; bool can_run_simd256; } Blake2State; diff --git 
a/configure b/configure index 11a23302bf0f94..b0a7ed029fb1b0 100755 --- a/configure +++ b/configure @@ -32547,6 +32547,11 @@ fi # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. +# +# Although AVX support is not guaranteed on Android [1], this is safe +# because we do a runtime CPUID check. +# +# [1]: https://developer.android.com/ndk/guides/abis#86-64 if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then @@ -32556,13 +32561,13 @@ then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 printf %s "checking whether C compiler accepts -msse... " >&6; } -if test ${ax_cv_check_cflags___msse+y} +if test ${ax_cv_check_cflags__Werror__msse+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse" + CFLAGS="$CFLAGS -Werror -msse" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32576,18 +32581,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse=yes + ax_cv_check_cflags__Werror__msse=yes else case e in #( - e) ax_cv_check_cflags___msse=no ;; + e) ax_cv_check_cflags__Werror__msse=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } -if test "x$ax_cv_check_cflags___msse" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse" = xyes then : ac_cv_can_compile_simd_sse=yes else case e in #( @@ -32599,7 +32604,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h fi @@ -32611,13 +32616,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 printf %s "checking whether C compiler accepts -msse2... " >&6; } -if test ${ax_cv_check_cflags___msse2+y} +if test ${ax_cv_check_cflags__Werror__msse2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse2" + CFLAGS="$CFLAGS -Werror -msse2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32631,18 +32636,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse2=yes + ax_cv_check_cflags__Werror__msse2=yes else case e in #( - e) ax_cv_check_cflags___msse2=no ;; + e) ax_cv_check_cflags__Werror__msse2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } -if test "x$ax_cv_check_cflags___msse2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse2" = xyes then : ac_cv_can_compile_simd_sse2=yes else case e in #( @@ -32654,7 +32659,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h fi @@ -32666,13 +32671,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 printf %s "checking whether C compiler accepts -msse3... " >&6; } -if test ${ax_cv_check_cflags___msse3+y} +if test ${ax_cv_check_cflags__Werror__msse3+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse3" + CFLAGS="$CFLAGS -Werror -msse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32686,18 +32691,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse3=yes + ax_cv_check_cflags__Werror__msse3=yes else case e in #( - e) ax_cv_check_cflags___msse3=no ;; + e) ax_cv_check_cflags__Werror__msse3=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } -if test "x$ax_cv_check_cflags___msse3" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse3" = xyes then : ac_cv_can_compile_simd_sse3=yes else case e in #( @@ -32709,7 +32714,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h fi @@ -32721,13 +32726,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 printf %s "checking whether C compiler accepts -mssse3... " >&6; } -if test ${ax_cv_check_cflags___mssse3+y} +if test ${ax_cv_check_cflags__Werror__mssse3+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mssse3" + CFLAGS="$CFLAGS -Werror -mssse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32741,18 +32746,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mssse3=yes + ax_cv_check_cflags__Werror__mssse3=yes else case e in #( - e) ax_cv_check_cflags___mssse3=no ;; + e) ax_cv_check_cflags__Werror__mssse3=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 -printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } -if test "x$ax_cv_check_cflags___mssse3" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mssse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__mssse3" = xyes then : ac_cv_can_compile_simd_ssse3=yes else case e in #( @@ -32764,7 +32769,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h fi @@ -32776,13 +32781,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 printf %s "checking whether C compiler accepts -msse4.1... " >&6; } -if test ${ax_cv_check_cflags___msse4_1+y} +if test ${ax_cv_check_cflags__Werror__msse4_1+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.1" + CFLAGS="$CFLAGS -Werror -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32796,18 +32801,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_1=yes + ax_cv_check_cflags__Werror__msse4_1=yes else case e in #( - e) ax_cv_check_cflags___msse4_1=no ;; + e) ax_cv_check_cflags__Werror__msse4_1=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } -if test "x$ax_cv_check_cflags___msse4_1" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_1" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_1" = xyes then : ac_cv_can_compile_simd_sse4_1=yes else case e in #( @@ -32819,7 +32824,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h fi @@ -32831,13 +32836,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 printf %s "checking whether C compiler accepts -msse4.2... " >&6; } -if test ${ax_cv_check_cflags___msse4_2+y} +if test ${ax_cv_check_cflags__Werror__msse4_2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.2" + CFLAGS="$CFLAGS -Werror -msse4.2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32851,18 +32856,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_2=yes + ax_cv_check_cflags__Werror__msse4_2=yes else case e in #( - e) ax_cv_check_cflags___msse4_2=no ;; + e) ax_cv_check_cflags__Werror__msse4_2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } -if test "x$ax_cv_check_cflags___msse4_2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_2" = xyes then : ac_cv_can_compile_simd_sse4_2=yes else case e in #( @@ -32874,7 +32879,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h fi @@ -32887,13 +32892,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 printf %s "checking whether C compiler accepts -mavx... " >&6; } -if test ${ax_cv_check_cflags___mavx+y} +if test ${ax_cv_check_cflags__Werror__mavx+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx" + CFLAGS="$CFLAGS -Werror -mavx" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32907,18 +32912,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx=yes + ax_cv_check_cflags__Werror__mavx=yes else case e in #( - e) ax_cv_check_cflags___mavx=no ;; + e) ax_cv_check_cflags__Werror__mavx=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } -if test "x$ax_cv_check_cflags___mavx" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx" = xyes then : ac_cv_can_compile_simd_avx=yes else case e in #( @@ -32930,7 +32935,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h fi @@ -32942,13 +32947,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 printf %s "checking whether C compiler accepts -mavxifma... " >&6; } -if test ${ax_cv_check_cflags___mavxifma+y} +if test ${ax_cv_check_cflags__Werror__mavxifma+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxifma" + CFLAGS="$CFLAGS -Werror -mavxifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32962,18 +32967,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxifma=yes + ax_cv_check_cflags__Werror__mavxifma=yes else case e in #( - e) ax_cv_check_cflags___mavxifma=no ;; + e) ax_cv_check_cflags__Werror__mavxifma=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } -if test "x$ax_cv_check_cflags___mavxifma" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxifma" = xyes then : ac_cv_can_compile_simd_avx_ifma=yes else case e in #( @@ -32985,7 +32990,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h fi @@ -32997,13 +33002,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } -if test ${ax_cv_check_cflags___mavxneconvert+y} +if test ${ax_cv_check_cflags__Werror__mavxneconvert+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxneconvert" + CFLAGS="$CFLAGS -Werror -mavxneconvert" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33017,18 +33022,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxneconvert=yes + ax_cv_check_cflags__Werror__mavxneconvert=yes else case e in #( - e) ax_cv_check_cflags___mavxneconvert=no ;; + e) ax_cv_check_cflags__Werror__mavxneconvert=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } -if test "x$ax_cv_check_cflags___mavxneconvert" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxneconvert" = xyes then : ac_cv_can_compile_simd_avx_ne_convert=yes else case e in #( @@ -33040,7 +33045,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h fi @@ -33053,13 +33058,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } -if test ${ax_cv_check_cflags___mavxvnni+y} +if test ${ax_cv_check_cflags__Werror__mavxvnni+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnni" + CFLAGS="$CFLAGS -Werror -mavxvnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33073,18 +33078,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnni=yes + ax_cv_check_cflags__Werror__mavxvnni=yes else case e in #( - e) ax_cv_check_cflags___mavxvnni=no ;; + e) ax_cv_check_cflags__Werror__mavxvnni=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } -if test "x$ax_cv_check_cflags___mavxvnni" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnni" = xyes then : ac_cv_can_compile_simd_avx_vnni=yes else case e in #( @@ -33096,7 +33101,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33108,13 +33113,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } -if test ${ax_cv_check_cflags___mavxvnniint8+y} +if test ${ax_cv_check_cflags__Werror__mavxvnniint8+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnniint8" + CFLAGS="$CFLAGS -Werror -mavxvnniint8" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33128,18 +33133,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnniint8=yes + ax_cv_check_cflags__Werror__mavxvnniint8=yes else case e in #( - e) ax_cv_check_cflags___mavxvnniint8=no ;; + e) ax_cv_check_cflags__Werror__mavxvnniint8=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } -if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint8" = xyes then : ac_cv_can_compile_simd_avx_vnni_int8=yes else case e in #( @@ -33151,7 +33156,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h fi @@ -33163,13 +33168,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } -if test ${ax_cv_check_cflags___mavxvnniint16+y} +if test ${ax_cv_check_cflags__Werror__mavxvnniint16+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnniint16" + CFLAGS="$CFLAGS -Werror -mavxvnniint16" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33183,18 +33188,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnniint16=yes + ax_cv_check_cflags__Werror__mavxvnniint16=yes else case e in #( - e) ax_cv_check_cflags___mavxvnniint16=no ;; + e) ax_cv_check_cflags__Werror__mavxvnniint16=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } -if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint16" = xyes then : ac_cv_can_compile_simd_avx_vnni_int16=yes else case e in #( @@ -33206,7 +33211,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h fi @@ -33219,13 +33224,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags___mavx2+y} +if test ${ax_cv_check_cflags__Werror__mavx2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx2" + CFLAGS="$CFLAGS -Werror -mavx2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33239,18 +33244,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx2=yes + ax_cv_check_cflags__Werror__mavx2=yes else case e in #( - e) ax_cv_check_cflags___mavx2=no ;; + e) ax_cv_check_cflags__Werror__mavx2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } -if test "x$ax_cv_check_cflags___mavx2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes then : ac_cv_can_compile_simd_avx2=yes else case e in #( @@ -33262,7 +33267,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h fi @@ -33275,13 +33280,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 printf %s "checking whether C compiler accepts -mavx512f... " >&6; } -if test ${ax_cv_check_cflags___mavx512f+y} +if test ${ax_cv_check_cflags__Werror__mavx512f+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512f" + CFLAGS="$CFLAGS -Werror -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33295,18 +33300,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512f=yes + ax_cv_check_cflags__Werror__mavx512f=yes else case e in #( - e) ax_cv_check_cflags___mavx512f=no ;; + e) ax_cv_check_cflags__Werror__mavx512f=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } -if test "x$ax_cv_check_cflags___mavx512f" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512f" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512f" = xyes then : ac_cv_can_compile_simd_avx512_f=yes else case e in #( @@ -33318,7 +33323,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h fi @@ -33330,13 +33335,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } -if test ${ax_cv_check_cflags___mavx512cd+y} +if test ${ax_cv_check_cflags__Werror__mavx512cd+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512cd" + CFLAGS="$CFLAGS -Werror -mavx512cd" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33350,18 +33355,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512cd=yes + ax_cv_check_cflags__Werror__mavx512cd=yes else case e in #( - e) ax_cv_check_cflags___mavx512cd=no ;; + e) ax_cv_check_cflags__Werror__mavx512cd=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } -if test "x$ax_cv_check_cflags___mavx512cd" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512cd" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512cd" = xyes then : ac_cv_can_compile_simd_avx512_cd=yes else case e in #( @@ -33373,7 +33378,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h fi @@ -33386,13 +33391,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 printf %s "checking whether C compiler accepts -mavx512er... " >&6; } -if test ${ax_cv_check_cflags___mavx512er+y} +if test ${ax_cv_check_cflags__Werror__mavx512er+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512er" + CFLAGS="$CFLAGS -Werror -mavx512er" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33406,18 +33411,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512er=yes + ax_cv_check_cflags__Werror__mavx512er=yes else case e in #( - e) ax_cv_check_cflags___mavx512er=no ;; + e) ax_cv_check_cflags__Werror__mavx512er=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } -if test "x$ax_cv_check_cflags___mavx512er" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512er" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512er" = xyes then : ac_cv_can_compile_simd_avx512_er=yes else case e in #( @@ -33429,7 +33434,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h fi @@ -33441,13 +33446,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } -if test ${ax_cv_check_cflags___mavx512pf+y} +if test ${ax_cv_check_cflags__Werror__mavx512pf+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512pf" + CFLAGS="$CFLAGS -Werror -mavx512pf" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33461,18 +33466,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512pf=yes + ax_cv_check_cflags__Werror__mavx512pf=yes else case e in #( - e) ax_cv_check_cflags___mavx512pf=no ;; + e) ax_cv_check_cflags__Werror__mavx512pf=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } -if test "x$ax_cv_check_cflags___mavx512pf" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512pf" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512pf" = xyes then : ac_cv_can_compile_simd_avx512_pf=yes else case e in #( @@ -33484,7 +33489,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h fi @@ -33497,13 +33502,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } -if test ${ax_cv_check_cflags___mavx5124fmaps+y} +if test ${ax_cv_check_cflags__Werror__mavx5124fmaps+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx5124fmaps" + CFLAGS="$CFLAGS -Werror -mavx5124fmaps" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33517,18 +33522,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx5124fmaps=yes + ax_cv_check_cflags__Werror__mavx5124fmaps=yes else case e in #( - e) ax_cv_check_cflags___mavx5124fmaps=no ;; + e) ax_cv_check_cflags__Werror__mavx5124fmaps=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } -if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124fmaps" = xyes then : ac_cv_can_compile_simd_avx512_4fmaps=yes else case e in #( @@ -33540,7 +33545,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h fi @@ -33552,13 +33557,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } -if test ${ax_cv_check_cflags___mavx5124vnniw+y} +if test ${ax_cv_check_cflags__Werror__mavx5124vnniw+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx5124vnniw" + CFLAGS="$CFLAGS -Werror -mavx5124vnniw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33572,18 +33577,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx5124vnniw=yes + ax_cv_check_cflags__Werror__mavx5124vnniw=yes else case e in #( - e) ax_cv_check_cflags___mavx5124vnniw=no ;; + e) ax_cv_check_cflags__Werror__mavx5124vnniw=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } -if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124vnniw" = xyes then : ac_cv_can_compile_simd_avx512_4vnniw=yes else case e in #( @@ -33595,7 +33600,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h fi @@ -33608,13 +33613,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } -if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} +if test ${ax_cv_check_cflags__Werror__mavx512vpopcntdq+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vpopcntdq" + CFLAGS="$CFLAGS -Werror -mavx512vpopcntdq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33628,18 +33633,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vpopcntdq=yes + ax_cv_check_cflags__Werror__mavx512vpopcntdq=yes else case e in #( - e) ax_cv_check_cflags___mavx512vpopcntdq=no ;; + e) ax_cv_check_cflags__Werror__mavx512vpopcntdq=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } -if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vpopcntdq" = xyes then : ac_cv_can_compile_simd_avx512_vpopcntdq=yes else case e in #( @@ -33651,7 +33656,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h fi @@ -33664,13 +33669,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } -if test ${ax_cv_check_cflags___mavx512vl+y} +if test ${ax_cv_check_cflags__Werror__mavx512vl+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vl" + CFLAGS="$CFLAGS -Werror -mavx512vl" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33684,18 +33689,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vl=yes + ax_cv_check_cflags__Werror__mavx512vl=yes else case e in #( - e) ax_cv_check_cflags___mavx512vl=no ;; + e) ax_cv_check_cflags__Werror__mavx512vl=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } -if test "x$ax_cv_check_cflags___mavx512vl" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vl" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vl" = xyes then : ac_cv_can_compile_simd_avx512_vl=yes else case e in #( @@ -33707,7 +33712,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h fi @@ -33719,13 +33724,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } -if test ${ax_cv_check_cflags___mavx512dq+y} +if test ${ax_cv_check_cflags__Werror__mavx512dq+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512dq" + CFLAGS="$CFLAGS -Werror -mavx512dq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33739,18 +33744,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512dq=yes + ax_cv_check_cflags__Werror__mavx512dq=yes else case e in #( - e) ax_cv_check_cflags___mavx512dq=no ;; + e) ax_cv_check_cflags__Werror__mavx512dq=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } -if test "x$ax_cv_check_cflags___mavx512dq" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512dq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512dq" = xyes then : ac_cv_can_compile_simd_avx512_dq=yes else case e in #( @@ -33762,7 +33767,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h fi @@ -33774,13 +33779,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } -if test ${ax_cv_check_cflags___mavx512bw+y} +if test ${ax_cv_check_cflags__Werror__mavx512bw+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512bw" + CFLAGS="$CFLAGS -Werror -mavx512bw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33794,18 +33799,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512bw=yes + ax_cv_check_cflags__Werror__mavx512bw=yes else case e in #( - e) ax_cv_check_cflags___mavx512bw=no ;; + e) ax_cv_check_cflags__Werror__mavx512bw=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } -if test "x$ax_cv_check_cflags___mavx512bw" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bw" = xyes then : ac_cv_can_compile_simd_avx512_bw=yes else case e in #( @@ -33817,7 +33822,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h fi @@ -33830,13 +33835,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } -if test ${ax_cv_check_cflags___mavx512ifma+y} +if test ${ax_cv_check_cflags__Werror__mavx512ifma+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512ifma" + CFLAGS="$CFLAGS -Werror -mavx512ifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33850,18 +33855,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512ifma=yes + ax_cv_check_cflags__Werror__mavx512ifma=yes else case e in #( - e) ax_cv_check_cflags___mavx512ifma=no ;; + e) ax_cv_check_cflags__Werror__mavx512ifma=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } -if test "x$ax_cv_check_cflags___mavx512ifma" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512ifma" = xyes then : ac_cv_can_compile_simd_avx512_ifma=yes else case e in #( @@ -33873,7 +33878,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h fi @@ -33885,13 +33890,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi+y} +if test ${ax_cv_check_cflags__Werror__mavx512vbmi+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi" + CFLAGS="$CFLAGS -Werror -mavx512vbmi" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33905,18 +33910,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi=yes + ax_cv_check_cflags__Werror__mavx512vbmi=yes else case e in #( - e) ax_cv_check_cflags___mavx512vbmi=no ;; + e) ax_cv_check_cflags__Werror__mavx512vbmi=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi" = xyes then : ac_cv_can_compile_simd_avx512_vbmi=yes else case e in #( @@ -33928,7 +33933,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33941,13 +33946,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } -if test ${ax_cv_check_cflags___mavx512vnni+y} +if test ${ax_cv_check_cflags__Werror__mavx512vnni+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vnni" + CFLAGS="$CFLAGS -Werror -mavx512vnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33961,18 +33966,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vnni=yes + ax_cv_check_cflags__Werror__mavx512vnni=yes else case e in #( - e) ax_cv_check_cflags___mavx512vnni=no ;; + e) ax_cv_check_cflags__Werror__mavx512vnni=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } -if test "x$ax_cv_check_cflags___mavx512vnni" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vnni" = xyes then : ac_cv_can_compile_simd_avx512_vnni=yes else case e in #( @@ -33984,7 +33989,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33997,13 +34002,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi2+y} +if test ${ax_cv_check_cflags__Werror__mavx512vbmi2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi2" + CFLAGS="$CFLAGS -Werror -mavx512vbmi2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34017,18 +34022,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi2=yes + ax_cv_check_cflags__Werror__mavx512vbmi2=yes else case e in #( - e) ax_cv_check_cflags___mavx512vbmi2=no ;; + e) ax_cv_check_cflags__Werror__mavx512vbmi2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi2" = xyes then : ac_cv_can_compile_simd_avx512_vbmi2=yes else case e in #( @@ -34040,7 +34045,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h fi @@ -34052,13 +34057,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } -if test ${ax_cv_check_cflags___mavx512bitalg+y} +if test ${ax_cv_check_cflags__Werror__mavx512bitalg+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512bitalg" + CFLAGS="$CFLAGS -Werror -mavx512bitalg" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34072,18 +34077,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512bitalg=yes + ax_cv_check_cflags__Werror__mavx512bitalg=yes else case e in #( - e) ax_cv_check_cflags___mavx512bitalg=no ;; + e) ax_cv_check_cflags__Werror__mavx512bitalg=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } -if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bitalg" = xyes then : ac_cv_can_compile_simd_avx512_bitalg=yes else case e in #( @@ -34095,7 +34100,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h fi @@ -34108,13 +34113,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } -if test ${ax_cv_check_cflags___mavx512vp2intersect+y} +if test ${ax_cv_check_cflags__Werror__mavx512vp2intersect+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vp2intersect" + CFLAGS="$CFLAGS -Werror -mavx512vp2intersect" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34128,18 +34133,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vp2intersect=yes + ax_cv_check_cflags__Werror__mavx512vp2intersect=yes else case e in #( - e) ax_cv_check_cflags___mavx512vp2intersect=no ;; + e) ax_cv_check_cflags__Werror__mavx512vp2intersect=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } -if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vp2intersect" = xyes then : ac_cv_can_compile_simd_avx512_vp2intersect=yes else case e in #( @@ -34151,7 +34156,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h fi @@ -34210,48 +34215,19 @@ else use_hacl_universal2_impl=no fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +# The HACL* SIMD-128 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. 
if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... " >&6; } -if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -msse -msse2 -msse3 -msse4.1 -msse4.2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&6; } -if test "x$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" = xyes -then : - + # SIMD-128 + if test "$ac_cv_can_compile_simd_sse" = "yes" \ + -a "$ac_cv_can_compile_simd_sse2" = "yes" \ + -a "$ac_cv_can_compile_simd_sse3" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" + then LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" @@ -34272,65 +34248,22 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - - -else case e in #( - e) : ;; -esac -fi - + fi fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# 
Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. -# -# Although AVX support is not guaranteed on Android -# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a -# runtime CPUID check. +# The HACL* SIMD-256 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 -printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags__Werror__mavx2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mavx2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mavx2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mavx2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } -if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes -then : - + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then LIBHACL_SIMD256_FLAGS="-mavx2" + printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h @@ -34349,12 +34282,7 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - -else case e in #( - e) : ;; -esac -fi - + fi fi diff --git a/configure.ac b/configure.ac index 0c94495da79e90..75778af3de3170 100644 --- a/configure.ac +++ b/configure.ac @@ -8119,8 +8119,8 @@ then if test "$ac_cv_can_compile_simd_sse" = "yes" \ -a "$ac_cv_can_compile_simd_sse2" = "yes" \ -a "$ac_cv_can_compile_simd_sse3" = "yes" \ - -a "$ac_cv_can_compile_simd_sse41" = "yes" \ - -a "$ac_cv_can_compile_simd_sse42" = "yes" + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" then [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] diff --git a/pyconfig.h.in b/pyconfig.h.in index db72b1a6a05fee..478855c7022c3a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1724,93 +1724,6 @@ /* PEP 11 Support tier (1, 2, 3 or 0 for unsupported) */ #undef PY_SUPPORT_TIER -/* Define if '-mavx2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - -/* Define if '-mavx5124fmaps' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS - -/* Define if '-mavx5124vnniw' is a valid compiler flag. 
*/ -#undef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS - -/* Define if '-mavx512bitalg' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS - -/* Define if '-mavx512bw' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS - -/* Define if '-mavx512cd' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS - -/* Define if '-mavx512dq' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS - -/* Define if '-mavx512er' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS - -/* Define if '-mavx512f' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS - -/* Define if '-mavx512ifma' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS - -/* Define if '-mavx512pf' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS - -/* Define if '-mavx512vbmi2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS - -/* Define if '-mavx512vbmi' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - -/* Define if '-mavx512vl' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS - -/* Define if '-mavx512vnni' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS - -/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS - -/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS - -/* Define if '-mavxifma' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS - -/* Define if '-mavx' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - -/* Define if '-mavxneconvert' is a valid compiler flag. 
*/ -#undef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS - -/* Define if '-mavxvnni' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS - -/* Define if '-mavxvnniint16' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS - -/* Define if '-mavxvnniint8' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS - -/* Define if '-msse2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - -/* Define if '-msse3' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - -/* Define if '-msse4.1' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - -/* Define if '-msse4.2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - -/* Define if '-msse' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - -/* Define if '-mssse3' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS - /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG @@ -2104,6 +2017,93 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Define if '-mavx2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX From 8b7ecfb856508896e90fd9142968ac6cc2389dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:13:46 +0200 Subject: [PATCH 41/78] use `_Py` prefix to prevent public namespace pollution --- Include/internal/pycore_cpuinfo.h | 22 +-- .../internal/pycore_cpuinfo_cpuid_features.h | 103 ++++++------ .../internal/pycore_cpuinfo_xsave_features.h | 41 ++--- Modules/blake2module.c | 4 +- Modules/hmacmodule.c | 4 +- Python/cpuinfo.c | 156 +++++++++--------- Tools/cpuinfo/_util.py | 103 +++++++++++- Tools/cpuinfo/cpuid_features_gen.py | 55 +++--- Tools/cpuinfo/xsave_features_gen.py | 49 +++--- 9 files changed, 308 insertions(+), 229 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 57ad48efb038c0..49fe7652f3e74f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -27,10 +27,10 @@ extern "C" { #include "pycore_cpuinfo_cpuid_features.h" #include "pycore_cpuinfo_xsave_features.h" -typedef struct py_cpuid_features { +typedef struct _Py_cpuid_features_s { uint32_t maxleaf; /* - * Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. + * Macro to declare a member flag of '_Py_cpuid_features' as a uint8_t. 
* Whenever this macro is used, do not forget to update the number of * fields and the bitsize of the 'ready' member (see structure end). */ @@ -116,7 +116,7 @@ typedef struct py_cpuid_features { // number of fields (40) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. uint8_t ready; // set if the structure is ready for usage -} py_cpuid_features; +} _Py_cpuid_features; /* * Explicitly initialize all members to zero to guarantee that @@ -128,7 +128,7 @@ typedef struct py_cpuid_features { * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(void) -_Py_cpuid_disable_features(py_cpuid_features *flags); +_Py_cpuid_disable_features(_Py_cpuid_features *flags); /* * Check whether the structure is ready and flags are inter-compatible, @@ -140,7 +140,7 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(int) -_Py_cpuid_check_features(const py_cpuid_features *flags); +_Py_cpuid_check_features(const _Py_cpuid_features *flags); /* * Return 1 if all expected flags are set in 'actual', 0 otherwise. @@ -150,8 +150,8 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(int) -_Py_cpuid_has_features(const py_cpuid_features *actual, - const py_cpuid_features *expect); +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); /* * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. @@ -161,16 +161,16 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, * Note: This function does not set any exception and thus never fails. 
*/ PyAPI_FUNC(int) -_Py_cpuid_match_features(const py_cpuid_features *actual, - const py_cpuid_features *expect); +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); /* - * Detect the available features on this machine, storing the result in 'flags'. + * Detect the available host features, storing the result in 'flags'. * * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(void) -_Py_cpuid_detect_features(py_cpuid_features *flags); +_Py_cpuid_detect_features(_Py_cpuid_features *flags); #ifdef __cplusplus } diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index b8c3eb38f0d0e4..83aa6bc34c9aed 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -1,10 +1,10 @@ -/* +/** * @author Bénédikt Tran - * @seealso Tools/cpuinfo/cpuid_features_gen.py + * @seealso @file Tools/cpuinfo/cpuid_features_gen.py * * The enumeration describes masks to apply on CPUID output registers. * - * Member names are Py_CPUID_MASK__L[S]_, + * Member names are _Py_CPUID_MASK__L[S]_, * where <> (resp. []) denotes a required (resp. 
optional) group and: * * - REGISTER is EAX, EBX, ECX or EDX, @@ -35,68 +35,59 @@ extern "C" { #include "Python.h" -// fmt: off /*[python input] -import importlib -import os -import sys - -ROOT = os.getcwd() -TOOL = os.path.join(ROOT, "Tools/cpuinfo/cpuid_features_gen.py") -TOOL = os.path.realpath(TOOL) - -if not os.path.exists(TOOL): - raise FileNotFoundError(TOOL) - -sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module("cpuinfo.cpuid_features_gen") -print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) +from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum +print(generate_cpuid_features_enum("_Py_cpuid_feature_mask")) [python start generated code]*/ -typedef enum py_cpuid_feature_mask { +// fmt: off +/** Enumeration for CPUID features */ +enum _Py_cpuid_feature_mask_e { /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + _Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + _Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + _Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + _Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + _Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + _Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + _Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + _Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + 
_Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + _Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + _Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + _Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + _Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + _Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + _Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + _Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + _Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + _Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + _Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + _Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 + _Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + _Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + _Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + 
_Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + _Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + _Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -} py_cpuid_feature_mask; -/*[python end generated code: output=c4460242e465fa91 input=61d2b5f1bc368b94]*/ + _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +}; // fmt: on +/*[python end generated code: output=8e58b0997d69bbf8 input=fce00935f64021f9]*/ #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index e81e1ab76557df..c0e33e820b9ef1 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -1,11 +1,12 @@ -/* +/** * @author Bénédikt Tran - * @seealso 
Tools/cpuinfo/xsave_features_gen.py + * @seealso @file Tools/cpuinfo/xsave_features_gen.py * * XSAVE state components (XCR0 control register). * * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. */ + #ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H #define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H @@ -21,29 +22,21 @@ extern "C" { // fmt: off /*[python input] -import importlib -import os -import sys - -ROOT = os.getcwd() -TOOL = os.path.join(ROOT, "Tools/cpuinfo/xsave_features_gen.py") -TOOL = os.path.realpath(TOOL) - -if not os.path.exists(TOOL): - raise FileNotFoundError(TOOL) - -sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module("cpuinfo.xsave_features_gen") -print(module.generate_xsave_features_enum("py_xsave_feature_mask")) +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) +from cpuinfo.xsave_features_gen import generate_xsave_features_enum +print(generate_xsave_features_enum("_Py_xsave_feature_mask")) [python start generated code]*/ -typedef enum py_xsave_feature_mask { - Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -} py_xsave_feature_mask; -/*[python end generated code: output=9a476ed0abbc617b input=41f35058299c0118]*/ +// fmt: off +/** Enumeration for XSAVE components */ +enum _Py_xsave_feature_mask_e { + _Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + _Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + _Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +}; // fmt: on +/*[python end generated code: output=35ea9a165938f8ef input=336793a305515376]*/ #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff 
--git a/Modules/blake2module.c b/Modules/blake2module.c index e222d6d2e5c298..2f8baea62d77fc 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,7 +16,7 @@ #include "Python.h" #include "hashlib.h" -#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" @@ -111,7 +111,7 @@ _blake2_free(void *module) static void blake2module_init_cpu_features(Blake2State *state) { - py_cpuid_features flags; + _Py_cpuid_features flags; _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f2e47e0fab15aa..064e31fe830deb 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,7 +17,7 @@ #endif #include "Python.h" -#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1553,7 +1553,7 @@ hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - py_cpuid_features flags; + _Py_cpuid_features flags; _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0f934d04d76446..6e595f438e1a7e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,8 +4,8 @@ #define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ #define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 
0 : 1) -#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT)) -#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT)) +#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT)) +#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_XSAVE_MASK_ ## FEAT)) // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER @@ -29,46 +29,46 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || 
defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) +#if defined(_Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || 
defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -160,33 +160,33 @@ detect_cpuid_maxleaf(void) /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) +detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif #endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD 
-#ifdef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -204,73 +204,73 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, +detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif #endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS flags->avx512_4vnniw = 
CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS flags->avx512_vbmi2 = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD @@ -278,7 +278,7 @@ detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, /* 
Extended Feature Bits (LEAF=7, SUBLEAF=1). */ static inline void -detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, +detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, CPUID_REG eax, CPUID_REG ebx, CPUID_REG ecx, @@ -288,28 +288,28 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void -detect_cpuid_xsave_state(py_cpuid_features *flags) +detect_cpuid_xsave_state(_Py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. 
assert(flags->maxleaf >= 1); @@ -324,7 +324,7 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) } static inline void -cpuid_features_finalize(py_cpuid_features *flags) +cpuid_features_finalize(_Py_cpuid_features *flags) { assert(flags->ready == 0); @@ -335,7 +335,7 @@ cpuid_features_finalize(py_cpuid_features *flags) } static inline int -cpuid_features_validate(const py_cpuid_features *flags) +cpuid_features_validate(const _Py_cpuid_features *flags) { if (flags->ready != 1) { return -1; @@ -363,14 +363,14 @@ cpuid_features_validate(const py_cpuid_features *flags) } int -_Py_cpuid_check_features(const py_cpuid_features *flags) +_Py_cpuid_check_features(const _Py_cpuid_features *flags) { return cpuid_features_validate(flags) < 0 ? 0 : 1; } /* * Apply a 1-parameter macro MACRO(FLAG) on all members - * of a 'py_cpuid_features' object ('ready' is omitted). + * of a '_Py_cpuid_features' object ('ready' is omitted). */ #define CPUID_APPLY_MACRO(MACRO) \ do { \ @@ -432,7 +432,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) } while (0) void -_Py_cpuid_disable_features(py_cpuid_features *flags) +_Py_cpuid_disable_features(_Py_cpuid_features *flags) { flags->maxleaf = 0; #define CPUID_DISABLE(FLAG) flags->FLAG = 0 @@ -441,8 +441,8 @@ _Py_cpuid_disable_features(py_cpuid_features *flags) } int -_Py_cpuid_has_features(const py_cpuid_features *actual, - const py_cpuid_features *expect) +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { return 0; @@ -462,8 +462,8 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, - const py_cpuid_features *expect) +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { return 0; @@ -486,7 +486,7 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, #ifdef SHOULD_PARSE_CPUID_L1 static 
inline void -cpuid_detect_l1_features(py_cpuid_features *flags) +cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -503,7 +503,7 @@ cpuid_detect_l1_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7S0 static inline void -cpuid_detect_l7s0_features(py_cpuid_features *flags) +cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -516,7 +516,7 @@ cpuid_detect_l7s0_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7S1 static inline void -cpuid_detect_l7s1_features(py_cpuid_features *flags) +cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -529,7 +529,7 @@ cpuid_detect_l7s1_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7 static inline void -cpuid_detect_l7_features(py_cpuid_features *flags) +cpuid_detect_l7_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 7) { cpuid_detect_l7s0_features(flags); @@ -541,7 +541,7 @@ cpuid_detect_l7_features(py_cpuid_features *flags) #endif void -_Py_cpuid_detect_features(py_cpuid_features *flags) +_Py_cpuid_detect_features(_Py_cpuid_features *flags) { if (flags->ready) { return; diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py index 9aef599bd8f0e5..e501b2761f9659 100644 --- a/Tools/cpuinfo/_util.py +++ b/Tools/cpuinfo/_util.py @@ -1,6 +1,19 @@ from __future__ import annotations -__all__ = ["next_block", "make_enum_member"] +__all__ = [ + "next_block", "make_enum_name", "make_enum_member", + "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", + "CWriter" +] # fmt: skip + +import contextlib +import enum +from io import StringIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, Final def next_block(w: int) -> int: @@ -11,8 +24,96 @@ def next_block(w: int) -> int: 
_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) +def make_enum_name(name: str) -> tuple[str, str]: + if name.endswith("_e"): + raise ValueError(f"enumeration must not end by '_e': {name!r}") + return f"{name}_e", name # (enum name, typedef name) + + def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: member_name = key.ljust(name_maxsize) member_mask = format(1 << bit, "008x") member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) return f"{member_name}{member_mask} // bit = {bit}" + + +class Style(enum.IntEnum): + C99 = enum.auto() + C11 = enum.auto() + DOXYGEN = enum.auto() + + +C99_STYLE = Style.C99 +C11_STYLE = Style.C11 +DOXYGEN_STYLE = Style.DOXYGEN + +_COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/* ", " */", ""), + DOXYGEN_STYLE: ("/** ", " */", ""), +} + +_COMMENT_BLOCK_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/*", " */", " * "), + DOXYGEN_STYLE: ("/**", " */", " * "), +} + + +class CWriter: + def __init__(self, *, indentsize: int = 4) -> None: + self._stream = StringIO() + self._indent = " " * indentsize + self._prefix = "" + self._disable_external_formatter() + + def _disable_external_formatter(self) -> None: + """Add a directive to suppress external formatters to run.""" + with self.prefixed(""): + self.write("// fmt: off") + + def _enable_external_formatter(self) -> None: + """Add a directive to allow external formatters to run.""" + with self.prefixed(""): + self.write("// fmt: on") + + def comment( + self, text: str, *, level: int = 0, style: Style = C11_STYLE + ) -> None: + """Add a C comment, possibly using doxygen style.""" + if len(text) < 72 and "\n" not in text: + prolog, epilog, _ = _COMMENT_INLINE_STYLE[style] + self.write(prolog, text, epilog, sep="", level=level) + else: + prolog, epilog, prefix = _COMMENT_BLOCK_STYLE[style] + self.write(prolog, level=level) + with self.prefixed(prefix): + for line 
in text.splitlines(): + self.write(line, level=level) + self.write(epilog, level=level) + + @contextlib.contextmanager + def prefixed(self, prefix: str) -> Iterator[None]: + old_prefix = self._prefix + self._prefix = prefix + try: + yield + finally: + self._prefix = old_prefix + + def _prefix_at(self, level: int) -> str: + return "".join((self._indent * level, self._prefix)) + + def write( + self, *args: Any, sep: str = " ", end: str = "\n", level: int = 0 + ) -> None: + if prefix := self._prefix_at(level): + self._write(prefix, sep="", end="") + self._write(*args, sep=sep, end=end) + + def _write(self, *args: Any, sep: str, end: str) -> None: + print(*args, sep=sep, end=end, file=self._stream) + + def build(self) -> str: + self._enable_external_formatter() + return self._stream.getvalue().rstrip("\n") diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index f23a68c141b696..ffbf526c01e37f 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -1,7 +1,7 @@ """ Generate an enumeration describing masks to apply on CPUID output registers. -Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, where <> (resp. []) denotes a required (resp. optional) group and: - REGISTER is EAX, EBX, ECX or EDX, @@ -18,20 +18,20 @@ The LEAF value should only be 1 or 7 as other values may have different meanings depending on the underlying architecture. -.. seealso:: Include/internal/pycore_cpuinfo_cpuid_features.h +.. seealso:: :file:`Include/internal/pycore_cpuinfo_cpuid_features.h` """ from __future__ import annotations __all__ = ["generate_cpuid_features_enum"] -from functools import partial -from io import StringIO from typing import TYPE_CHECKING + from .
import _util as util +from ._util import DOXYGEN_STYLE if TYPE_CHECKING: - from typing import Final, IO + from typing import Final type Leaf = int type SubLeaf = int @@ -39,9 +39,9 @@ type FeatureFamily = tuple[Leaf, SubLeaf, Registry] type Feature = str - type Bit = int + type BitIndex = int -CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, Bit]]] = { +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, BitIndex]]] = { # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits. (1, 0, "ECX"): { "SSE3": 0, @@ -101,10 +101,10 @@ def get_member_name( leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature ) -> str: node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" - return f"Py_CPUID_MASK_{registry}_{node}_{name}" + return f"_Py_CPUID_MASK_{registry}_{node}_{name}" -NAMESIZE: Final[int] = util.next_block( +_NAME_MAXSIZE: Final[int] = util.next_block( max( len(get_member_name(*family, name)) for family, values in CPUID_FEATURES.items() @@ -114,35 +114,32 @@ def get_member_name( def generate_cpuid_features_enum(enum_name: str) -> str: - """Used by Include/internal/pycore_cpuinfo_cpuid_features.h. - - The C enumeration is generated by this function and Argument Clinic. - """ + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`. - # The enumeration is rendered as follows: - # - # <NAME> = 0x<MASK>, // bit = BIT - # ^ ^ ^ ^ ^ ^ ^ - # - # where ^ indicates a column that is a multiple of 4, <MASK> has - # exactly 8 characters and <BIT> has at most 2 characters. + The C enumeration is generated by this function and Argument Clinic, + to be eventually rendered as follows: - output = StringIO() - write = partial(print, file=output) - indent = " " * 4 + <NAME> = 0x<MASK>, // bit = BIT + ^ ^ ^ ^ ^ ^ ^ - write(f"typedef enum {enum_name} {{") + where ^ indicates a column that is a multiple of 4, <MASK> has + exactly 8 characters and <BIT> has at most 2 characters.
+ """ + enum_name, _typedef_enum_name = util.make_enum_name(enum_name) + writer = util.CWriter() + writer.comment("Enumeration for CPUID features", style=DOXYGEN_STYLE) + writer.write(f"enum {enum_name} {{") for family, values in CPUID_FEATURES.items(): leaf, subleaf, registry = family title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" - write(indent, "/* ", title, " */", sep="") + writer.comment(title, level=1) for feature_name, bit in values.items(): if not feature_name: raise ValueError(f"invalid entry for {family}") if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") key = get_member_name(leaf, subleaf, registry, feature_name) - member_def = util.make_enum_member(key, bit, NAMESIZE) - write(indent, member_def, sep="") - write(f"}} {enum_name};") - return output.getvalue().rstrip("\n") + member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) + writer.write(member_def, level=1) + writer.write("};") + return writer.build() diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index bacb4e8b4344a8..858151c1b74956 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -3,25 +3,25 @@ See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. -.. seealso:: Include/internal/pycore_cpuinfo_xsave_features.h +.. seealso:: :file:`Include/internal/pycore_cpuinfo_xsave_features.h` """ from __future__ import annotations __all__ = ["generate_xsave_features_enum"] -from functools import partial -from io import StringIO from typing import TYPE_CHECKING + from . 
import _util as util +from ._util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final type Feature = str - type Bit = int + type BitIndex = int -XSAVE_FEATURES: Final[dict[Feature, Bit]] = { +XSAVE_FEATURES: Final[dict[Feature, BitIndex]] = { "SSE": 1, "AVX": 2, "AVX512_OPMASK": 5, @@ -31,38 +31,35 @@ def get_member_name(feature: Feature) -> str: - return f"Py_XSAVE_MASK_XCR0_{feature}" + return f"_Py_XSAVE_MASK_XCR0_{feature}" -NAMESIZE: Final[int] = util.next_block( +_NAME_MAXSIZE: Final[int] = util.next_block( max(map(len, map(get_member_name, XSAVE_FEATURES))) ) def generate_xsave_features_enum(enum_name: str) -> str: - """Used by Include/internal/pycore_cpuinfo_xsave_features.h. - - The C enumeration is generated by this function and Argument Clinic. - """ + """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`. - # The enumeration is rendered as follows: - # - # <NAME> = 0x<MASK>, // bit = BIT - # ^ ^ ^ ^ ^ ^ ^ - # - # where ^ indicates a column that is a multiple of 4, <MASK> has - # exactly 8 characters and <BIT> has at most 2 characters. + The C enumeration is generated by this function and Argument Clinic, + to be eventually rendered as follows: - output = StringIO() - write = partial(print, file=output) - indent = " " * 4 + <NAME> = 0x<MASK>, // bit = BIT + ^ ^ ^ ^ ^ ^ ^ - write(f"typedef enum {enum_name} {{") + where ^ indicates a column that is a multiple of 4, <MASK> has + exactly 8 characters and <BIT> has at most 2 characters.
+ """ + enum_name, _typedef_enum_name = util.make_enum_name(enum_name) + writer = util.CWriter() + writer.comment("Enumeration for XSAVE components", style=DOXYGEN_STYLE) + writer.write(f"enum {enum_name} {{") for feature_name, bit in XSAVE_FEATURES.items(): if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") key = get_member_name(feature_name) - member_def = util.make_enum_member(key, bit, NAMESIZE) - write(indent, member_def, sep="") - write(f"}} {enum_name};") - return output.getvalue().rstrip("\n") + member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) + writer.write(member_def, level=1) + writer.write("};") + return writer.build() From 3c31ba34a28b4839ce0f720c075ad1651383dd4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:14:20 +0200 Subject: [PATCH 42/78] let the compiler decide on the inlineness --- Python/cpuinfo.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 6e595f438e1a7e..0a8d1cd6f7dfd0 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -118,7 +118,7 @@ * * If CPUID is not supported, registers are set to 0. */ -static inline void +static void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) @@ -133,7 +133,7 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -static inline uint64_t +static uint64_t get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now @@ -150,7 +150,7 @@ get_xgetbv(uint32_t index) } /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ -static inline uint32_t +static uint32_t detect_cpuid_maxleaf(void) { CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; @@ -159,7 +159,7 @@ detect_cpuid_maxleaf(void) } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). 
*/ -static inline void +static void detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 1); @@ -203,7 +203,7 @@ detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ -static inline void +static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { @@ -277,7 +277,7 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ -static inline void +static void detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, CPUID_REG eax, CPUID_REG ebx, @@ -308,7 +308,7 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } -static inline void +static void detect_cpuid_xsave_state(_Py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. @@ -323,7 +323,7 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) #endif } -static inline void +static void cpuid_features_finalize(_Py_cpuid_features *flags) { assert(flags->ready == 0); @@ -334,7 +334,7 @@ cpuid_features_finalize(_Py_cpuid_features *flags) flags->ready = 1; } -static inline int +static int cpuid_features_validate(const _Py_cpuid_features *flags) { if (flags->ready != 1) { @@ -485,7 +485,7 @@ _Py_cpuid_match_features(const _Py_cpuid_features *actual, #undef CPUID_APPLY_MACRO #ifdef SHOULD_PARSE_CPUID_L1 -static inline void +static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { @@ -502,7 +502,7 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7S0 -static inline void +static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); @@ -515,7 +515,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7S1 -static inline void 
+static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); @@ -528,7 +528,7 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7 -static inline void +static void cpuid_detect_l7_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 7) { From 143d57e2f3fc78bbd9a1b0b83530dd2bc4e0c34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:14:47 +0200 Subject: [PATCH 43/78] drop CPUID_REG alias --- Python/cpuinfo.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0a8d1cd6f7dfd0..fd59137aaab56f 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,7 +1,5 @@ #include "pycore_cpuinfo.h" -/* CPUID input and output registers are 32-bit unsigned integers */ -#define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ #define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) #define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT)) @@ -121,7 +119,7 @@ static void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, - CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) @@ -153,14 +151,14 @@ get_xgetbv(uint32_t index) static uint32_t detect_cpuid_maxleaf(void) { - CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t maxleaf = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); return maxleaf; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). 
*/ static void -detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) +detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. @@ -205,7 +203,7 @@ detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, - CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) + uint32_t ebx, uint32_t ecx, uint32_t edx) { assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings @@ -279,10 +277,10 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ static void detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, - CPUID_REG eax, - CPUID_REG ebx, - CPUID_REG ecx, - CPUID_REG edx) + uint32_t eax, + uint32_t ebx, + uint32_t ecx, + uint32_t edx) { assert(flags->maxleaf >= 7); (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings @@ -489,7 +487,7 @@ static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); if (flags->osxsave) { @@ -506,7 +504,7 @@ static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } @@ -519,7 +517,7 @@ static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 
1, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); } From ee2a83cd6559e967d6bd0d27c1a11759c8f02191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:16:31 +0200 Subject: [PATCH 44/78] simplify `_Py_cpuid_check_features` --- Python/cpuinfo.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index fd59137aaab56f..eeacf9dd8df3bf 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -332,11 +332,11 @@ cpuid_features_finalize(_Py_cpuid_features *flags) flags->ready = 1; } -static int -cpuid_features_validate(const _Py_cpuid_features *flags) +int +_Py_cpuid_check_features(const _Py_cpuid_features *flags) { if (flags->ready != 1) { - return -1; + return 0; } // AVX-512/F is required to support any other AVX-512 instruction set @@ -354,16 +354,10 @@ cpuid_features_validate(const _Py_cpuid_features *flags) ); if (!flags->avx512_f && !avx512_require_f) { - return -1; + return 0; } - return 0; -} - -int -_Py_cpuid_check_features(const _Py_cpuid_features *flags) -{ - return cpuid_features_validate(flags) < 0 ? 
0 : 1; + return 1; } /* @@ -552,7 +546,7 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) cpuid_detect_l1_features(flags); cpuid_detect_l7_features(flags); cpuid_features_finalize(flags); - if (cpuid_features_validate(flags) < 0) { + if (!_Py_cpuid_check_features(flags)) { _Py_cpuid_disable_features(flags); } #endif // !HAS_CPUID_SUPPORT From e6d458354166dd298b75f9fcba858ea9e5cd2d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:26:33 +0200 Subject: [PATCH 45/78] amend docs for `_Py_cpuid_disable_features` --- Include/internal/pycore_cpuinfo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 49fe7652f3e74f..059653c844394f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -119,11 +119,11 @@ typedef struct _Py_cpuid_features_s { } _Py_cpuid_features; /* - * Explicitly initialize all members to zero to guarantee that - * we never have an un-initialized attribute at runtime which - * could lead to an illegal instruction error. + * Explicitly set all members to zero to guarantee that + * we never have a non-initialized attribute at runtime + * which could lead to an illegal instruction error. * - * This does not mark 'flags' as being ready yet. + * The readiness state of 'flags' is ignored and left untouched. * * Note: This function does not set any exception and thus never fails.
*/ From 838f928beb26472023279e45187660488badb070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:41:10 +0200 Subject: [PATCH 46/78] use macros to support larger flag ranges --- .../internal/pycore_cpuinfo_cpuid_features.h | 102 +++++++++--------- .../internal/pycore_cpuinfo_xsave_features.h | 23 ++-- Tools/cpuinfo/__init__.py | 15 +++ Tools/cpuinfo/_util.py | 53 ++++----- Tools/cpuinfo/cpuid_features_gen.py | 34 ++---- Tools/cpuinfo/xsave_features_gen.py | 30 ++---- 6 files changed, 119 insertions(+), 138 deletions(-) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index 83aa6bc34c9aed..a289766534783f 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -39,55 +39,59 @@ extern "C" { import os, sys sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum -print(generate_cpuid_features_enum("_Py_cpuid_feature_mask")) +print(generate_cpuid_features_enum()) [python start generated code]*/ -// fmt: off -/** Enumeration for CPUID features */ -enum _Py_cpuid_feature_mask_e { - /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - _Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - _Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - _Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - _Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - _Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - _Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - _Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - _Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - _Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - _Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 - /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - _Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - 
_Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - _Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 - /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - _Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - _Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - _Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - _Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - _Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - _Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - _Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - _Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - _Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 - /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - _Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - _Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - _Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 - /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 - /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 - /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -}; -// fmt: on -/*[python end generated code: output=8e58b0997d69bbf8 input=fce00935f64021f9]*/ +// clang-format off +/** Constants for CPUID features */ +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L1_SSE3 0x00000001 // bit = 0 +#define _Py_CPUID_MASK_ECX_L1_PCLMULQDQ 0x00000002 // bit = 1 +#define 
_Py_CPUID_MASK_ECX_L1_SSSE3 0x00000200 // bit = 9 +#define _Py_CPUID_MASK_ECX_L1_FMA 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L1_SSE4_1 0x00080000 // bit = 19 +#define _Py_CPUID_MASK_ECX_L1_SSE4_2 0x00100000 // bit = 20 +#define _Py_CPUID_MASK_ECX_L1_POPCNT 0x00800000 // bit = 23 +#define _Py_CPUID_MASK_ECX_L1_XSAVE 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_ECX_L1_OSXSAVE 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_ECX_L1_AVX 0x10000000 // bit = 28 + +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L1_CMOV 0x00008000 // bit = 15 +#define _Py_CPUID_MASK_EDX_L1_SSE 0x02000000 // bit = 25 +#define _Py_CPUID_MASK_EDX_L1_SSE2 0x04000000 // bit = 26 + +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define _Py_CPUID_MASK_EBX_L7_AVX2 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EBX_L7_AVX512_F 0x00010000 // bit = 16 +#define _Py_CPUID_MASK_EBX_L7_AVX512_DQ 0x00020000 // bit = 17 +#define _Py_CPUID_MASK_EBX_L7_AVX512_IFMA 0x00200000 // bit = 21 +#define _Py_CPUID_MASK_EBX_L7_AVX512_PF 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_EBX_L7_AVX512_ER 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_EBX_L7_AVX512_CD 0x10000000 // bit = 28 +#define _Py_CPUID_MASK_EBX_L7_AVX512_BW 0x40000000 // bit = 30 +#define _Py_CPUID_MASK_EBX_L7_AVX512_VL 0x80000000 // bit = 31 + +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 0x00000040 // bit = 6 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VNNI 0x00000800 // bit = 11 +#define _Py_CPUID_MASK_ECX_L7_AVX512_BITALG 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ 0x00004000 // bit = 14 + +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW 0x00000004 // bit = 2 +#define _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS 0x00000008 // bit = 3 +#define _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT 0x00000100 // bit = 8 + +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define 
_Py_CPUID_MASK_EAX_L7S1_AVX_VNNI 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA 0x00800000 // bit = 23 + +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 +// clang-format on +/*[python end generated code: output=e9112f064e2effec input=d7df15fec9f3daa2]*/ #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index c0e33e820b9ef1..f9ce25e8a71003 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -20,23 +20,20 @@ extern "C" { #include "Python.h" -// fmt: off /*[python input] import os, sys sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) from cpuinfo.xsave_features_gen import generate_xsave_features_enum -print(generate_xsave_features_enum("_Py_xsave_feature_mask")) +print(generate_xsave_features_enum()) [python start generated code]*/ -// fmt: off -/** Enumeration for XSAVE components */ -enum _Py_xsave_feature_mask_e { - _Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - _Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - _Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -}; -// fmt: on -/*[python end generated code: output=35ea9a165938f8ef input=336793a305515376]*/ +// clang-format off +/** Constants for XSAVE components */ +#define _Py_XSAVE_MASK_XCR0_SSE 0x00000002 // bit = 1 +#define _Py_XSAVE_MASK_XCR0_AVX 0x00000004 // bit = 2 +#define _Py_XSAVE_MASK_XCR0_AVX512_OPMASK 0x00000020 // bit = 5 +#define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 +#define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit 
= 7 +// clang-format on +/*[python end generated code: output=ac059b802b4317cb input=6323151855b3c9f0]*/ #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/__init__.py index e69de29bb2d1d6..434ed9173f403a 100644 --- a/Tools/cpuinfo/__init__.py +++ b/Tools/cpuinfo/__init__.py @@ -0,0 +1,15 @@ +""" +This package provides functions to generate flags for CPUID and XSAVE. + +The constants are macros generated by Argument Clinic as follows: + + #define <NAME> 0x<MASK> // bit = BIT + ^ ^ + +where ^ indicates a column that is a multiple of 4, <MASK> has +exactly 8 characters and <BIT> has at most 2 characters. + +A C enumeration is NOT generated as the largest member may not fit +on an 'int', which is forbidden as ISO C restricts enumerator values +to that range. +""" diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py index e501b2761f9659..9d478ca686f65b 100644 --- a/Tools/cpuinfo/_util.py +++ b/Tools/cpuinfo/_util.py @@ -1,9 +1,9 @@ from __future__ import annotations __all__ = [ - "next_block", "make_enum_name", "make_enum_member", + "next_block", "make_constant", "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", - "CWriter" + "CWriter", ] # fmt: skip import contextlib @@ -13,7 +13,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Any, Final + from typing import Any, Final, Literal def next_block(w: int) -> int: @@ -21,20 +21,15 @@ def next_block(w: int) -> int: return ((w + 3) & ~0x03) if (w % 4) else (w + 4) -_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) +_MASKSIZE: Final[int] = next_block(len("0x00000000")) -def make_enum_name(name: str) -> tuple[str, str]: - if name.endswith("_e"): - raise ValueError(f"enumeration must not end by '_e': {name!r}") - return f"{name}_e", name # (enum name, typedef name) - - -def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: +def make_constant(key: str, bit: int, name_maxsize: int) -> str: + assert bit <= 32, f"{key}: mask does not on an 
uint32_t" member_name = key.ljust(name_maxsize) member_mask = format(1 << bit, "008x") - member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) - return f"{member_name}{member_mask} // bit = {bit}" + member_mask = f"0x{member_mask}".ljust(_MASKSIZE) + return f"#define {member_name}{member_mask}// bit = {bit}" class Style(enum.IntEnum): @@ -43,9 +38,9 @@ class Style(enum.IntEnum): DOXYGEN = enum.auto() -C99_STYLE = Style.C99 -C11_STYLE = Style.C11 -DOXYGEN_STYLE = Style.DOXYGEN +C99_STYLE: Final[Literal[Style.C99]] = Style.C99 +C11_STYLE: Final[Literal[Style.C11]] = Style.C11 +DOXYGEN_STYLE: Final[Literal[Style.DOXYGEN]] = Style.DOXYGEN _COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { C99_STYLE: ("// ", "", ""), @@ -65,17 +60,6 @@ def __init__(self, *, indentsize: int = 4) -> None: self._stream = StringIO() self._indent = " " * indentsize self._prefix = "" - self._disable_external_formatter() - - def _disable_external_formatter(self) -> None: - """Add a directive to suppress external formatters to run.""" - with self.prefixed(""): - self.write("// fmt: off") - - def _enable_external_formatter(self) -> None: - """Add a directive to allow external formatters to run.""" - with self.prefixed(""): - self.write("// fmt: on") def comment( self, text: str, *, level: int = 0, style: Style = C11_STYLE @@ -111,9 +95,18 @@ def write( self._write(prefix, sep="", end="") self._write(*args, sep=sep, end=end) - def _write(self, *args: Any, sep: str, end: str) -> None: + def write_blankline(self) -> None: + self._write() + + def _write(self, *args: Any, sep: str = " ", end: str = "\n") -> None: print(*args, sep=sep, end=end, file=self._stream) def build(self) -> str: - self._enable_external_formatter() - return self._stream.getvalue().rstrip("\n") + # inject directives to temporarily disable external C formatters + return "\n".join( + ( + "// clang-format off", + self._stream.getvalue().rstrip(), + "// clang-format on", + ) + ) diff --git 
a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index ffbf526c01e37f..84f60dd625797b 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -1,7 +1,7 @@ """ Generate an enumeration describing masks to apply on CPUID output registers. -Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +Constants are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, where <> (resp. []) denotes a required (resp. optional) group and: - REGISTER is EAX, EBX, ECX or EDX, @@ -97,7 +97,7 @@ } -def get_member_name( +def get_constant_name( leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature ) -> str: node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" @@ -106,40 +106,26 @@ def get_member_name( _NAME_MAXSIZE: Final[int] = util.next_block( max( - len(get_member_name(*family, name)) + len(get_constant_name(*family, name)) for family, values in CPUID_FEATURES.items() for name in values ) ) -def generate_cpuid_features_enum(enum_name: str) -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`. - - The C enumeration is generated by this function and Argument Clinic, - to be eventually rendred as follows: - - <NAME> = 0x<MASK>, // bit = BIT - ^ ^ ^ ^ ^ ^ ^ - - where ^ indicates a column that is a multiple of 4, <MASK> has - exactly 8 characters and <BIT> has at most 2 characters.
- """ - enum_name, _typedef_enum_name = util.make_enum_name(enum_name) +def generate_cpuid_features_enum() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" writer = util.CWriter() - writer.comment("Enumeration for CPUID features", style=DOXYGEN_STYLE) - writer.write(f"enum {enum_name} {{") + writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) for family, values in CPUID_FEATURES.items(): leaf, subleaf, registry = family - title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" - writer.comment(title, level=1) + writer.comment(f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]") for feature_name, bit in values.items(): if not feature_name: raise ValueError(f"invalid entry for {family}") if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_member_name(leaf, subleaf, registry, feature_name) - member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) - writer.write(member_def, level=1) - writer.write("};") + key = get_constant_name(leaf, subleaf, registry, feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) + writer.write_blankline() return writer.build() diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index 858151c1b74956..3d820759ce9f03 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -1,5 +1,5 @@ """ -Generate enumeration for XSAVE state components (XCR0 control register). +Generate constants for XSAVE state components (XCR0 control register). See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. 
@@ -30,36 +30,22 @@ } -def get_member_name(feature: Feature) -> str: +def get_constant_name(feature: Feature) -> str: return f"_Py_XSAVE_MASK_XCR0_{feature}" _NAME_MAXSIZE: Final[int] = util.next_block( - max(map(len, map(get_member_name, XSAVE_FEATURES))) + max(map(len, map(get_constant_name, XSAVE_FEATURES))) ) -def generate_xsave_features_enum(enum_name: str) -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`. - - The C enumeration is generated by this function and Argument Clinic, - to be eventually rendred as follows: - - <NAME> = 0x<MASK>, // bit = BIT - ^ ^ ^ ^ ^ ^ ^ - - where ^ indicates a column that is a multiple of 4, <MASK> has - exactly 8 characters and <BIT> has at most 2 characters. - """ - enum_name, _typedef_enum_name = util.make_enum_name(enum_name) +def generate_xsave_features_enum() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" writer = util.CWriter() - writer.comment("Enumeration for XSAVE components", style=DOXYGEN_STYLE) - writer.write(f"enum {enum_name} {{") + writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) for feature_name, bit in XSAVE_FEATURES.items(): if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_member_name(feature_name) - member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) - writer.write(member_def, level=1) - writer.write("};") + key = get_constant_name(feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) return writer.build() From 62c9a405f670668df877844b8dd8235b3d4d0027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 19:06:59 +0200 Subject: [PATCH 47/78] handle -Wpedantic --- Python/cpuinfo.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index eeacf9dd8df3bf..0a7634ee33198d 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ 
-128,6 +128,8 @@ get_cpuid_info(uint32_t level /* input eax */, uint32_t info[4] = {0}; __cpuidex(info, level, count); *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; +#else + (void)level, (void)count; #endif } @@ -151,8 +153,8 @@ get_xgetbv(uint32_t index) static uint32_t detect_cpuid_maxleaf(void) { - uint32_t maxleaf = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); + uint32_t maxleaf = 0, _ebx = 0, _ecx = 0, _edx = 0; + get_cpuid_info(0, 0, &maxleaf, &_ebx, &_ecx, &_edx); return maxleaf; } @@ -160,7 +162,9 @@ detect_cpuid_maxleaf(void) static void detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 1); + (void)flags, (void)ecx, (void)edx; // silence -Wunused-parameter // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS @@ -205,8 +209,9 @@ static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, uint32_t ebx, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + (void)flags, (void)ebx, (void)ecx, (void)edx; // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS @@ -282,8 +287,9 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + (void)flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -309,8 +315,10 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, static void detect_cpuid_xsave_state(_Py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. + assert(flags->ready == 0); assert(flags->maxleaf >= 1); + (void)flags; + // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); @@ -480,6 +488,7 @@ _Py_cpuid_match_features(const _Py_cpuid_features *actual, static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); @@ -497,9 +506,10 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -510,6 +520,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); @@ -523,6 +534,7 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) static void cpuid_detect_l7_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); if (flags->maxleaf >= 7) { cpuid_detect_l7s0_features(flags); cpuid_detect_l7s1_features(flags); From a22aa95c44f2cc558be5404f045195342e6e2737 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 20:58:36 +0200 Subject: [PATCH 48/78] reorganize files --- Include/internal/pycore_cpuinfo_cpuid_features.h | 14 +++++++++----- Include/internal/pycore_cpuinfo_xsave_features.h | 14 +++++++++----- PCbuild/pythoncore.vcxproj.filters | 6 +++--- Tools/cpuinfo/{ => libcpuinfo}/__init__.py | 5 +++++ Tools/cpuinfo/libcpuinfo/features/__init__.py | 0 .../features/cpuid.py} | 8 ++++---- .../features/xsave.py} | 8 ++++---- Tools/cpuinfo/{_util.py => libcpuinfo/util.py} | 0 8 files changed, 34 insertions(+), 21 deletions(-) rename Tools/cpuinfo/{ => libcpuinfo}/__init__.py (79%) create mode 100644 Tools/cpuinfo/libcpuinfo/features/__init__.py rename Tools/cpuinfo/{cpuid_features_gen.py => libcpuinfo/features/cpuid.py} (95%) rename Tools/cpuinfo/{xsave_features_gen.py => libcpuinfo/features/xsave.py} (88%) rename Tools/cpuinfo/{_util.py => libcpuinfo/util.py} (100%) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index a289766534783f..8db54e7af37fb1 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -1,6 +1,6 @@ /** * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/cpuid_features_gen.py + * @seealso @file Tools/cpuinfo/libcpuinfo/features/cpuid.py * * The enumeration describes masks to apply on CPUID output registers. 
* @@ -37,9 +37,9 @@ extern "C" { /*[python input] import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) -from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum -print(generate_cpuid_features_enum()) +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.cpuid import make_cpuid_features_constants +print(make_cpuid_features_constants()) [python start generated code]*/ // clang-format off /** Constants for CPUID features */ @@ -92,6 +92,10 @@ print(generate_cpuid_features_enum()) #define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 #define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 // clang-format on -/*[python end generated code: output=e9112f064e2effec input=d7df15fec9f3daa2]*/ +/*[python end generated code: output=e9112f064e2effec input=71ec6b4356052ec3]*/ + +#ifdef __cplusplus +} +#endif #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index f9ce25e8a71003..e8719261b07604 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -1,6 +1,6 @@ /** * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/xsave_features_gen.py + * @seealso @file Tools/cpuinfo/libcpuinfo/features/xsave.py * * XSAVE state components (XCR0 control register). 
* @@ -22,9 +22,9 @@ extern "C" { /*[python input] import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) -from cpuinfo.xsave_features_gen import generate_xsave_features_enum -print(generate_xsave_features_enum()) +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.xsave import make_xsave_features_constants +print(make_xsave_features_constants()) [python start generated code]*/ // clang-format off /** Constants for XSAVE components */ @@ -34,6 +34,10 @@ print(generate_xsave_features_enum()) #define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 #define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit = 7 // clang-format on -/*[python end generated code: output=ac059b802b4317cb input=6323151855b3c9f0]*/ +/*[python end generated code: output=ac059b802b4317cb input=0a1b0774d3271477]*/ + +#ifdef __cplusplus +} +#endif #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index b4037a566b3ec5..8afc2010ef93ca 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -601,13 +601,13 @@ Include\internal - Include\cpython + Include\internal - Include\cpython + Include\internal - Include\cpython + Include\internal Include\internal diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/libcpuinfo/__init__.py similarity index 79% rename from Tools/cpuinfo/__init__.py rename to Tools/cpuinfo/libcpuinfo/__init__.py index 434ed9173f403a..a935debd4f4bbc 100644 --- a/Tools/cpuinfo/__init__.py +++ b/Tools/cpuinfo/libcpuinfo/__init__.py @@ -12,4 +12,9 @@ A C enumeration is NOT generated as the largest member may not fit on an 'int', which is forbidden as ISO C restricts enumerator values to that range. + +.. note:: + + This package must not be used directly and should only be + invoked from an Argument Clinic "[python input]" directive. 
""" diff --git a/Tools/cpuinfo/libcpuinfo/features/__init__.py b/Tools/cpuinfo/libcpuinfo/features/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/libcpuinfo/features/cpuid.py similarity index 95% rename from Tools/cpuinfo/cpuid_features_gen.py rename to Tools/cpuinfo/libcpuinfo/features/cpuid.py index 84f60dd625797b..65a37860a2778d 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/libcpuinfo/features/cpuid.py @@ -23,12 +23,12 @@ from __future__ import annotations -__all__ = ["generate_cpuid_features_enum"] +__all__ = ["make_cpuid_features_constants"] from typing import TYPE_CHECKING -from . import _util as util -from ._util import DOXYGEN_STYLE +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final @@ -113,7 +113,7 @@ def get_constant_name( ) -def generate_cpuid_features_enum() -> str: +def make_cpuid_features_constants() -> str: """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" writer = util.CWriter() writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/libcpuinfo/features/xsave.py similarity index 88% rename from Tools/cpuinfo/xsave_features_gen.py rename to Tools/cpuinfo/libcpuinfo/features/xsave.py index 3d820759ce9f03..474162dfc4463b 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/libcpuinfo/features/xsave.py @@ -8,12 +8,12 @@ from __future__ import annotations -__all__ = ["generate_xsave_features_enum"] +__all__ = ["make_xsave_features_constants"] from typing import TYPE_CHECKING -from . 
import _util as util -from ._util import DOXYGEN_STYLE +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final @@ -39,7 +39,7 @@ def get_constant_name(feature: Feature) -> str: ) -def generate_xsave_features_enum() -> str: +def make_xsave_features_constants() -> str: """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" writer = util.CWriter() writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/libcpuinfo/util.py similarity index 100% rename from Tools/cpuinfo/_util.py rename to Tools/cpuinfo/libcpuinfo/util.py From 87039dc140e8158d3a7024eaff82bcd05db7e407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:26:22 +0200 Subject: [PATCH 49/78] suppress compilation warnings --- Python/cpuinfo.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0a7634ee33198d..8f2ad5add1e753 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -110,6 +110,15 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif +#if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" +#endif + +#undef SHOULD_PARSE_CPUID_L1 + /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. @@ -133,7 +142,8 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -static uint64_t +#if defined(HAS_XGETBV_SUPPORT) && defined(SHOULD_PARSE_CPUID_L1) +static uint64_t /* should only be used after calling cpuid(1, 0, ...) 
*/ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now @@ -148,6 +158,7 @@ get_xgetbv(uint32_t index) return 0; #endif } +#endif /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ static uint32_t @@ -159,7 +170,8 @@ detect_cpuid_maxleaf(void) } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ -static void +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { assert(flags->ready == 0); @@ -203,9 +215,11 @@ detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif } +#endif /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ -static void +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void /* should only be used after calling cpuid(7, 0, ...) */ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, uint32_t ebx, uint32_t ecx, uint32_t edx) { @@ -278,9 +292,11 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } +#endif /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ -static void +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void /* should only be used after calling cpuid(7, 1, ...) */ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, uint32_t eax, uint32_t ebx, @@ -311,23 +327,24 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } +#endif -static void +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ detect_cpuid_xsave_state(_Py_cpuid_features *flags) { assert(flags->ready == 0); assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. -#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? 
get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -#endif } +#endif static void cpuid_features_finalize(_Py_cpuid_features *flags) @@ -493,9 +510,7 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - if (flags->osxsave) { - detect_cpuid_xsave_state(flags); - } + detect_cpuid_xsave_state(flags); } } #else @@ -551,9 +566,6 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) return; } _Py_cpuid_disable_features(flags); -#ifndef HAS_CPUID_SUPPORT - flags->ready = 1; -#else flags->maxleaf = detect_cpuid_maxleaf(); cpuid_detect_l1_features(flags); cpuid_detect_l7_features(flags); @@ -561,5 +573,4 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (!_Py_cpuid_check_features(flags)) { _Py_cpuid_disable_features(flags); } -#endif // !HAS_CPUID_SUPPORT } From 8a4b1205b5160ecb80b5becdcf5b22cc96fa1617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:26:27 +0200 Subject: [PATCH 50/78] add linting --- Tools/cpuinfo/.ruff.toml | 16 ++++++++++++++++ Tools/cpuinfo/mypy.ini | 9 +++++++++ 2 files changed, 25 insertions(+) create mode 100644 Tools/cpuinfo/.ruff.toml create mode 100644 Tools/cpuinfo/mypy.ini diff --git a/Tools/cpuinfo/.ruff.toml b/Tools/cpuinfo/.ruff.toml new file mode 100644 index 00000000000000..e49d04c2d4e863 --- /dev/null +++ b/Tools/cpuinfo/.ruff.toml @@ -0,0 +1,16 @@ +# Python 3.12 is required for 'type' statements +target-version = "py312" +line-length = 79 + +[format] +skip-magic-trailing-comma = false + +[lint] +select = [ + "I", # isort + 
"F841", # unused variable + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] + + diff --git a/Tools/cpuinfo/mypy.ini b/Tools/cpuinfo/mypy.ini new file mode 100644 index 00000000000000..914ca082b72189 --- /dev/null +++ b/Tools/cpuinfo/mypy.ini @@ -0,0 +1,9 @@ +[mypy] +files = Tools/cpuinfo/ +pretty = True + +python_version = 3.12 +strict = True +extra_checks = True +enable_error_code = ignore-without-code,redundant-expr,truthy-bool +warn_unreachable = True From 8603e1485b177fa241680e22564651dad8b3a4fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:28:40 +0200 Subject: [PATCH 51/78] typo --- Python/cpuinfo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8f2ad5add1e753..44190dc9f30352 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -117,8 +117,6 @@ #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif -#undef SHOULD_PARSE_CPUID_L1 - /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. 
From d6213a5f2d115ebc89e64651822fe02b76bfd902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:28:48 +0200 Subject: [PATCH 52/78] typo --- Python/cpuinfo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 44190dc9f30352..05f116cab4a97f 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -110,12 +110,12 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) -#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" -#endif #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif /* * Call __cpuid_count() or equivalent and get From 79d5b3453f39ced2fdacf83fee9366fbfacb5bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:10:12 +0200 Subject: [PATCH 53/78] log more! 
--- Tools/build/check_extension_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index 9815bcfe27d995..cff29b4bfaab21 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -471,7 +471,7 @@ def main(): if args.debug: args.verbose = True logging.basicConfig( - level=logging.DEBUG if args.debug else logging.INFO, + level=logging.DEBUG, format="[%(levelname)s] %(message)s", ) From f69d74a0037ea71a3a30931081bf4b1615c82bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:12:11 +0200 Subject: [PATCH 54/78] skip CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6171571857af6..15dc80b60cbbc2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -635,7 +635,7 @@ jobs: - build-tsan - cross-build-linux - cifuzz - if: always() + if: false steps: - name: Check whether the needed jobs succeeded or failed From cb9065da58bed6b7ca95a39c48f6146ded510174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:16:32 +0200 Subject: [PATCH 55/78] more printf --- Python/cpuinfo.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 05f116cab4a97f..0db0eb52efe072 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -506,8 +506,12 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(1, 0)]\n"); get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); + 
printf("[L1::detect_cpuid_features]\n"); detect_cpuid_features(flags, ecx, edx); + printf("[L1::detect_cpuid_xsave_state]\n"); detect_cpuid_xsave_state(flags); } } @@ -522,7 +526,9 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(7, 0)]\n"); get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", _eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -536,7 +542,9 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(7, 1)]\n"); get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); } #else @@ -563,12 +571,19 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (flags->ready) { return; } + printf("[disable features]\n"); _Py_cpuid_disable_features(flags); + printf("[detect MAXLEAF]\n"); flags->maxleaf = detect_cpuid_maxleaf(); + printf("[L1, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l1_features(flags); + printf("[L7, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l7_features(flags); + printf("finalize\n"); cpuid_features_finalize(flags); if (!_Py_cpuid_check_features(flags)) { + printf("invalid check\n"); _Py_cpuid_disable_features(flags); } + printf("done\n"); } From 88df3b72790ae7f2e4a65a3c9ca7815de2402b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:18:04 +0200 Subject: [PATCH 56/78] faster ci --- .github/workflows/build.yml | 533 ------------------------------------ 1 file changed, 533 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 
15dc80b60cbbc2..b5a8afed244a0f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -152,536 +152,3 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals - - build-windows: - name: >- - Windows - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: fromJSON(needs.build-context.outputs.run-windows-tests) - strategy: - fail-fast: false - matrix: - arch: - - x64 - - Win32 - - arm64 - free-threading: - - false - - true - exclude: - # Skip Win32 on free-threaded builds - - { arch: Win32, free-threading: true } - uses: ./.github/workflows/reusable-windows.yml - with: - arch: ${{ matrix.arch }} - free-threading: ${{ matrix.free-threading }} - - build-windows-msi: - name: >- # ${{ '' } is a hack to nest jobs under the same sidebar category - Windows MSI${{ '' }} - needs: build-context - if: fromJSON(needs.build-context.outputs.run-windows-msi) - strategy: - fail-fast: false - matrix: - arch: - - x86 - - x64 - - arm64 - uses: ./.github/workflows/reusable-windows-msi.yml - with: - arch: ${{ matrix.arch }} - - build-macos: - name: >- - macOS - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. - # macOS 13 only runs tests against the GIL-enabled CPython. - # Cirrus used for upstream, macos-14 for forks. 
- os: - - ghcr.io/cirruslabs/macos-runner:sonoma - - macos-14 - - macos-13 - is-fork: # only used for the exclusion trick - - ${{ github.repository_owner != 'python' }} - free-threading: - - false - - true - exclude: - - os: ghcr.io/cirruslabs/macos-runner:sonoma - is-fork: true - - os: macos-14 - is-fork: false - - os: macos-13 - free-threading: true - uses: ./.github/workflows/reusable-macos.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - free-threading: ${{ matrix.free-threading }} - os: ${{ matrix.os }} - - build-ubuntu: - name: >- - Ubuntu - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - bolt: - - false - - true - free-threading: - - false - - true - os: - - ubuntu-24.04 - - ubuntu-24.04-arm - exclude: - # Do not test BOLT with free-threading, to conserve resources - - bolt: true - free-threading: true - # BOLT currently crashes during instrumentation on aarch64 - - os: ubuntu-24.04-arm - bolt: true - uses: ./.github/workflows/reusable-ubuntu.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - bolt-optimizations: ${{ matrix.bolt }} - free-threading: ${{ matrix.free-threading }} - os: ${{ matrix.os }} - - build-ubuntu-ssltests: - name: 'Ubuntu SSL tests with OpenSSL' - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04] - openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] - # See Tools/ssl/make_ssl_data.py for notes on adding a new version - env: - OPENSSL_VER: ${{ matrix.openssl_ver }} - MULTISSL_DIR: ${{ github.workspace }}/multissl - OPENSSL_DIR: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }} - LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ 
matrix.openssl_ver }}/lib - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - - name: Configure CPython - run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: SSL tests - run: ./python Lib/test/ssltests.py - - build-wasi: - name: 'WASI' - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - uses: ./.github/workflows/reusable-wasi.yml - with: - config_hash: ${{ 
needs.build-context.outputs.config-hash }} - - test-hypothesis: - name: "Hypothesis tests on Ubuntu" - runs-on: ubuntu-24.04 - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - env: - OPENSSL_VER: 3.0.16 - PYTHONSTRICTEXTENSIONBUILD: 1 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - - name: Setup directory envs for out-of-tree builds - run: | - echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" - echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" - - name: Create directories for read-only out-of-tree builds - run: mkdir -p "$CPYTHON_RO_SRCDIR" "$CPYTHON_BUILDDIR" - - name: Bind mount sources read-only - run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - - name: Runner image version - run: echo 
"IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Configure CPython out-of-tree - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - ../cpython-ro-srcdir/configure \ - --config-cache \ - --with-pydebug \ - --enable-slower-safety \ - --with-openssl="$OPENSSL_DIR" - - name: Build CPython out-of-tree - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: make -j4 - - name: Display build info - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: make pythoninfo - - name: Remount sources writable for tests - # some tests write to srcdir, lack of pyc files slows down testing - run: sudo mount "$CPYTHON_RO_SRCDIR" -oremount,rw - - name: Setup directory envs for out-of-tree builds - run: | - echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" - - name: "Create hypothesis venv" - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - VENV_LOC=$(realpath -m .)/hypovenv - VENV_PYTHON=$VENV_LOC/bin/python - echo "HYPOVENV=${VENV_LOC}" >> "$GITHUB_ENV" - echo "VENV_PYTHON=${VENV_PYTHON}" >> "$GITHUB_ENV" - ./python -m venv "$VENV_LOC" && "$VENV_PYTHON" -m pip install -r "${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt" - - name: 'Restore Hypothesis database' - id: cache-hypothesis-database - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/ - key: hypothesis-database-${{ github.head_ref || github.run_id }} - restore-keys: | - hypothesis-database- - - name: "Run tests" - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - # Most of the excluded tests are slow test suites with no property tests - # - # (GH-104097) test_sysconfig is skipped because it has tests that are - # failing when executed from inside a virtual environment. 
- "${VENV_PYTHON}" -m test \ - -W \ - --slowest \ - -j4 \ - --timeout 900 \ - -x test_asyncio \ - -x test_multiprocessing_fork \ - -x test_multiprocessing_forkserver \ - -x test_multiprocessing_spawn \ - -x test_concurrent_futures \ - -x test_socket \ - -x test_subprocess \ - -x test_signal \ - -x test_sysconfig - - uses: actions/upload-artifact@v4 - if: always() - with: - name: hypothesis-example-db - path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/examples/ - - build-asan: - name: 'Address sanitizer' - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04] - env: - OPENSSL_VER: 3.0.16 - PYTHONSTRICTEXTENSIONBUILD: 1 - ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Set up GCC-10 for ASAN - uses: egor-tensin/setup-gcc@v1 - with: - version: 10 - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - 
if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - - name: Configure CPython - run: ./configure --config-cache --with-address-sanitizer --without-pymalloc - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: Tests - run: xvfb-run make ci - - build-tsan: - name: >- - Thread sanitizer - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - free-threading: - - false - - true - uses: ./.github/workflows/reusable-tsan.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - free-threading: ${{ matrix.free-threading }} - - build-ubsan: - name: Undefined behavior sanitizer - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - uses: ./.github/workflows/reusable-ubsan.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - - cross-build-linux: - name: Cross build Linux - runs-on: ubuntu-latest - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Set build 
dir - run: - # an absolute path outside of the working directoy - echo "BUILD_DIR=$(realpath ${{ github.workspace }}/../build)" >> "$GITHUB_ENV" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure host build - run: ./configure --prefix="$BUILD_DIR/host-python" - - name: Install host Python - run: make -j8 install - - name: Run test subset with host build - run: | - "$BUILD_DIR/host-python/bin/python3" -m test test_sysconfig test_site test_embed - - name: Configure cross build - run: ./configure --prefix="$BUILD_DIR/cross-python" --with-build-python="$BUILD_DIR/host-python/bin/python3" - - name: Install cross Python - run: make -j8 install - - name: Run test subset with host build - run: | - "$BUILD_DIR/cross-python/bin/python3" -m test test_sysconfig test_site test_embed - - # CIFuzz job based on https://google.github.io/oss-fuzz/getting-started/continuous-integration/ - cifuzz: - name: CIFuzz - runs-on: ubuntu-latest - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-ci-fuzz == 'true' - permissions: - security-events: write - strategy: - fail-fast: false - matrix: - sanitizer: [address, undefined, memory] - steps: - - name: Build fuzzers (${{ matrix.sanitizer }}) - id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master - with: - oss-fuzz-project-name: cpython3 - sanitizer: ${{ matrix.sanitizer }} - - name: Run fuzzers (${{ matrix.sanitizer }}) - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master - with: - fuzz-seconds: 600 - oss-fuzz-project-name: cpython3 - output-sarif: true - sanitizer: ${{ matrix.sanitizer }} - - name: Upload crash - if: failure() && steps.build.outcome == 'success' - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.sanitizer }}-artifacts - path: ./out/artifacts - - name: Upload SARIF - if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: 
cifuzz-sarif/results.sarif - checkout_path: cifuzz-sarif - - all-required-green: # This job does nothing and is only used for the branch protection - name: All required checks pass - runs-on: ubuntu-latest - timeout-minutes: 5 - needs: - - build-context # Transitive dependency, needed to access `run-tests` value - - check-docs - - check-autoconf-regen - - check-generated-files - - build-windows - - build-windows-msi - - build-macos - - build-ubuntu - - build-ubuntu-ssltests - - build-wasi - - test-hypothesis - - build-asan - - build-tsan - - cross-build-linux - - cifuzz - if: false - - steps: - - name: Check whether the needed jobs succeeded or failed - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe - with: - allowed-failures: >- - build-windows-msi, - build-ubuntu-ssltests, - test-hypothesis, - cifuzz, - allowed-skips: >- - ${{ - !fromJSON(needs.build-context.outputs.run-docs) - && ' - check-docs, - ' - || '' - }} - ${{ - needs.build-context.outputs.run-tests != 'true' - && ' - check-autoconf-regen, - check-generated-files, - build-macos, - build-ubuntu, - build-ubuntu-ssltests, - build-wasi, - test-hypothesis, - build-asan, - build-tsan, - cross-build-linux, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-windows-tests) - && ' - build-windows, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-ci-fuzz) - && ' - cifuzz, - ' - || '' - }} - jobs: ${{ toJSON(needs) }} From db64ba5dc0c25719a1cb0df23f0e84320caead8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:22:29 +0200 Subject: [PATCH 57/78] !! 
--- Tools/build/check_extension_modules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index cff29b4bfaab21..feb33be717e512 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -470,8 +470,9 @@ def main(): args = parser.parse_args() if args.debug: args.verbose = True + args.verbose = args.debug = True logging.basicConfig( - level=logging.DEBUG, + level=logging.DEBUG if args.debug else logging.INFO, format="[%(levelname)s] %(message)s", ) From 04012b4fb6f338b470801ca72c3ea1f092d1b775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:28:53 +0200 Subject: [PATCH 58/78] !! --- Python/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0db0eb52efe072..3e246c3df5de83 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -335,7 +335,7 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. - uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; + uint64_t xcr0 = flags->xsave && flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); From 144d9ef2c54de7451e6283215ef4810b0f5eb226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:01:42 +0200 Subject: [PATCH 59/78] does it work now..? 
--- Python/cpuinfo.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3e246c3df5de83..322714abaafd07 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,14 +9,17 @@ // In the future, we should carefully enable support for ARM NEON and POWER // as well as AMD. #if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() # define HAS_CPUID_SUPPORT +# if defined(__clang__) +# include // _xgetbv() +# endif # define HAS_XGETBV_SUPPORT #elif defined(_M_X64) -# include // _xgetbv() -# define HAS_XGETBV_SUPPORT -# include // __cpuidex() +# include // __cpuidex() # define HAS_CPUID_SUPPORT +# include // _xgetbv() +# define HAS_XGETBV_SUPPORT #else # undef HAS_CPUID_SUPPORT # undef HAS_XGETBV_SUPPORT @@ -146,9 +149,18 @@ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) +# if defined(__clang__) + return (uint64_t)_xgetbv(index); +# else uint32_t eax = 0, edx = 0; - __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + __asm__ volatile( + /* raw opcode for xgetbv for compatibility with older toolchains */ + ".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index) + ); return ((uint64_t)edx << 32) | eax; +# endif #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); #else From b364ad29cc93dedeffde8e405f8d0988a3b0a6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:09:22 +0200 Subject: [PATCH 60/78] does it work now..? 
--- Python/cpuinfo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 322714abaafd07..023f4a2193ae4b 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -150,7 +150,11 @@ get_xgetbv(uint32_t index) assert(index == 0); // only XCR0 is supported for now #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) # if defined(__clang__) +# if defined(_MSC_VER) && _Py__has_builtin(__builtin_ia32_xgetbv) return (uint64_t)_xgetbv(index); +# else + return 0; +# endif # else uint32_t eax = 0, edx = 0; __asm__ volatile( From 0791e890e7e23ef13c6cd1902ab2ce58e6eb17a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:23:52 +0200 Subject: [PATCH 61/78] remove xgetbv support? --- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 023f4a2193ae4b..67cf7d3e300e47 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,6 +25,8 @@ # undef HAS_XGETBV_SUPPORT #endif +#undef HAS_XGETBV_SUPPORT + // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not From 48b2cb27f33edf40d9c752ca53e9f4167876c02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:24:11 +0200 Subject: [PATCH 62/78] remove xgetbv support? --- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 67cf7d3e300e47..66867a19768384 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -353,12 +353,14 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->xsave && flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +#endif } #endif From 34f1337dc6fb0334d305c63802d7226e53c6ac27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:30:32 +0200 Subject: [PATCH 63/78] huh? --- Python/cpuinfo.c | 52 +++++++------------------- Tools/build/check_extension_modules.py | 1 - 2 files changed, 13 insertions(+), 40 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 66867a19768384..d3e99e8f4ecfee 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -7,7 +7,7 @@ // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER -// as well as AMD. +// as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures. #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() # define HAS_CPUID_SUPPORT @@ -15,7 +15,7 @@ # include // _xgetbv() # endif # define HAS_XGETBV_SUPPORT -#elif defined(_M_X64) +#elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() # define HAS_CPUID_SUPPORT # include // _xgetbv() @@ -25,8 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_XGETBV_SUPPORT - // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. 
If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not @@ -115,6 +113,8 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L7S1 + #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif @@ -150,14 +150,7 @@ static uint64_t /* should only be used after calling cpuid(1, 0, ...) */ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now -#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) -# if defined(__clang__) -# if defined(_MSC_VER) && _Py__has_builtin(__builtin_ia32_xgetbv) - return (uint64_t)_xgetbv(index); -# else - return 0; -# endif -# else +# if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -166,13 +159,12 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; -# endif -#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) +# elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); -#else +# else (void)index; return 0; -#endif +# endif } #endif @@ -314,14 +306,11 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, #ifdef SHOULD_PARSE_CPUID_L7S1 static void /* should only be used after calling cpuid(7, 1, ...) */ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, - uint32_t eax, - uint32_t ebx, - uint32_t ecx, - uint32_t edx) + uint32_t eax, uint32_t edx) { assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)flags, (void)eax, (void)ebx, (void)ecx, (void)edx; + (void)flags, (void)eax, (void)edx; // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -526,12 +515,8 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(1, 0)]\n"); get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); - printf("[L1::detect_cpuid_features]\n"); detect_cpuid_features(flags, ecx, edx); - printf("[L1::detect_cpuid_xsave_state]\n"); detect_cpuid_xsave_state(flags); } } @@ -546,9 +531,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(7, 0)]\n"); get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", _eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -561,11 +544,9 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->ready == 0); assert(flags->maxleaf >= 7); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(7, 1)]\n"); - get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); - detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); + uint32_t eax = 0, _ebx = 0, _ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &_ebx, &_ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, edx); } #else #define cpuid_detect_l7s1_features(FLAGS) @@ -591,19 +572,12 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (flags->ready) { return; } - printf("[disable features]\n"); _Py_cpuid_disable_features(flags); - printf("[detect MAXLEAF]\n"); flags->maxleaf = detect_cpuid_maxleaf(); - printf("[L1, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l1_features(flags); - printf("[L7, maxleaf=%d]\n", flags->maxleaf); 
cpuid_detect_l7_features(flags); - printf("finalize\n"); cpuid_features_finalize(flags); if (!_Py_cpuid_check_features(flags)) { - printf("invalid check\n"); _Py_cpuid_disable_features(flags); } - printf("done\n"); } diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index feb33be717e512..9815bcfe27d995 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -470,7 +470,6 @@ def main(): args = parser.parse_args() if args.debug: args.verbose = True - args.verbose = args.debug = True logging.basicConfig( level=logging.DEBUG if args.debug else logging.INFO, format="[%(levelname)s] %(message)s", From 91effb4f6ef809e6065ca98d47d212fd6e0fb9c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:35:47 +0200 Subject: [PATCH 64/78] only parse maxleaf --- Python/cpuinfo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d3e99e8f4ecfee..3a9e21e135f21d 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -113,6 +113,9 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L1 +#undef SHOULD_PARSE_CPUID_L7 +#undef SHOULD_PARSE_CPUID_L7S0 #undef SHOULD_PARSE_CPUID_L7S1 #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) From 6dc532d5c78cf45b5ddd8162f65634a6c0e49a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:45:35 +0200 Subject: [PATCH 65/78] use different variables! 
--- Python/cpuinfo.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3a9e21e135f21d..0795f3f845e3bd 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -113,11 +113,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L1 -#undef SHOULD_PARSE_CPUID_L7 -#undef SHOULD_PARSE_CPUID_L7S0 -#undef SHOULD_PARSE_CPUID_L7S1 - #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif @@ -138,7 +133,9 @@ get_cpuid_info(uint32_t level /* input eax */, { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); + uint32_t r_eax = 0, r_ebx = 0, r_ecx = 0, r_edx = 0; + __cpuid_count(level, count, r_eax, r_ebx, r_ecx, r_edx); + *eax = r_eax, *ebx = r_ebx, *ecx = r_ecx, *edx = r_edx; #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); From 6d5dd0b142c86c98ff5ef3e23cdf9502a89119c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:50:34 +0200 Subject: [PATCH 66/78] disable everything! --- Python/cpuinfo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0795f3f845e3bd..5757a27ba01085 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,6 +25,9 @@ # undef HAS_XGETBV_SUPPORT #endif +#undef HAS_CPUID_SUPPORT +#undef HAS_XGETBV_SUPPORT + // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. 
If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not From 8e5b2f0d684c5a954bd44a3fb065a15bf4d20342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:16:47 +0200 Subject: [PATCH 67/78] revert CI --- .github/workflows/build.yml | 611 ++++++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b5a8afed244a0f..05f20e12f4653d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -152,3 +152,614 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + + build-windows: + name: >- + Windows + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: fromJSON(needs.build-context.outputs.run-windows-tests) + strategy: + fail-fast: false + matrix: + arch: + - x64 + - Win32 + - arm64 + free-threading: + - false + - true + exclude: + # Skip Win32 on free-threaded builds + - { arch: Win32, free-threading: true } + uses: ./.github/workflows/reusable-windows.yml + with: + arch: ${{ matrix.arch }} + free-threading: ${{ matrix.free-threading }} + + build-windows-msi: + name: >- # ${{ '' } is a hack to nest jobs under the same sidebar category + Windows MSI${{ '' }} + needs: build-context + if: fromJSON(needs.build-context.outputs.run-windows-msi) + strategy: + fail-fast: false + matrix: + arch: + - x86 + - x64 + - arm64 + uses: ./.github/workflows/reusable-windows-msi.yml + with: + arch: ${{ matrix.arch }} + + build-macos: + name: >- + macOS + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. 
+ # macOS 13 only runs tests against the GIL-enabled CPython. + # Cirrus used for upstream, macos-14 for forks. + os: + - ghcr.io/cirruslabs/macos-runner:sonoma + - macos-14 + - macos-13 + is-fork: # only used for the exclusion trick + - ${{ github.repository_owner != 'python' }} + free-threading: + - false + - true + exclude: + - os: ghcr.io/cirruslabs/macos-runner:sonoma + is-fork: true + - os: macos-14 + is-fork: false + - os: macos-13 + free-threading: true + uses: ./.github/workflows/reusable-macos.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} + + build-ubuntu: + name: >- + Ubuntu + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + bolt: + - false + - true + free-threading: + - false + - true + os: + - ubuntu-24.04 + - ubuntu-24.04-arm + exclude: + # Do not test BOLT with free-threading, to conserve resources + - bolt: true + free-threading: true + # BOLT currently crashes during instrumentation on aarch64 + - os: ubuntu-24.04-arm + bolt: true + uses: ./.github/workflows/reusable-ubuntu.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + bolt-optimizations: ${{ matrix.bolt }} + free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} + + build-ubuntu-ssltests-openssl: + name: 'Ubuntu SSL tests with OpenSSL' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] + # See Tools/ssl/make_ssl_data.py for notes on adding a new version + env: + OPENSSL_VER: ${{ matrix.openssl_ver }} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace 
}}/multissl/openssl/${{ matrix.openssl_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: SSL tests + run: ./python Lib/test/ssltests.py + + build-ubuntu-ssltests-awslc: + name: 'Ubuntu SSL tests with AWS-LC' + runs-on: ${{ matrix.os }} 
+ timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + awslc_ver: [1.55.0] + env: + AWSLC_VER: ${{ matrix.awslc_ver}} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure SSL lib env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore AWS-LC build' + id: cache-aws-lc + uses: actions/cache@v4 + with: + path: ./multissl/aws-lc/${{ matrix.awslc_ver }} + key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} + - name: Install AWS-LC + if: steps.cache-aws-lc.outputs.cache-hit != 'true' + run: | + python3 Tools/ssl/multissltests.py \ + --steps=library \ + --base-directory "$MULTISSL_DIR" \ + --awslc ${{ matrix.awslc_ver }} \ + --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: | + ./configure CFLAGS="-fdiagnostics-format=json" \ + 
--config-cache \ + --enable-slower-safety \ + --with-pydebug \ + --with-openssl="$OPENSSL_DIR" \ + --with-builtin-hashlib-hashes=blake2 \ + --with-ssl-default-suites=openssl + - name: Build CPython + run: make -j + - name: Display build info + run: make pythoninfo + - name: Verify python is linked to AWS-LC + run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC + - name: SSL tests + run: ./python Lib/test/ssltests.py + + build-wasi: + name: 'WASI' + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + uses: ./.github/workflows/reusable-wasi.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + + test-hypothesis: + name: "Hypothesis tests on Ubuntu" + runs-on: ubuntu-24.04 + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + env: + OPENSSL_VER: 3.0.16 + PYTHONSTRICTEXTENSIONBUILD: 1 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache 
action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Setup directory envs for out-of-tree builds + run: | + echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" + echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" + - name: Create directories for read-only out-of-tree builds + run: mkdir -p "$CPYTHON_RO_SRCDIR" "$CPYTHON_BUILDDIR" + - name: Bind mount sources read-only + run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: ${{ env.CPYTHON_BUILDDIR }}/config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Configure CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + ../cpython-ro-srcdir/configure \ + --config-cache \ + --with-pydebug \ + --enable-slower-safety \ + --with-openssl="$OPENSSL_DIR" + - name: Build CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make -j4 + - name: Display build info + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make pythoninfo + - name: Remount sources writable for tests + # some tests write to srcdir, lack of pyc files slows down testing + run: sudo mount "$CPYTHON_RO_SRCDIR" -oremount,rw + - name: Setup directory envs for out-of-tree builds + run: | + echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" + - name: "Create hypothesis venv" + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + VENV_LOC=$(realpath -m .)/hypovenv + VENV_PYTHON=$VENV_LOC/bin/python + echo "HYPOVENV=${VENV_LOC}" >> "$GITHUB_ENV" + echo "VENV_PYTHON=${VENV_PYTHON}" >> "$GITHUB_ENV" + ./python -m venv "$VENV_LOC" && "$VENV_PYTHON" -m pip install -r 
"${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt" + - name: 'Restore Hypothesis database' + id: cache-hypothesis-database + uses: actions/cache@v4 + with: + path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/ + key: hypothesis-database-${{ github.head_ref || github.run_id }} + restore-keys: | + hypothesis-database- + - name: "Run tests" + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + # Most of the excluded tests are slow test suites with no property tests + # + # (GH-104097) test_sysconfig is skipped because it has tests that are + # failing when executed from inside a virtual environment. + "${VENV_PYTHON}" -m test \ + -W \ + --slowest \ + -j4 \ + --timeout 900 \ + -x test_asyncio \ + -x test_multiprocessing_fork \ + -x test_multiprocessing_forkserver \ + -x test_multiprocessing_spawn \ + -x test_concurrent_futures \ + -x test_socket \ + -x test_subprocess \ + -x test_signal \ + -x test_sysconfig + - uses: actions/upload-artifact@v4 + if: always() + with: + name: hypothesis-example-db + path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/examples/ + + build-asan: + name: 'Address sanitizer' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + env: + OPENSSL_VER: 3.0.16 + PYTHONSTRICTEXTENSIONBUILD: 1 + ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: 
Set up GCC-10 for ASAN + uses: egor-tensin/setup-gcc@v1 + with: + version: 10 + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: ${{ github.event_name == 'push' }} + max-size: "200M" + - name: Configure CPython + run: ./configure --config-cache --with-address-sanitizer --without-pymalloc + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: Tests + run: xvfb-run make ci + + build-tsan: + name: >- + Thread sanitizer + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + free-threading: + - false + - true + uses: ./.github/workflows/reusable-tsan.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + free-threading: ${{ matrix.free-threading }} + + build-ubsan: + name: Undefined behavior sanitizer + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + uses: ./.github/workflows/reusable-ubsan.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + + cross-build-linux: + name: Cross build Linux + 
runs-on: ubuntu-latest + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Set build dir + run: + # an absolute path outside of the working directoy + echo "BUILD_DIR=$(realpath ${{ github.workspace }}/../build)" >> "$GITHUB_ENV" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure host build + run: ./configure --prefix="$BUILD_DIR/host-python" + - name: Install host Python + run: make -j8 install + - name: Run test subset with host build + run: | + "$BUILD_DIR/host-python/bin/python3" -m test test_sysconfig test_site test_embed + - name: Configure cross build + run: ./configure --prefix="$BUILD_DIR/cross-python" --with-build-python="$BUILD_DIR/host-python/bin/python3" + - name: Install cross Python + run: make -j8 install + - name: Run test subset with host build + run: | + "$BUILD_DIR/cross-python/bin/python3" -m test test_sysconfig test_site test_embed + + # CIFuzz job based on https://google.github.io/oss-fuzz/getting-started/continuous-integration/ + cifuzz: + name: CIFuzz + runs-on: ubuntu-latest + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-ci-fuzz == 'true' + permissions: + security-events: write + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined, memory] + steps: + - name: Build fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: cpython3 + 
sanitizer: ${{ matrix.sanitizer }} + - name: Run fuzzers (${{ matrix.sanitizer }}) + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + fuzz-seconds: 600 + oss-fuzz-project-name: cpython3 + output-sarif: true + sanitizer: ${{ matrix.sanitizer }} + - name: Upload crash + if: failure() && steps.build.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.sanitizer }}-artifacts + path: ./out/artifacts + - name: Upload SARIF + if: always() && steps.build.outcome == 'success' + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: cifuzz-sarif/results.sarif + checkout_path: cifuzz-sarif + + all-required-green: # This job does nothing and is only used for the branch protection + name: All required checks pass + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: + - build-context # Transitive dependency, needed to access `run-tests` value + - check-docs + - check-autoconf-regen + - check-generated-files + - build-windows + - build-windows-msi + - build-macos + - build-ubuntu + - build-ubuntu-ssltests-awslc + - build-ubuntu-ssltests-openssl + - build-wasi + - test-hypothesis + - build-asan + - build-tsan + - cross-build-linux + - cifuzz + if: always() + + steps: + - name: Check whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe + with: + allowed-failures: >- + build-windows-msi, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, + test-hypothesis, + cifuzz, + allowed-skips: >- + ${{ + !fromJSON(needs.build-context.outputs.run-docs) + && ' + check-docs, + ' + || '' + }} + ${{ + needs.build-context.outputs.run-tests != 'true' + && ' + check-autoconf-regen, + check-generated-files, + build-macos, + build-ubuntu, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, + build-wasi, + test-hypothesis, + build-asan, + build-tsan, + cross-build-linux, + ' + || '' + }} + ${{ + !fromJSON(needs.build-context.outputs.run-windows-tests) + && 
' + build-windows, + ' + || '' + }} + ${{ + !fromJSON(needs.build-context.outputs.run-ci-fuzz) + && ' + cifuzz, + ' + || '' + }} + jobs: ${{ toJSON(needs) }} From 3b495f6283d4590c1893f928fa69ab898f3e0e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:23:45 +0200 Subject: [PATCH 68/78] only check maxleaf --- Python/cpuinfo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 5757a27ba01085..e27277f485b432 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,7 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_CPUID_SUPPORT #undef HAS_XGETBV_SUPPORT // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 @@ -116,6 +115,11 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L1 +#undef SHOULD_PARSE_CPUID_L7 +#undef SHOULD_PARSE_CPUID_L7S0 +#undef SHOULD_PARSE_CPUID_L7S1 + #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif From 8019f0967b70cb94b3c7ae51e97da34be32dda50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:29:22 +0200 Subject: [PATCH 69/78] parse L1 --- Python/cpuinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index e27277f485b432..a33cbacd13ab31 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -115,7 +115,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L1 #undef SHOULD_PARSE_CPUID_L7 #undef SHOULD_PARSE_CPUID_L7S0 #undef SHOULD_PARSE_CPUID_L7S1 From 820d140186e6e91ab309d3314addcebe6fbbd3b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:36:09 +0200 Subject: [PATCH 70/78] parse L7 --- Python/cpuinfo.c | 13 +++++++++---- 1 
file changed, 9 insertions(+), 4 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index a33cbacd13ab31..eecefd0d43d938 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -99,26 +99,31 @@ #if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L1 #endif #if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L7 # define SHOULD_PARSE_CPUID_L7S0 #endif #if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=1. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L7 # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L7 -#undef SHOULD_PARSE_CPUID_L7S0 -#undef SHOULD_PARSE_CPUID_L7S1 - #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif From df85ce5f1c0f1490b23beea35a88992dd9959a6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 14:01:18 +0200 Subject: [PATCH 71/78] it *should* work now --- Makefile.pre.in | 3 +++ Python/cpuinfo.c | 15 +++++++++++++-- configure | 46 +++++++++++++++++++++++++++++++++++++++++++++- configure.ac | 10 +++++++++- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index c6cca1301005ab..5c3467a5e33446 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1938,6 +1938,9 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile 
$(srcdir)/Include/pydt $(MULTIARCH_CPPFLAGS) \ -o $@ $(srcdir)/Python/sysmodule.c +Python/cpuinfo.o: $(srcdir)/Python/cpuinfo.c Makefile + $(CC) -c $(PY_CORE_CFLAGS) @CORE_CPUINFO_CFLAGS@ -o $@ $(srcdir)/Python/cpuinfo.c + $(IO_OBJS): $(IO_H) .PHONY: regen-pegen-metaparser diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index eecefd0d43d938..9034e6ac03ef67 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,8 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_XGETBV_SUPPORT - // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not @@ -162,6 +160,18 @@ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now # if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) +# if defined(__clang__) +# if _Py__has_builtin(__builtin_ia32_xgetbv) + return (uint64_t)_xgetbv(index); +# else + /* + * Without -mxsave support, directly using xgetbv() with raw opcode + * may still fail on some platforms (e.g., AMD64 + FreeBSD + clang). + * To be on the safe side, we assume that XGETBV is not supported. 
+ */ + return 0; +# endif +# else /* gcc & icc */ uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -170,6 +180,7 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; +# endif # elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); # else diff --git a/configure b/configure index b0a7ed029fb1b0..9a72d6f30b53ea 100755 --- a/configure +++ b/configure @@ -725,6 +725,7 @@ LIBHACL_BLAKE2_SIMD128_OBJS LIBHACL_SIMD128_FLAGS LIBHACL_LDFLAGS LIBHACL_CFLAGS +CORE_CPUINFO_CFLAGS MODULE_UNICODEDATA_FALSE MODULE_UNICODEDATA_TRUE MODULE__MULTIBYTECODEC_FALSE @@ -32544,7 +32545,7 @@ fi # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. # @@ -34165,6 +34166,49 @@ fi fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mxsave" >&5 +printf %s "checking whether C compiler accepts -mxsave... " >&6; } +if test ${ax_cv_check_cflags__Werror__mxsave+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mxsave=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mxsave=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mxsave" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mxsave" >&6; } +if test "x$ax_cv_check_cflags__Werror__mxsave" = xyes +then : + CORE_CPUINFO_CFLAGS=-mxsave +else case e in #( + e) CORE_CPUINFO_CFLAGS= ;; +esac +fi + + + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/configure.ac b/configure.ac index 75778af3de3170..e1c01228450fd6 100644 --- a/configure.ac +++ b/configure.ac @@ -8003,7 +8003,7 @@ AC_DEFUN([PY_SIMD_DETECT], [ # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. # @@ -8058,6 +8058,14 @@ then PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi +dnl Check that -mxsave can be used for cpuinfo.c as the latter +dnl requires to be compiled with this option for xgetbv() support. 
+AX_CHECK_COMPILE_FLAG([-mxsave], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], + [-Werror]) +AC_SUBST([CORE_CPUINFO_CFLAGS]) + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # From 915383ef2f42791a84b5108a944983fae585f1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:05:57 +0200 Subject: [PATCH 72/78] :@ --- Python/cpuinfo.c | 41 +++++++---------- configure | 114 +++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 36 +++++++++++++-- pyconfig.h.in | 6 +++ 4 files changed, 168 insertions(+), 29 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 9034e6ac03ef67..1a9f3237ca6299 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -8,21 +8,15 @@ // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER // as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures. +#define HAS_CPUID_SUPPORT #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() -# define HAS_CPUID_SUPPORT -# if defined(__clang__) -# include // _xgetbv() -# endif -# define HAS_XGETBV_SUPPORT +# include // _xgetbv() #elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() -# define HAS_CPUID_SUPPORT # include // _xgetbv() -# define HAS_XGETBV_SUPPORT #else # undef HAS_CPUID_SUPPORT -# undef HAS_XGETBV_SUPPORT #endif // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 @@ -30,6 +24,11 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. 
#ifdef HAS_CPUID_SUPPORT +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \ + || defined(_Py_CPUINFO_USE_XGETBV_OPCODE) +# define HAS_XGETBV_SUPPORT +#endif + #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -159,19 +158,10 @@ static uint64_t /* should only be used after calling cpuid(1, 0, ...) */ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now -# if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) -# if defined(__clang__) -# if _Py__has_builtin(__builtin_ia32_xgetbv) +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) + /* directly use the compiler's helper if -mxsave is available */ return (uint64_t)_xgetbv(index); -# else - /* - * Without -mxsave support, directly using xgetbv() with raw opcode - * may still fail on some platforms (e.g., AMD64 + FreeBSD + clang). - * To be on the safe side, we assume that XGETBV is not supported. - */ - return 0; -# endif -# else /* gcc & icc */ +#elif defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -180,14 +170,15 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; -# endif -# elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) +#elif defined(_M_X64) return (uint64_t)_xgetbv(index); -# else +#else (void)index; return 0; -# endif +#endif } +#else +#define get_xgetbv(_INDEX) 0 #endif /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ @@ -364,14 +355,12 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. -#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->xsave && flags->osxsave ? 
get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -#endif } #endif diff --git a/configure b/configure index 9a72d6f30b53ea..c32775808851bb 100755 --- a/configure +++ b/configure @@ -34209,6 +34209,120 @@ fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv() is natively supported" >&5 +printf %s "checking _xgetbv() is natively supported... " >&6; } +if test ${ac_cv_use_xgetbv_func+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +_xgetbv(0) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_use_xgetbv_func=yes +else case e in #( + e) ac_cv_use_xgetbv_func=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_func" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_func" >&6; } +if test "$ac_cv_use_xgetbv_func" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv opcode is supported" >&5 +printf %s "checking xgetbv opcode is supported... 
" >&6; } +if test ${ac_cv_use_xgetbv_opcode+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror" + if test "$cross_compiling" = yes +then : + ac_cv_use_xgetbv_opcode=no +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + int main(void) + { + uint32_t eax = 0, edx = 0, index = 0; + __asm__ __volatile__( + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (index)); + return 0; + } + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + ac_cv_use_xgetbv_opcode=yes +else case e in #( + e) ac_cv_use_xgetbv_opcode=no ;; +esac +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_opcode" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_opcode" >&6; } +if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_OPCODE 1" >>confdefs.h + +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/configure.ac b/configure.ac index e1c01228450fd6..5a37d128d1ce4b 100644 --- a/configure.ac +++ b/configure.ac @@ -8061,11 +8061,41 @@ fi dnl Check that -mxsave can be used for cpuinfo.c as the latter dnl requires to be compiled with this option for xgetbv() support. 
AX_CHECK_COMPILE_FLAG([-mxsave], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], - [-Werror]) + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], + [-Werror]) AC_SUBST([CORE_CPUINFO_CFLAGS]) +AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [ + WITH_SAVE_ENV([ + CFLAGS="$CFLAGS -Werror -mxsave" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[@%:@include ]], [[_xgetbv(0)]])], + [ac_cv_use_xgetbv_func=yes], + [ac_cv_use_xgetbv_func=no])])]) +if test "$ac_cv_use_xgetbv_func" = "yes" ; then + AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred]) +fi + +AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [ + WITH_SAVE_ENV([ + CFLAGS="$CFLAGS -Werror" + AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], [[ + int main(void) + { + uint32_t eax = 0, edx = 0; + __asm__ __volatile__( + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); + return 0; + } + ]])], + [ac_cv_use_xgetbv_opcode=yes], + [ac_cv_use_xgetbv_opcode=no], + [ac_cv_use_xgetbv_opcode=no])])]) +if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then + AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred]) +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/pyconfig.h.in b/pyconfig.h.in index 478855c7022c3a..eae7c2d874a3d5 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2104,6 +2104,12 @@ /* Define if '-mssse3' is a valid compiler flag. */ #undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +/* _xgetbv() is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_FUNC + +/* XGETBV opcode is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_OPCODE + /* Defined if _Complex C type can be used with libffi. 
*/ #undef _Py_FFI_SUPPORT_C_COMPLEX From c6cf903cec6e4ae125172223351c9fb5e33639c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:06:10 +0200 Subject: [PATCH 73/78] :@ --- configure | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure b/configure index c32775808851bb..5656baadb32af5 100755 --- a/configure +++ b/configure @@ -34209,8 +34209,8 @@ fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv() is natively supported" >&5 -printf %s "checking _xgetbv() is natively supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv(0) is natively supported" >&5 +printf %s "checking _xgetbv(0) is natively supported... " >&6; } if test ${ac_cv_use_xgetbv_func+y} then : printf %s "(cached) " >&6 @@ -34258,8 +34258,8 @@ printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv opcode is supported" >&5 -printf %s "checking xgetbv opcode is supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv(0) opcode is supported" >&5 +printf %s "checking xgetbv(0) opcode is supported... " >&6; } if test ${ac_cv_use_xgetbv_opcode+y} then : printf %s "(cached) " >&6 @@ -34285,9 +34285,9 @@ main (void) int main(void) { - uint32_t eax = 0, edx = 0, index = 0; + uint32_t eax = 0, edx = 0; __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (index)); + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); return 0; } From 498518fe8efc6f7a35dd332d2d9d654965bc4f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:21:38 +0200 Subject: [PATCH 74/78] waaaaa! 
--- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 1a9f3237ca6299..8b7acca3b37e73 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -29,6 +29,8 @@ # define HAS_XGETBV_SUPPORT #endif +#undef HAS_XGETBV_SUPPORT + #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ From 3d56d9391682d1d6e7fa8155fa5c3b23d448436a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:48:57 +0200 Subject: [PATCH 75/78] remove support for XCR0 registers --- Include/internal/pycore_cpuinfo.h | 15 +- .../internal/pycore_cpuinfo_xsave_features.h | 43 ----- Makefile.pre.in | 4 - PCbuild/pythoncore.vcxproj | 1 - PCbuild/pythoncore.vcxproj.filters | 3 - Python/cpuinfo.c | 61 ------- Tools/cpuinfo/libcpuinfo/features/xsave.py | 51 ------ configure | 158 ------------------ configure.ac | 38 ----- pyconfig.h.in | 6 - 10 files changed, 2 insertions(+), 378 deletions(-) delete mode 100644 Include/internal/pycore_cpuinfo_xsave_features.h delete mode 100644 Tools/cpuinfo/libcpuinfo/features/xsave.py diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 059653c844394f..c837724c59fc27 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -25,7 +25,6 @@ extern "C" { #include "Python.h" #include "pycore_cpuinfo_cpuid_features.h" -#include "pycore_cpuinfo_xsave_features.h" typedef struct _Py_cpuid_features_s { uint32_t maxleaf; @@ -101,21 +100,11 @@ typedef struct _Py_cpuid_features_s { _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS - // --- XCR0 register bits ------------------------------------------------- - _Py_CPUID_DECL_FLAG(xcr0_sse); - // On some Intel CPUs, it is possible for the CPU to support AVX2 - 
// instructions even though the underlying OS does not know about - // AVX. In particular, only (SSE) XMM registers will be saved and - // restored on context-switch, but not (AVX) YMM registers. - _Py_CPUID_DECL_FLAG(xcr0_avx); - _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); - _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); - _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); #undef _Py_CPUID_DECL_FLAG // Whenever a field is added or removed above, update the - // number of fields (40) and adjust the bitsize of 'ready' + // number of fields (35) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. - uint8_t ready; // set if the structure is ready for usage + uint8_t ready: 5; // set if the structure is ready for usage } _Py_cpuid_features; /* diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h deleted file mode 100644 index e8719261b07604..00000000000000 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/libcpuinfo/features/xsave.py - * - * XSAVE state components (XCR0 control register). - * - * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. 
- */ - -#ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H -#define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "Python.h" - -/*[python input] -import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) -from libcpuinfo.features.xsave import make_xsave_features_constants -print(make_xsave_features_constants()) -[python start generated code]*/ -// clang-format off -/** Constants for XSAVE components */ -#define _Py_XSAVE_MASK_XCR0_SSE 0x00000002 // bit = 1 -#define _Py_XSAVE_MASK_XCR0_AVX 0x00000004 // bit = 2 -#define _Py_XSAVE_MASK_XCR0_AVX512_OPMASK 0x00000020 // bit = 5 -#define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 -#define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit = 7 -// clang-format on -/*[python end generated code: output=ac059b802b4317cb input=0a1b0774d3271477]*/ - -#ifdef __cplusplus -} -#endif - -#endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 5c3467a5e33446..87fa7d06405aac 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1297,7 +1297,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_cpuinfo.h \ $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ - $(srcdir)/Include/internal/pycore_cpuinfo_xsave_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ @@ -1938,9 +1937,6 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile $(srcdir)/Include/pydt $(MULTIARCH_CPPFLAGS) \ -o $@ $(srcdir)/Python/sysmodule.c -Python/cpuinfo.o: $(srcdir)/Python/cpuinfo.c Makefile - $(CC) -c $(PY_CORE_CFLAGS) @CORE_CPUINFO_CFLAGS@ -o $@ $(srcdir)/Python/cpuinfo.c - $(IO_OBJS): $(IO_H) .PHONY: regen-pegen-metaparser diff --git 
a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 2d843ea3bff576..fce0bd72173f6d 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -231,7 +231,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 8afc2010ef93ca..6dbcb8c70d88e9 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -606,9 +606,6 @@ Include\internal - - Include\internal - Include\internal diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8b7acca3b37e73..08d61f3deb01cc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -11,10 +11,8 @@ #define HAS_CPUID_SUPPORT #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() -# include // _xgetbv() #elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() -# include // _xgetbv() #else # undef HAS_CPUID_SUPPORT #endif @@ -24,13 +22,6 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \ - || defined(_Py_CPUINFO_USE_XGETBV_OPCODE) -# define HAS_XGETBV_SUPPORT -#endif - -#undef HAS_XGETBV_SUPPORT - #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -155,34 +146,6 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -#if defined(HAS_XGETBV_SUPPORT) && defined(SHOULD_PARSE_CPUID_L1) -static uint64_t /* should only be used after calling cpuid(1, 0, ...) 
*/ -get_xgetbv(uint32_t index) -{ - assert(index == 0); // only XCR0 is supported for now -#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) - /* directly use the compiler's helper if -mxsave is available */ - return (uint64_t)_xgetbv(index); -#elif defined(__x86_64__) && defined(__GNUC__) - uint32_t eax = 0, edx = 0; - __asm__ volatile( - /* raw opcode for xgetbv for compatibility with older toolchains */ - ".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (index) - ); - return ((uint64_t)edx << 32) | eax; -#elif defined(_M_X64) - return (uint64_t)_xgetbv(index); -#else - (void)index; - return 0; -#endif -} -#else -#define get_xgetbv(_INDEX) 0 -#endif - /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ static uint32_t detect_cpuid_maxleaf(void) @@ -349,23 +312,6 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, } #endif -#ifdef SHOULD_PARSE_CPUID_L1 -static void /* should only be used after calling cpuid(1, 0, ...) */ -detect_cpuid_xsave_state(_Py_cpuid_features *flags) -{ - assert(flags->ready == 0); - assert(flags->maxleaf >= 1); - (void)flags; - // Keep the ordering and newlines as they are declared in the structure. - uint64_t xcr0 = flags->xsave && flags->osxsave ? 
get_xgetbv(0) : 0; - flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); - flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); - flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -} -#endif - static void cpuid_features_finalize(_Py_cpuid_features *flags) { @@ -460,12 +406,6 @@ _Py_cpuid_check_features(const _Py_cpuid_features *flags) \ MACRO(xsave); \ MACRO(osxsave); \ - \ - MACRO(xcr0_sse); \ - MACRO(xcr0_avx); \ - MACRO(xcr0_avx512_opmask); \ - MACRO(xcr0_avx512_zmm_hi256); \ - MACRO(xcr0_avx512_hi16_zmm); \ } while (0) void @@ -530,7 +470,6 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - detect_cpuid_xsave_state(flags); } } #else diff --git a/Tools/cpuinfo/libcpuinfo/features/xsave.py b/Tools/cpuinfo/libcpuinfo/features/xsave.py deleted file mode 100644 index 474162dfc4463b..00000000000000 --- a/Tools/cpuinfo/libcpuinfo/features/xsave.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Generate constants for XSAVE state components (XCR0 control register). - -See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. - -.. 
seealso:: :file:`Include/internal/pycore_cpuinfo_xsave_features.h` -""" - -from __future__ import annotations - -__all__ = ["make_xsave_features_constants"] - -from typing import TYPE_CHECKING - -import libcpuinfo.util as util -from libcpuinfo.util import DOXYGEN_STYLE - -if TYPE_CHECKING: - from typing import Final - - type Feature = str - type BitIndex = int - -XSAVE_FEATURES: Final[dict[Feature, BitIndex]] = { - "SSE": 1, - "AVX": 2, - "AVX512_OPMASK": 5, - "AVX512_ZMM_HI256": 6, - "AVX512_HI16_ZMM": 7, -} - - -def get_constant_name(feature: Feature) -> str: - return f"_Py_XSAVE_MASK_XCR0_{feature}" - - -_NAME_MAXSIZE: Final[int] = util.next_block( - max(map(len, map(get_constant_name, XSAVE_FEATURES))) -) - - -def make_xsave_features_constants() -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" - writer = util.CWriter() - writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) - for feature_name, bit in XSAVE_FEATURES.items(): - if not 0 <= bit < 32: - raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_constant_name(feature_name) - writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) - return writer.build() diff --git a/configure b/configure index 5656baadb32af5..a033cb515286da 100755 --- a/configure +++ b/configure @@ -725,7 +725,6 @@ LIBHACL_BLAKE2_SIMD128_OBJS LIBHACL_SIMD128_FLAGS LIBHACL_LDFLAGS LIBHACL_CFLAGS -CORE_CPUINFO_CFLAGS MODULE_UNICODEDATA_FALSE MODULE_UNICODEDATA_TRUE MODULE__MULTIBYTECODEC_FALSE @@ -34164,163 +34163,6 @@ fi -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mxsave" >&5 -printf %s "checking whether C compiler accepts -mxsave... " >&6; } -if test ${ax_cv_check_cflags__Werror__mxsave+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mxsave" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mxsave=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mxsave=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mxsave" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mxsave" >&6; } -if test "x$ax_cv_check_cflags__Werror__mxsave" = xyes -then : - CORE_CPUINFO_CFLAGS=-mxsave -else case e in #( - e) CORE_CPUINFO_CFLAGS= ;; -esac -fi - - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv(0) is natively supported" >&5 -printf %s "checking _xgetbv(0) is natively supported... " >&6; } -if test ${ac_cv_use_xgetbv_func+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - save_CFLAGS=$CFLAGS -save_CPPFLAGS=$CPPFLAGS -save_LDFLAGS=$LDFLAGS -save_LIBS=$LIBS - - - CFLAGS="$CFLAGS -Werror -mxsave" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -_xgetbv(0) - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_use_xgetbv_func=yes -else case e in #( - e) ac_cv_use_xgetbv_func=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -CFLAGS=$save_CFLAGS -CPPFLAGS=$save_CPPFLAGS -LDFLAGS=$save_LDFLAGS -LIBS=$save_LIBS - - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_func" >&5 -printf "%s\n" "$ac_cv_use_xgetbv_func" >&6; } -if test "$ac_cv_use_xgetbv_func" = "yes" ; then - -printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h - -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv(0) opcode is supported" >&5 -printf %s "checking xgetbv(0) opcode is supported... 
" >&6; } -if test ${ac_cv_use_xgetbv_opcode+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - save_CFLAGS=$CFLAGS -save_CPPFLAGS=$CPPFLAGS -save_LDFLAGS=$LDFLAGS -save_LIBS=$LIBS - - - CFLAGS="$CFLAGS -Werror" - if test "$cross_compiling" = yes -then : - ac_cv_use_xgetbv_opcode=no -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ - - int main(void) - { - uint32_t eax = 0, edx = 0; - __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); - return 0; - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - ac_cv_use_xgetbv_opcode=yes -else case e in #( - e) ac_cv_use_xgetbv_opcode=no ;; -esac -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi - -CFLAGS=$save_CFLAGS -CPPFLAGS=$save_CPPFLAGS -LDFLAGS=$save_LDFLAGS -LIBS=$save_LIBS - - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_opcode" >&5 -printf "%s\n" "$ac_cv_use_xgetbv_opcode" >&6; } -if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then - -printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_OPCODE 1" >>confdefs.h - fi ############################################################################### diff --git a/configure.ac b/configure.ac index 5a37d128d1ce4b..00d57c8a0ae20b 100644 --- a/configure.ac +++ b/configure.ac @@ -8058,44 +8058,6 @@ then PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi -dnl Check that -mxsave can be used for cpuinfo.c as the latter -dnl requires to be compiled with this option for xgetbv() support. 
-AX_CHECK_COMPILE_FLAG([-mxsave], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], - [-Werror]) -AC_SUBST([CORE_CPUINFO_CFLAGS]) - -AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [ - WITH_SAVE_ENV([ - CFLAGS="$CFLAGS -Werror -mxsave" - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[@%:@include ]], [[_xgetbv(0)]])], - [ac_cv_use_xgetbv_func=yes], - [ac_cv_use_xgetbv_func=no])])]) -if test "$ac_cv_use_xgetbv_func" = "yes" ; then - AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred]) -fi - -AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [ - WITH_SAVE_ENV([ - CFLAGS="$CFLAGS -Werror" - AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], [[ - int main(void) - { - uint32_t eax = 0, edx = 0; - __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); - return 0; - } - ]])], - [ac_cv_use_xgetbv_opcode=yes], - [ac_cv_use_xgetbv_opcode=no], - [ac_cv_use_xgetbv_opcode=no])])]) -if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then - AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred]) -fi - ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/pyconfig.h.in b/pyconfig.h.in index eae7c2d874a3d5..478855c7022c3a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2104,12 +2104,6 @@ /* Define if '-mssse3' is a valid compiler flag. */ #undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS -/* _xgetbv() is preferred */ -#undef _Py_CPUINFO_USE_XGETBV_FUNC - -/* XGETBV opcode is preferred */ -#undef _Py_CPUINFO_USE_XGETBV_OPCODE - /* Defined if _Complex C type can be used with libffi. 
*/ #undef _Py_FFI_SUPPORT_C_COMPLEX From 08daa8abfaf2b18a46a15073e0af7c7af06ec8ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:06:55 +0200 Subject: [PATCH 76/78] fix SIMD-256 detection --- Modules/blake2module.c | 3 ++- Modules/hmacmodule.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 2f8baea62d77fc..9797c98d2c3091 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -122,7 +122,8 @@ blake2module_init_cpu_features(Blake2State *state) #endif #if _Py_HACL_CAN_COMPILE_VEC256 - state->can_run_simd256 = flags.avx && flags.avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else state->can_run_simd256 = false; #endif diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 064e31fe830deb..cfbccaab136bdf 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -1564,7 +1564,8 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #endif #if _Py_HACL_CAN_COMPILE_VEC256 - state->can_run_simd256 = flags.avx && flags.avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else state->can_run_simd256 = false; #endif From afd1137fddfce9fa6e9fb5072b31e44c6706296e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:22:52 +0200 Subject: [PATCH 77/78] simplify `get_cpuid_info` --- Python/cpuinfo.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 08d61f3deb01cc..3e3feb55b8c9b6 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -134,9 +134,7 @@ get_cpuid_info(uint32_t level /* input eax */, { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) - uint32_t r_eax = 0, r_ebx = 0, 
r_ecx = 0, r_edx = 0; - __cpuid_count(level, count, r_eax, r_ebx, r_ecx, r_edx); - *eax = r_eax, *ebx = r_ebx, *ecx = r_ecx, *edx = r_edx; + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); From 79eb72d28d726d43c1bb0a735196bc8b69f2ace6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:09:17 +0200 Subject: [PATCH 78/78] add CODEOWNERS --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08d7a80d7726d3..1113c272529fac 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -56,6 +56,10 @@ Lib/test/test_capi/test_misc.py @markshannon Lib/test/test_pyrepl/* @pablogsal @lysnikolaou @ambv Tools/c-analyzer/ @ericsnowcurrently +# cpuinfo +Python/cpuinfo.c @picnixz +Include/internal/pycore_cpuinfo*.h @picnixz + # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka