Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3dc2d62

Browse files
committed
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special instructions, crc32b and crc32q, for calculating CRC-32C. They greatly speed up CRC calculation. Whether the instructions can be used or not depends on the compiler and the target architecture. If generation of SSE 4.2 instructions is allowed for the target (-msse4.2 flag on gcc and clang), use them. If they are not allowed by default, but the compiler supports the -msse4.2 flag to enable them, compile just the CRC-32C function with -msse4.2 flag, and check at runtime whether the processor we're running on supports it. If it doesn't, fall back to the slicing-by-8 algorithm. (With the common defaults on current operating systems, the runtime-check variant is what you get in practice.) Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
1 parent 4f700bc commit 3dc2d62

11 files changed

+534
-5
lines changed

config/c-compiler.m4

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,3 +473,30 @@ AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS],
473473
if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
474474
AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int *, int64).])
475475
fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
476+
477+
# PGAC_SSE42_CRC32_INTRINSICS
478+
# -----------------------
479+
# Check if the compiler supports _mm_crc32_u8 and _mm_crc32_u64 intrinsics.
480+
# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
481+
# intrinsics are supported, sets pgac_sse42_crc32_intrinsics, and CFLAGS_SSE42.
#
# The flag argument is appended to CFLAGS only for the duration of the link
# test; CFLAGS and ac_c_werror_flag are saved before and restored after it.
# The test is run with ac_c_werror_flag=yes (presumably so that a compiler
# that only warns about the flag or the intrinsics is treated as not
# supporting them -- confirm against the Autoconf manual).
#
# The result is cached separately for each flag, in a variable named
# pgac_cv_sse42_crc32_intrinsics_<flag>, where the flag is made safe for use
# in a shell variable name via AS_TR_SH. On success CFLAGS_SSE42 is set to
# the flag that was passed in, which is empty when no flag was needed.
482+
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
483+
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
484+
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=$1], [Ac_cachevar],
485+
[pgac_save_CFLAGS=$CFLAGS
486+
CFLAGS="$pgac_save_CFLAGS $1"
487+
ac_save_c_werror_flag=$ac_c_werror_flag
488+
ac_c_werror_flag=yes
489+
AC_TRY_LINK([#include <nmmintrin.h>],
490+
[unsigned int crc = 0;
491+
crc = _mm_crc32_u8(crc, 0);
492+
crc = (unsigned int) _mm_crc32_u64(crc, 0);],
493+
[Ac_cachevar=yes],
494+
[Ac_cachevar=no])
495+
ac_c_werror_flag=$ac_save_c_werror_flag
496+
CFLAGS="$pgac_save_CFLAGS"])
497+
if test x"$Ac_cachevar" = x"yes"; then
498+
CFLAGS_SSE42="$1"
499+
pgac_sse42_crc32_intrinsics=yes
500+
fi
501+
undefine([Ac_cachevar])dnl
502+
])# PGAC_SSE42_CRC32_INTRINSICS

configure

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,8 @@ MSGMERGE
650650
MSGFMT_FLAGS
651651
MSGFMT
652652
HAVE_POSIX_SIGNALS
653+
PG_CRC32C_OBJS
654+
CFLAGS_SSE42
653655
LDAP_LIBS_BE
654656
LDAP_LIBS_FE
655657
PTHREAD_CFLAGS
@@ -14095,6 +14097,216 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h
1409514097

1409614098
fi
1409714099

14100+
14101+
# Check for x86 cpuid instruction
14102+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5
14103+
$as_echo_n "checking for __get_cpuid... " >&6; }
14104+
if ${pgac_cv__get_cpuid+:} false; then :
14105+
$as_echo_n "(cached) " >&6
14106+
else
14107+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
14108+
/* end confdefs.h. */
14109+
#include <cpuid.h>
14110+
int
14111+
main ()
14112+
{
14113+
unsigned int exx[4] = {0, 0, 0, 0};
14114+
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
14115+
14116+
;
14117+
return 0;
14118+
}
14119+
_ACEOF
14120+
if ac_fn_c_try_link "$LINENO"; then :
14121+
pgac_cv__get_cpuid="yes"
14122+
else
14123+
pgac_cv__get_cpuid="no"
14124+
fi
14125+
rm -f core conftest.err conftest.$ac_objext \
14126+
conftest$ac_exeext conftest.$ac_ext
14127+
fi
14128+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid" >&5
14129+
$as_echo "$pgac_cv__get_cpuid" >&6; }
14130+
if test x"$pgac_cv__get_cpuid" = x"yes"; then
14131+
14132+
$as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h
14133+
14134+
fi
14135+
14136+
# Check for the __cpuid intrinsic declared in <intrin.h>. It takes an int[4]
# output buffer followed by the function id. This is a different interface
# from __get_cpuid() in <cpuid.h>, which is probed separately, so the test
# program must call __cpuid itself for HAVE__CPUID to mean anything.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5
14137+
$as_echo_n "checking for __cpuid... " >&6; }
14138+
if ${pgac_cv__cpuid+:} false; then :
14139+
$as_echo_n "(cached) " >&6
14140+
else
14141+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
14142+
/* end confdefs.h. */
14143+
#include <intrin.h>
14144+
int
14145+
main ()
14146+
{
14147+
int exx[4] = {0, 0, 0, 0};
14148+
__cpuid(exx, 1);
14149+
14150+
;
14151+
return 0;
14152+
}
14153+
_ACEOF
14154+
if ac_fn_c_try_link "$LINENO"; then :
14155+
pgac_cv__cpuid="yes"
14156+
else
14157+
pgac_cv__cpuid="no"
14158+
fi
14159+
rm -f core conftest.err conftest.$ac_objext \
14160+
conftest$ac_exeext conftest.$ac_ext
14161+
fi
14162+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5
14163+
$as_echo "$pgac_cv__cpuid" >&6; }
14164+
if test x"$pgac_cv__cpuid" = x"yes"; then
14165+
14166+
$as_echo "#define HAVE__CPUID 1" >>confdefs.h
14167+
14168+
fi
14169+
14170+
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
14171+
#
14172+
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
14173+
# with the default compiler flags. If not, check if adding the -msse4.2
14174+
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
14175+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=" >&5
14176+
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=... " >&6; }
14177+
if ${pgac_cv_sse42_crc32_intrinsics_+:} false; then :
14178+
$as_echo_n "(cached) " >&6
14179+
else
14180+
pgac_save_CFLAGS=$CFLAGS
14181+
CFLAGS="$pgac_save_CFLAGS "
14182+
ac_save_c_werror_flag=$ac_c_werror_flag
14183+
ac_c_werror_flag=yes
14184+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
14185+
/* end confdefs.h. */
14186+
#include <nmmintrin.h>
14187+
int
14188+
main ()
14189+
{
14190+
unsigned int crc = 0;
14191+
crc = _mm_crc32_u8(crc, 0);
14192+
crc = (unsigned int) _mm_crc32_u64(crc, 0);
14193+
;
14194+
return 0;
14195+
}
14196+
_ACEOF
14197+
if ac_fn_c_try_link "$LINENO"; then :
14198+
pgac_cv_sse42_crc32_intrinsics_=yes
14199+
else
14200+
pgac_cv_sse42_crc32_intrinsics_=no
14201+
fi
14202+
rm -f core conftest.err conftest.$ac_objext \
14203+
conftest$ac_exeext conftest.$ac_ext
14204+
ac_c_werror_flag=$ac_save_c_werror_flag
14205+
CFLAGS="$pgac_save_CFLAGS"
14206+
fi
14207+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics_" >&5
14208+
$as_echo "$pgac_cv_sse42_crc32_intrinsics_" >&6; }
14209+
if test x"$pgac_cv_sse42_crc32_intrinsics_" = x"yes"; then
14210+
CFLAGS_SSE42=""
14211+
pgac_sse42_crc32_intrinsics=yes
14212+
fi
14213+
14214+
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
14215+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2" >&5
14216+
$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2... " >&6; }
14217+
if ${pgac_cv_sse42_crc32_intrinsics__msse4_2+:} false; then :
14218+
$as_echo_n "(cached) " >&6
14219+
else
14220+
pgac_save_CFLAGS=$CFLAGS
14221+
CFLAGS="$pgac_save_CFLAGS -msse4.2"
14222+
ac_save_c_werror_flag=$ac_c_werror_flag
14223+
ac_c_werror_flag=yes
14224+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
14225+
/* end confdefs.h. */
14226+
#include <nmmintrin.h>
14227+
int
14228+
main ()
14229+
{
14230+
unsigned int crc = 0;
14231+
crc = _mm_crc32_u8(crc, 0);
14232+
crc = (unsigned int) _mm_crc32_u64(crc, 0);
14233+
;
14234+
return 0;
14235+
}
14236+
_ACEOF
14237+
if ac_fn_c_try_link "$LINENO"; then :
14238+
pgac_cv_sse42_crc32_intrinsics__msse4_2=yes
14239+
else
14240+
pgac_cv_sse42_crc32_intrinsics__msse4_2=no
14241+
fi
14242+
rm -f core conftest.err conftest.$ac_objext \
14243+
conftest$ac_exeext conftest.$ac_ext
14244+
ac_c_werror_flag=$ac_save_c_werror_flag
14245+
CFLAGS="$pgac_save_CFLAGS"
14246+
fi
14247+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics__msse4_2" >&5
14248+
$as_echo "$pgac_cv_sse42_crc32_intrinsics__msse4_2" >&6; }
14249+
if test x"$pgac_cv_sse42_crc32_intrinsics__msse4_2" = x"yes"; then
14250+
CFLAGS_SSE42="-msse4.2"
14251+
pgac_sse42_crc32_intrinsics=yes
14252+
fi
14253+
14254+
fi
14255+
14256+
14257+
# Select CRC-32C implementation.
14258+
#
14259+
# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them
14260+
# always. If they require extra CFLAGS, compile both implementations and
14261+
# select which one to use at runtime, depending on whether SSE 4.2 is
14262+
# supported by the processor we're running on.
14263+
#
14264+
# You can override this logic by setting the appropriate USE_*_CRC32C* flag to 1
14265+
# in the template or configure command line.
14266+
if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
14267+
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then
14268+
USE_SSE42_CRC32C=1
14269+
else
14270+
# the CPUID instruction is needed for the runtime check.
14271+
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
14272+
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
14273+
else
14274+
USE_SLICING_BY_8_CRC32C=1
14275+
fi
14276+
fi
14277+
fi
14278+
14279+
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
14280+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which CRC-32C implementation to use" >&5
14281+
$as_echo_n "checking which CRC-32C implementation to use... " >&6; }
14282+
if test x"$USE_SSE42_CRC32C" = x"1"; then
14283+
14284+
$as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h
14285+
14286+
PG_CRC32C_OBJS="pg_crc32c_sse42.o"
14287+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5
14288+
$as_echo "SSE 4.2" >&6; }
14289+
else
14290+
if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
14291+
14292+
$as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h
14293+
14294+
PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
14295+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
14296+
$as_echo "SSE 4.2 with runtime check" >&6; }
14297+
else
14298+
14299+
$as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h
14300+
14301+
PG_CRC32C_OBJS="pg_crc32c_sb8.o"
14302+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5
14303+
$as_echo "slicing-by-8" >&6; }
14304+
fi
14305+
fi
14306+
14307+
14308+
14309+
# Check that POSIX signals are available if thread safety is enabled.
1409814310
if test "$PORTNAME" != "win32"
1409914311
then
1410014312
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for POSIX signal interface" >&5

configure.in

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1790,6 +1790,84 @@ PGAC_HAVE_GCC__SYNC_INT64_CAS
17901790
PGAC_HAVE_GCC__ATOMIC_INT32_CAS
17911791
PGAC_HAVE_GCC__ATOMIC_INT64_CAS
17921792

1793+
1794+
# Check for x86 cpuid instruction
1795+
AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid],
1796+
[AC_TRY_LINK([#include <cpuid.h>],
1797+
[unsigned int exx[4] = {0, 0, 0, 0};
1798+
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
1799+
],
1800+
[pgac_cv__get_cpuid="yes"],
1801+
[pgac_cv__get_cpuid="no"])])
1802+
if test x"$pgac_cv__get_cpuid" = x"yes"; then
1803+
AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.])
1804+
fi
1805+
1806+
# Check for the __cpuid intrinsic declared in <intrin.h>. It takes an int
# array of four elements as output buffer, followed by the function id. This
# is a different interface from __get_cpuid() in <cpuid.h>, which is probed
# separately, so the test program must call __cpuid itself for HAVE__CPUID
# to mean anything.
AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid],
1807+
[AC_TRY_LINK([#include <intrin.h>],
1808+
[int exx[4] = {0, 0, 0, 0};
1809+
__cpuid(exx, 1);
1810+
],
1811+
[pgac_cv__cpuid="yes"],
1812+
[pgac_cv__cpuid="no"])])
1813+
if test x"$pgac_cv__cpuid" = x"yes"; then
1814+
AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.])
1815+
fi
1816+
1817+
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
1818+
#
1819+
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
1820+
# with the default compiler flags. If not, check if adding the -msse4.2
1821+
# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
1822+
PGAC_SSE42_CRC32_INTRINSICS([])
1823+
if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then
1824+
PGAC_SSE42_CRC32_INTRINSICS([-msse4.2])
1825+
fi
1826+
AC_SUBST(CFLAGS_SSE42)
1827+
1828+
# Select CRC-32C implementation.
1829+
#
1830+
# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them
1831+
# always. If they require extra CFLAGS, compile both implementations and
1832+
# select which one to use at runtime, depending on whether SSE 4.2 is
1833+
# supported by the processor we're running on.
1834+
#
1835+
# You can override this logic by setting the appropriate USE_*_CRC32C* flag to 1
1836+
# in the template or configure command line.
1837+
if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then
1838+
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then
1839+
USE_SSE42_CRC32C=1
1840+
else
1841+
# the CPUID instruction is needed for the runtime check.
1842+
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
1843+
USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1
1844+
else
1845+
USE_SLICING_BY_8_CRC32C=1
1846+
fi
1847+
fi
1848+
fi
1849+
1850+
# Set PG_CRC32C_OBJS appropriately depending on the selected implementation.
#
# Exactly one of the three USE_*_CRC32C* symbols is defined, and
# PG_CRC32C_OBJS lists the object files needed for that choice. The
# runtime-check variant needs both implementations plus the chooser.
1851+
AC_MSG_CHECKING([which CRC-32C implementation to use])
1852+
if test x"$USE_SSE42_CRC32C" = x"1"; then
1853+
AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions.])
1854+
PG_CRC32C_OBJS="pg_crc32c_sse42.o"
1855+
AC_MSG_RESULT(SSE 4.2)
1856+
else
1857+
if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then
1858+
AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.])
1859+
PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o"
1860+
AC_MSG_RESULT(SSE 4.2 with runtime check)
1861+
else
1862+
AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).])
1863+
PG_CRC32C_OBJS="pg_crc32c_sb8.o"
1864+
AC_MSG_RESULT(slicing-by-8)
1865+
fi
1866+
fi
1867+
AC_SUBST(PG_CRC32C_OBJS)
1868+
1869+
1870+
# Check that POSIX signals are available if thread safety is enabled.
17931871
if test "$PORTNAME" != "win32"
17941872
then
17951873
PGAC_FUNC_POSIX_SIGNALS

src/Makefile.global.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ GCC = @GCC@
225225
SUN_STUDIO_CC = @SUN_STUDIO_CC@
226226
CFLAGS = @CFLAGS@
227227
CFLAGS_VECTOR = @CFLAGS_VECTOR@
228+
CFLAGS_SSE42 = @CFLAGS_SSE42@
228229

229230
# Kind-of compilers
230231

@@ -548,6 +549,9 @@ endif
548549

549550
LIBOBJS = @LIBOBJS@
550551

552+
# files needed for the chosen CRC-32C implementation
553+
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
554+
551555
LIBS := -lpgcommon -lpgport $(LIBS)
552556

553557
# to make ws2_32.lib the last library

src/include/pg_config.h.in

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,12 @@
675675
/* Define to 1 if your compiler understands __builtin_unreachable. */
676676
#undef HAVE__BUILTIN_UNREACHABLE
677677

678+
/* Define to 1 if you have __cpuid. */
679+
#undef HAVE__CPUID
680+
681+
/* Define to 1 if you have __get_cpuid. */
682+
#undef HAVE__GET_CPUID
683+
678684
/* Define to 1 if your compiler understands _Static_assert. */
679685
#undef HAVE__STATIC_ASSERT
680686

@@ -818,6 +824,15 @@
818824
/* Use replacement snprintf() functions. */
819825
#undef USE_REPL_SNPRINTF
820826

827+
/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */
828+
#undef USE_SLICING_BY_8_CRC32C
829+
830+
/* Define to 1 to use Intel SSE 4.2 CRC instructions. */
831+
#undef USE_SSE42_CRC32C
832+
833+
/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */
834+
#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
835+
821836
/* Define to select SysV-style semaphores. */
822837
#undef USE_SYSV_SEMAPHORES
823838

0 commit comments

Comments
 (0)