Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9f76296

Browse files
sterrettm2Commitfest Bot
authored and
Commitfest Bot
committed
Enable autovectorizing pg_checksum_block
1 parent b006bcd commit 9f76296

File tree

6 files changed

+250
-23
lines changed

6 files changed

+250
-23
lines changed

config/c-compiler.m4

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,37 @@ fi
710710
undefine([Ac_cachevar])dnl
711711
])# PGAC_XSAVE_INTRINSICS
712712

713+
# PGAC_AVX2_SUPPORT
714+
# -----------------------------
715+
# Check if the compiler supports AVX2 in attribute((target))
716+
# and using AVX2 intrinsics in those functions
717+
#
718+
# If the intrinsics are supported, sets pgac_avx2_support.
719+
AC_DEFUN([PGAC_AVX2_SUPPORT],
720+
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
721+
AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
722+
[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
723+
#include <stdint.h>
724+
#if defined(__has_attribute) && __has_attribute (target)
725+
__attribute__((target("avx2")))
726+
#endif
727+
static int avx2_test(void)
728+
{
729+
const char buf@<:@sizeof(__m256i)@:>@;
730+
__m256i accum = _mm256_loadu_si256((const __m256i *) buf);
731+
accum = _mm256_add_epi32(accum, accum);
732+
int result = _mm256_extract_epi32(accum, 0);
733+
return (int) result;
734+
}],
735+
[return avx2_test();])],
736+
[Ac_cachevar=yes],
737+
[Ac_cachevar=no])])
738+
if test x"$Ac_cachevar" = x"yes"; then
739+
pgac_avx2_support=yes
740+
fi
741+
undefine([Ac_cachevar])dnl
742+
])# PGAC_AVX2_SUPPORT
743+
713744
# PGAC_AVX512_POPCNT_INTRINSICS
714745
# -----------------------------
715746
# Check if the compiler supports the AVX-512 popcount instructions using the

configure

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17724,6 +17724,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
1772417724

1772517725
fi
1772617726

17727+
# Check for AVX2 target and intrinsic support
17728+
#
17729+
if test x"$host_cpu" = x"x86_64"; then
17730+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
17731+
$as_echo_n "checking for AVX2 support... " >&6; }
17732+
if ${pgac_cv_avx2_support+:} false; then :
17733+
$as_echo_n "(cached) " >&6
17734+
else
17735+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
17736+
/* end confdefs.h. */
17737+
#include <immintrin.h>
17738+
#include <stdint.h>
17739+
#if defined(__has_attribute) && __has_attribute (target)
17740+
__attribute__((target("avx2")))
17741+
#endif
17742+
static int avx2_test(void)
17743+
{
17744+
const char buf[sizeof(__m256i)];
17745+
__m256i accum = _mm256_loadu_si256((const __m256i *) buf);
17746+
accum = _mm256_add_epi32(accum, accum);
17747+
int result = _mm256_extract_epi32(accum, 0);
17748+
return (int) result;
17749+
}
17750+
int
17751+
main ()
17752+
{
17753+
return avx2_test();
17754+
;
17755+
return 0;
17756+
}
17757+
_ACEOF
17758+
if ac_fn_c_try_link "$LINENO"; then :
17759+
pgac_cv_avx2_support=yes
17760+
else
17761+
pgac_cv_avx2_support=no
17762+
fi
17763+
rm -f core conftest.err conftest.$ac_objext \
17764+
conftest$ac_exeext conftest.$ac_ext
17765+
fi
17766+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
17767+
$as_echo "$pgac_cv_avx2_support" >&6; }
17768+
if test x"$pgac_cv_avx2_support" = x"yes"; then
17769+
pgac_avx2_support=yes
17770+
fi
17771+
17772+
if test x"$pgac_avx2_support" = x"yes"; then
17773+
17774+
$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
17775+
17776+
fi
17777+
fi
17778+
1772717779
# Check for AVX-512 popcount intrinsics
1772817780
#
1772917781
if test x"$host_cpu" = x"x86_64"; then

configure.ac

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2089,6 +2089,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
20892089
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
20902090
fi
20912091

2092+
# Check for AVX2 target and intrinsic support
2093+
#
2094+
if test x"$host_cpu" = x"x86_64"; then
2095+
PGAC_AVX2_SUPPORT()
2096+
if test x"$pgac_avx2_support" = x"yes"; then
2097+
AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
2098+
fi
2099+
fi
2100+
20922101
# Check for AVX-512 popcount intrinsics
20932102
#
20942103
if test x"$host_cpu" = x"x86_64"; then

meson.build

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2301,6 +2301,34 @@ int main(void)
23012301

23022302
endif
23032303

2304+
###############################################################
2305+
# Check for the availability of AVX2 support
2306+
###############################################################
2307+
2308+
if host_cpu == 'x86_64'
2309+
2310+
prog = '''
2311+
#include <immintrin.h>
2312+
#include <stdint.h>
2313+
#if defined(__has_attribute) && __has_attribute (target)
2314+
__attribute__((target("avx2")))
2315+
#endif
2316+
int main(void)
2317+
{
2318+
const char buf[sizeof(__m256i)];
2319+
__m256i accum = _mm256_loadu_si256((const __m256i *) buf);
2320+
accum = _mm256_add_epi32(accum, accum);
2321+
int result = _mm256_extract_epi32(accum, 0);
2322+
return (int) result;
2323+
}
2324+
'''
2325+
2326+
if cc.links(prog, name: 'AVX2 support', args: test_c_args)
2327+
cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
2328+
endif
2329+
2330+
endif
2331+
23042332

23052333
###############################################################
23062334
# Check for the availability of AVX-512 popcount intrinsics.

src/include/pg_config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,9 @@
672672
/* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
673673
#undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
674674

675+
/* Define to 1 to use AVX2 instructions with a runtime check. */
676+
#undef USE_AVX2_WITH_RUNTIME_CHECK
677+
675678
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
676679
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
677680

src/include/storage/checksum_impl.h

Lines changed: 127 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,83 @@
101101
*/
102102

103103
#include "storage/bufpage.h"
104+
#include "pg_config.h"
104105

105106
/* number of checksums to calculate in parallel */
106107
#define N_SUMS 32
107108
/* prime multiplier of FNV-1a hash */
108109
#define FNV_PRIME 16777619
109110

111+
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
112+
#include <cpuid.h>
113+
#endif
114+
115+
#include <immintrin.h>
116+
117+
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
118+
#include <intrin.h>
119+
#endif
120+
121+
/*
122+
* Does CPUID say there's support for XSAVE instructions?
123+
*/
124+
static inline bool
125+
xsave_available(void)
126+
{
127+
unsigned int exx[4] = {0, 0, 0, 0};
128+
129+
#if defined(HAVE__GET_CPUID)
130+
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
131+
#elif defined(HAVE__CPUID)
132+
__cpuid(exx, 1);
133+
#else
134+
#error cpuid instruction not available
135+
#endif
136+
return (exx[2] & (1 << 27)) != 0; /* osxsave */
137+
}
138+
139+
/*
140+
* Does XGETBV say the YMM registers are enabled?
141+
*
142+
* NB: Caller is responsible for verifying that xsave_available() returns true
143+
* before calling this.
144+
*/
145+
#ifdef HAVE_XSAVE_INTRINSICS
146+
pg_attribute_target("xsave")
147+
#endif
148+
static inline bool
149+
ymm_regs_available(void)
150+
{
151+
#ifdef HAVE_XSAVE_INTRINSICS
152+
return (_xgetbv(0) & 0x06) == 0x06;
153+
#else
154+
return false;
155+
#endif
156+
}
157+
158+
/*
159+
* Does CPUID say there's support for AVX-2
160+
*/
161+
static inline bool
162+
avx2_available(void)
163+
{
164+
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
165+
unsigned int exx[4] = {0, 0, 0, 0};
166+
if (!xsave_available() || !ymm_regs_available()) return false;
167+
168+
#if defined(HAVE__GET_CPUID_COUNT)
169+
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
170+
#elif defined(HAVE__CPUIDEX)
171+
__cpuidex(exx, 7, 0);
172+
#else
173+
#error cpuid instruction not available
174+
#endif
175+
return (exx[1] & (1 << 5)) != 0; /* avx2 */
176+
#else
177+
return false;
178+
#endif
179+
}
180+
110181
/* Use a union so that this code is valid under strict aliasing */
111182
typedef union
112183
{
@@ -142,35 +213,68 @@ do { \
142213
* Block checksum algorithm. The page must be adequately aligned
143214
* (at least on 4-byte boundary).
144215
*/
145-
static uint32
146-
pg_checksum_block(const PGChecksummablePage *page)
147-
{
148-
uint32 sums[N_SUMS];
149-
uint32 result = 0;
150-
uint32 i,
151-
j;
216+
217+
#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
218+
static uint32 \
219+
pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
220+
221+
#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
222+
pg_attribute_target(#ISANAME) \
223+
static uint32 \
224+
pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
225+
{ \
226+
uint32 sums[N_SUMS]; \
227+
uint32 result = 0; \
228+
uint32 i, \
229+
j; \
230+
\
231+
/* ensure that the size is compatible with the algorithm */ \
232+
Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
233+
\
234+
/* initialize partial checksums to their corresponding offsets */ \
235+
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
236+
\
237+
/* main checksum calculation */ \
238+
/* this is the main place that autovectorization occurs */ \
239+
for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
240+
for (j = 0; j < N_SUMS; j++) \
241+
CHECKSUM_COMP(sums[j], page->data[i][j]); \
242+
\
243+
/* finally add in two rounds of zeroes for additional mixing */ \
244+
for (i = 0; i < 2; i++) \
245+
for (j = 0; j < N_SUMS; j++) \
246+
CHECKSUM_COMP(sums[j], 0); \
247+
\
248+
/* xor fold partial checksums together */ \
249+
for (i = 0; i < N_SUMS; i++) \
250+
result ^= sums[i]; \
251+
\
252+
return result; \
253+
}
152254

153-
/* ensure that the size is compatible with the algorithm */
154-
Assert(sizeof(PGChecksummablePage) == BLCKSZ);
255+
/* Declarations are always defined to make dynamic dispatch code simpler */
155256

156-
/* initialize partial checksums to their corresponding offsets */
157-
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
257+
PG_DECLARE_CHECKSUM_ISA(default);
258+
PG_DECLARE_CHECKSUM_ISA(avx2);
158259

159-
/* main checksum calculation */
160-
for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
161-
for (j = 0; j < N_SUMS; j++)
162-
CHECKSUM_COMP(sums[j], page->data[i][j]);
260+
PG_DEFINE_CHECKSUM_ISA(default);
261+
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
262+
PG_DEFINE_CHECKSUM_ISA(avx2);
263+
#endif
163264

164-
/* finally add in two rounds of zeroes for additional mixing */
165-
for (i = 0; i < 2; i++)
166-
for (j = 0; j < N_SUMS; j++)
167-
CHECKSUM_COMP(sums[j], 0);
265+
static uint32
266+
pg_checksum_block_dispatch(const PGChecksummablePage *page);
168267

169-
/* xor fold partial checksums together */
170-
for (i = 0; i < N_SUMS; i++)
171-
result ^= sums[i];
268+
static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
172269

173-
return result;
270+
static uint32
271+
pg_checksum_block_dispatch(const PGChecksummablePage *page){
272+
if (avx2_available()){
273+
pg_checksum_block = pg_checksum_block_avx2;
274+
}else{
275+
pg_checksum_block = pg_checksum_block_default;
276+
}
277+
return pg_checksum_block(page);
174278
}
175279

176280
/*

0 commit comments

Comments
 (0)