Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit deb1486

Browse files
Inline pg_popcount() for small buffers.
If there aren't many bytes to process, the function call overhead of the optimized implementation isn't worth taking, so instead we inline a loop that consults pg_number_of_ones in that case. If there are many bytes to process, we accept the function call overhead because the optimized versions are likely to be faster.

The threshold at which we use the optimized implementation is set to the smallest amount of data required to use special popcount instructions.

Reviewed-by: Alvaro Herrera, Tom Lane
Discussion: https://postgr.es/m/20240402155301.GA2750455%40nathanxps13
1 parent 6dbb490 commit deb1486

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

src/include/port/pg_bitutils.h

+36 -2
Original file line number | Diff line number | Diff line change
@@ -302,16 +302,50 @@ pg_ceil_log2_64(uint64 num)
302302
/* Attempt to use the POPCNT instruction, but perform a runtime check first */
303303
extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
304304
extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
305-
extern PGDLLIMPORT uint64 (*pg_popcount) (const char *buf, int bytes);
305+
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
306306

307307
#else
308308
/* Use a portable implementation -- no need for a function pointer. */
309309
extern int pg_popcount32(uint32 word);
310310
extern int pg_popcount64(uint64 word);
311-
extern uint64 pg_popcount(const char *buf, int bytes);
311+
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
312312

313313
#endif /* TRY_POPCNT_FAST */
314314

315+
/*
316+
* Returns the number of 1-bits in buf.
317+
*
318+
* If there aren't many bytes to process, the function call overhead of the
319+
* optimized versions isn't worth taking, so we inline a loop that consults
320+
* pg_number_of_ones in that case. If there are many bytes to process, we
321+
* accept the function call overhead because the optimized versions are likely
322+
* to be faster.
323+
*/
324+
static inline uint64
325+
pg_popcount(const char *buf, int bytes)
326+
{
327+
/*
328+
* We set the threshold to the point at which we'll first use special
329+
* instructions in the optimized version.
330+
*/
331+
#if SIZEOF_VOID_P >= 8
332+
int threshold = 8;
333+
#else
334+
int threshold = 4;
335+
#endif
336+
337+
if (bytes < threshold)
338+
{
339+
uint64 popcnt = 0;
340+
341+
while (bytes--)
342+
popcnt += pg_number_of_ones[(unsigned char) *buf++];
343+
return popcnt;
344+
}
345+
346+
return pg_popcount_optimized(buf, bytes);
347+
}
348+
315349
/*
316350
* Rotate the bits of "word" to the right/left by n bits.
317351
*/

src/port/pg_bitutils.c

+6 -6
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ static uint64 pg_popcount_fast(const char *buf, int bytes);
118118

119119
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
120120
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
121-
uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
121+
uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
122122
#endif /* TRY_POPCNT_FAST */
123123

124124
#ifdef TRY_POPCNT_FAST
@@ -155,13 +155,13 @@ choose_popcount_functions(void)
155155
{
156156
pg_popcount32 = pg_popcount32_fast;
157157
pg_popcount64 = pg_popcount64_fast;
158-
pg_popcount = pg_popcount_fast;
158+
pg_popcount_optimized = pg_popcount_fast;
159159
}
160160
else
161161
{
162162
pg_popcount32 = pg_popcount32_slow;
163163
pg_popcount64 = pg_popcount64_slow;
164-
pg_popcount = pg_popcount_slow;
164+
pg_popcount_optimized = pg_popcount_slow;
165165
}
166166
}
167167

@@ -183,7 +183,7 @@ static uint64
183183
pg_popcount_choose(const char *buf, int bytes)
184184
{
185185
choose_popcount_functions();
186-
return pg_popcount(buf, bytes);
186+
return pg_popcount_optimized(buf, bytes);
187187
}
188188

189189
/*
@@ -387,11 +387,11 @@ pg_popcount64(uint64 word)
387387
}
388388

389389
/*
390-
* pg_popcount
390+
* pg_popcount_optimized
391391
* Returns the number of 1-bits in buf
392392
*/
393393
uint64
394-
pg_popcount(const char *buf, int bytes)
394+
pg_popcount_optimized(const char *buf, int bytes)
395395
{
396396
return pg_popcount_slow(buf, bytes);
397397
}

0 commit comments

Comments
 (0)