Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a365d9e

Browse files
committed
Speed up tail processing when hashing aligned C strings, take two
After encountering the NUL terminator, the word-at-a-time loop exits and we must hash the remaining bytes. Previously we calculated the terminator's position and re-loaded the remaining bytes from the input string. This was slower than the unaligned case for very short strings.

We already have all the data we need in a register, so let's just mask off the bytes we need and hash them immediately. In addition to endianness issues, the previous attempt upset valgrind in the way it computed the mask. Whether by accident or by wisdom, the author's proposed method passes locally with valgrind 3.22.

Author: Ants Aasma, with cosmetic adjustments by me
Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
1 parent 0c25fee commit a365d9e

File tree

1 file changed

+36
-10
lines changed

1 file changed

+36
-10
lines changed

src/include/common/hashfn_unstable.h

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,13 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
219219
#define haszero64(v) \
220220
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
221221

222+
/* get first byte in memory order */
223+
#ifdef WORDS_BIGENDIAN
224+
#define firstbyte64(v) ((v) >> 56)
225+
#else
226+
#define firstbyte64(v) ((v) & 0xFF)
227+
#endif
228+
222229
/*
223230
* all-purpose workhorse for fasthash_accum_cstring
224231
*/
@@ -255,7 +262,7 @@ static inline size_t
255262
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
256263
{
257264
const char *const start = str;
258-
size_t remainder;
265+
uint64 chunk;
259266
uint64 zero_byte_low;
260267

261268
Assert(PointerIsAligned(start, uint64));
@@ -275,7 +282,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
275282
*/
276283
for (;;)
277284
{
278-
uint64 chunk = *(uint64 *) str;
285+
chunk = *(uint64 *) str;
279286

280287
#ifdef WORDS_BIGENDIAN
281288
zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -290,14 +297,33 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
290297
str += FH_SIZEOF_ACCUM;
291298
}
292299

293-
/*
294-
* The byte corresponding to the NUL will be 0x80, so the rightmost bit
295-
* position will be in the range 7, 15, ..., 63. Turn this into byte
296-
* position by dividing by 8.
297-
*/
298-
remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
299-
fasthash_accum(hs, str, remainder);
300-
str += remainder;
300+
if (firstbyte64(chunk) != 0)
301+
{
302+
size_t remainder;
303+
uint64 mask;
304+
305+
/*
306+
* The byte corresponding to the NUL will be 0x80, so the rightmost
307+
* bit position will be in the range 15, 23, ..., 63. Turn this into
308+
* byte position by dividing by 8.
309+
*/
310+
remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
311+
312+
/*
313+
* Create a mask for the remaining bytes so we can combine them into
314+
* the hash. This must have the same result as mixing the remaining
315+
* bytes with fasthash_accum().
316+
*/
317+
#ifdef WORDS_BIGENDIAN
318+
mask = ~UINT64CONST(0) << BITS_PER_BYTE * (FH_SIZEOF_ACCUM - remainder);
319+
#else
320+
mask = ~UINT64CONST(0) >> BITS_PER_BYTE * (FH_SIZEOF_ACCUM - remainder);
321+
#endif
322+
hs->accum = chunk & mask;
323+
fasthash_combine(hs);
324+
325+
str += remainder;
326+
}
301327

302328
return str - start;
303329
}

0 commit comments

Comments (0)