|
46 | 46 |
|
47 | 47 | #include "postgres.h"
|
48 | 48 |
|
| 49 | +#include "port/pg_bitutils.h" |
49 | 50 | #include "utils/memdebug.h"
|
50 | 51 | #include "utils/memutils.h"
|
51 | 52 |
|
@@ -297,18 +298,6 @@ static const MemoryContextMethods AllocSetMethods = {
|
297 | 298 | #endif
|
298 | 299 | };
|
299 | 300 |
|
300 |
| -/* |
301 |
| - * Table for AllocSetFreeIndex |
302 |
| - */ |
303 |
| -#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n |
304 |
| - |
305 |
| -static const unsigned char LogTable256[256] = |
306 |
| -{ |
307 |
| - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, |
308 |
| - LT16(5), LT16(6), LT16(6), LT16(7), LT16(7), LT16(7), LT16(7), |
309 |
| - LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8) |
310 |
| -}; |
311 |
| - |
312 | 301 | /* ----------
|
313 | 302 | * Debug macros
|
314 | 303 | * ----------
|
@@ -337,24 +326,41 @@ static inline int
|
337 | 326 | AllocSetFreeIndex(Size size)
|
338 | 327 | {
|
339 | 328 | int idx;
|
340 |
| - unsigned int t, |
341 |
| - tsize; |
342 | 329 |
|
343 | 330 | if (size > (1 << ALLOC_MINBITS))
|
344 | 331 | {
|
345 |
| - tsize = (size - 1) >> ALLOC_MINBITS; |
346 |
| - |
347 |
| - /* |
348 |
| - * At this point we need to obtain log2(tsize)+1, ie, the number of |
349 |
| - * not-all-zero bits at the right. We used to do this with a |
350 |
| - * shift-and-count loop, but this function is enough of a hotspot to |
351 |
| - * justify micro-optimization effort. The best approach seems to be |
352 |
| - * to use a lookup table. Note that this code assumes that |
353 |
| - * ALLOCSET_NUM_FREELISTS <= 17, since we only cope with two bytes of |
354 |
| - * the tsize value. |
| 332 | + /*---------- |
| 333 | + * At this point we must compute ceil(log2(size / (1 << ALLOC_MINBITS))), using exact (non-truncating) division. |
| 334 | + * This is the same as |
| 335 | + * pg_leftmost_one_pos32((size - 1) >> ALLOC_MINBITS) + 1 |
| 336 | + * or equivalently |
| 337 | + * pg_leftmost_one_pos32(size - 1) - ALLOC_MINBITS + 1 |
| 338 | + * |
| 339 | + * However, rather than just calling that function, we duplicate the |
| 340 | + * logic here, allowing an additional optimization. It's reasonable |
| 341 | + * to assume that ALLOC_CHUNK_LIMIT fits in 16 bits, so we can unroll |
| 342 | + * the byte-at-a-time loop in pg_leftmost_one_pos32 and just handle |
| 343 | + * the last two bytes. |
| 344 | + * |
| 345 | + * Yes, this function is enough of a hot-spot to make it worth this |
| 346 | + * much trouble. |
| 347 | + *---------- |
355 | 348 | */
|
| 349 | +#ifdef HAVE__BUILTIN_CLZ |
| 350 | + idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1; |
| 351 | +#else |
| 352 | + uint32 t, |
| 353 | + tsize; |
| 354 | + |
| 355 | + /* Statically assert that we only have a 16-bit input value. */ |
| 356 | + StaticAssertStmt(ALLOC_CHUNK_LIMIT < (1 << 16), |
| 357 | + "ALLOC_CHUNK_LIMIT must be less than 64kB"); |
| 358 | + |
| 359 | + tsize = size - 1; |
356 | 360 | t = tsize >> 8;
|
357 |
| - idx = t ? LogTable256[t] + 8 : LogTable256[tsize]; |
| 361 | + idx = t ? pg_leftmost_one_pos[t] + 8 : pg_leftmost_one_pos[tsize]; |
| 362 | + idx -= ALLOC_MINBITS - 1; |
| 363 | +#endif |
358 | 364 |
|
359 | 365 | Assert(idx < ALLOCSET_NUM_FREELISTS);
|
360 | 366 | }
|
|
0 commit comments