|
14 | 14 | * Copyright (c) 2003-2005, PostgreSQL Global Development Group
|
15 | 15 | *
|
16 | 16 | * IDENTIFICATION
|
17 |
| - * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $ |
| 17 | + * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.9 2005/06/15 16:24:07 tgl Exp $ |
18 | 18 | *
|
19 | 19 | *-------------------------------------------------------------------------
|
20 | 20 | */
|
@@ -769,22 +769,36 @@ bms_first_member(Bitmapset *a)
|
769 | 769 | *
|
770 | 770 | * Note: we must ensure that any two bitmapsets that are bms_equal() will
|
771 | 771 | * hash to the same value; in practice this means that trailing all-zero
|
772 |
| - * words cannot affect the result. Longitudinal XOR provides a reasonable |
773 |
| - * hash value that has this property. |
| 772 | + * words cannot affect the result. The circular-shift-and-XOR hash method |
| 773 | + * used here has this property, so long as we work from back to front. |
| 774 | + * |
| 775 | + * Note: you might wonder why we bother with the circular shift; at first |
| 776 | + * glance a straight longitudinal XOR seems as good and much simpler. The |
| 777 | + * reason is empirical: this gives a better distribution of hash values on |
| 778 | + * the bitmapsets actually generated by the planner. A common way to have |
| 779 | + * multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise |
| 780 | + * to rangetables in which base tables and JOIN nodes alternate; so |
| 781 | + * bitmapsets of base table RT indexes tend to use only odd-numbered or only |
| 782 | + * even-numbered bits. A straight longitudinal XOR would preserve this |
| 783 | + * property, leading to a much smaller set of possible outputs than if |
| 784 | + * we include a shift. |
774 | 785 | */
|
775 | 786 | uint32
|
776 | 787 | bms_hash_value(const Bitmapset *a)
|
777 | 788 | {
|
778 | 789 | bitmapword result = 0;
|
779 |
| - int nwords; |
780 | 790 | int wordnum;
|
781 | 791 |
|
782 |
| - if (a == NULL) |
| 792 | + if (a == NULL || a->nwords <= 0) |
783 | 793 | return 0; /* All empty sets hash to 0 */
|
784 |
| - nwords = a->nwords; |
785 |
| - for (wordnum = 0; wordnum < nwords; wordnum++) |
| 794 | + for (wordnum = a->nwords; --wordnum > 0; ) |
786 | 795 | {
|
787 | 796 | result ^= a->words[wordnum];
|
| 797 | + if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1))) |
| 798 | + result = (result << 1) | 1; |
| 799 | + else |
| 800 | + result = (result << 1); |
788 | 801 | }
|
| 802 | + result ^= a->words[0]; |
789 | 803 | return (uint32) result;
|
790 | 804 | }
|
0 commit comments