39
39
40
40
41
41
static void ExecHashIncreaseNumBatches (HashJoinTable hashtable );
42
+ static void ExecHashIncreaseNumBuckets (HashJoinTable hashtable );
42
43
static void ExecHashBuildSkewHash (HashJoinTable hashtable , Hash * node ,
43
44
int mcvsToUse );
44
45
static void ExecHashSkewTableInsert (HashJoinTable hashtable ,
@@ -117,6 +118,7 @@ MultiExecHash(HashState *node)
117
118
/* It's a skew tuple, so put it into that hash table */
118
119
ExecHashSkewTableInsert (hashtable , slot , hashvalue ,
119
120
bucketNumber );
121
+ hashtable -> skewTuples += 1 ;
120
122
}
121
123
else
122
124
{
@@ -127,6 +129,25 @@ MultiExecHash(HashState *node)
127
129
}
128
130
}
129
131
132
+ /* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
133
+ if (hashtable -> nbuckets != hashtable -> nbuckets_optimal )
134
+ {
135
+ /* We never decrease the number of buckets. */
136
+ Assert (hashtable -> nbuckets_optimal > hashtable -> nbuckets );
137
+
138
+ #ifdef HJDEBUG
139
+ printf ("Increasing nbuckets %d => %d\n" ,
140
+ hashtable -> nbuckets , hashtable -> nbuckets_optimal );
141
+ #endif
142
+
143
+ ExecHashIncreaseNumBuckets (hashtable );
144
+ }
145
+
146
+ /* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */
147
+ hashtable -> spaceUsed += hashtable -> nbuckets * sizeof (HashJoinTuple );
148
+ if (hashtable -> spaceUsed > hashtable -> spacePeak )
149
+ hashtable -> spacePeak = hashtable -> spaceUsed ;
150
+
130
151
/* must provide our own instrumentation support */
131
152
if (node -> ps .instrument )
132
153
InstrStopNode (node -> ps .instrument , hashtable -> totalTuples );
@@ -272,7 +293,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
272
293
*/
273
294
hashtable = (HashJoinTable ) palloc (sizeof (HashJoinTableData ));
274
295
hashtable -> nbuckets = nbuckets ;
296
+ hashtable -> nbuckets_original = nbuckets ;
297
+ hashtable -> nbuckets_optimal = nbuckets ;
275
298
hashtable -> log2_nbuckets = log2_nbuckets ;
299
+ hashtable -> log2_nbuckets_optimal = log2_nbuckets ;
276
300
hashtable -> buckets = NULL ;
277
301
hashtable -> keepNulls = keepNulls ;
278
302
hashtable -> skewEnabled = false;
@@ -286,6 +310,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
286
310
hashtable -> nbatch_outstart = nbatch ;
287
311
hashtable -> growEnabled = true;
288
312
hashtable -> totalTuples = 0 ;
313
+ hashtable -> skewTuples = 0 ;
289
314
hashtable -> innerBatchFile = NULL ;
290
315
hashtable -> outerBatchFile = NULL ;
291
316
hashtable -> spaceUsed = 0 ;
@@ -620,6 +645,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
620
645
*/
621
646
ninmemory = nfreed = 0 ;
622
647
648
+ /* If we know we need to resize nbuckets, we can do it while rebatching. */
649
+ if (hashtable -> nbuckets_optimal != hashtable -> nbuckets )
650
+ {
651
+ /* we never decrease the number of buckets */
652
+ Assert (hashtable -> nbuckets_optimal > hashtable -> nbuckets );
653
+
654
+ hashtable -> nbuckets = hashtable -> nbuckets_optimal ;
655
+ hashtable -> log2_nbuckets = hashtable -> log2_nbuckets_optimal ;
656
+
657
+ hashtable -> buckets = repalloc (hashtable -> buckets ,
658
+ sizeof (HashJoinTuple ) * hashtable -> nbuckets );
659
+ }
660
+
623
661
/*
624
662
* We will scan through the chunks directly, so that we can reset the
625
663
* buckets now and not have to keep track which tuples in the buckets have
@@ -703,6 +741,78 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
703
741
}
704
742
}
705
743
744
+ /*
745
+ * ExecHashIncreaseNumBuckets
746
+ * increase the original number of buckets in order to reduce
747
+ * number of tuples per bucket
748
+ */
749
+ static void
750
+ ExecHashIncreaseNumBuckets (HashJoinTable hashtable )
751
+ {
752
+ HashMemoryChunk chunk ;
753
+
754
+ /* do nothing if not an increase (it's called increase for a reason) */
755
+ if (hashtable -> nbuckets >= hashtable -> nbuckets_optimal )
756
+ return ;
757
+
758
+ /*
759
+ * We already know the optimal number of buckets, so let's just
760
+ * compute the log2_nbuckets for it.
761
+ */
762
+ hashtable -> nbuckets = hashtable -> nbuckets_optimal ;
763
+ hashtable -> log2_nbuckets = my_log2 (hashtable -> nbuckets_optimal );
764
+
765
+ Assert (hashtable -> nbuckets > 1 );
766
+ Assert (hashtable -> nbuckets <= (INT_MAX / 2 ));
767
+ Assert (hashtable -> nbuckets == (1 << hashtable -> log2_nbuckets ));
768
+
769
+ #ifdef HJDEBUG
770
+ printf ("Increasing nbuckets to %d\n" , hashtable -> nbuckets );
771
+ #endif
772
+
773
+ /*
774
+ * Just reallocate the proper number of buckets - we don't need to
775
+ * walk through them - we can walk the dense-allocated chunks
776
+ * (just like in ExecHashIncreaseNumBatches, but without all the
777
+ * copying into new chunks)
778
+ */
779
+ hashtable -> buckets =
780
+ (HashJoinTuple * ) repalloc (hashtable -> buckets ,
781
+ hashtable -> nbuckets * sizeof (HashJoinTuple ));
782
+
783
+ memset (hashtable -> buckets , 0 , sizeof (void * ) * hashtable -> nbuckets );
784
+
785
+ /* scan through all tuples in all chunks to rebuild the hash table */
786
+ for (chunk = hashtable -> chunks ; chunk != NULL ; chunk = chunk -> next )
787
+ {
788
+ /* process all tuples stored in this chunk */
789
+ size_t idx = 0 ;
790
+ while (idx < chunk -> used )
791
+ {
792
+ HashJoinTuple hashTuple = (HashJoinTuple ) (chunk -> data + idx );
793
+ int bucketno ;
794
+ int batchno ;
795
+
796
+ ExecHashGetBucketAndBatch (hashtable , hashTuple -> hashvalue ,
797
+ & bucketno , & batchno );
798
+
799
+ /* add the tuple to the proper bucket */
800
+ hashTuple -> next = hashtable -> buckets [bucketno ];
801
+ hashtable -> buckets [bucketno ] = hashTuple ;
802
+
803
+ /* advance index past the tuple */
804
+ idx += MAXALIGN (HJTUPLE_OVERHEAD +
805
+ HJTUPLE_MINTUPLE (hashTuple )-> t_len );
806
+ }
807
+ }
808
+
809
+ #ifdef HJDEBUG
810
+ printf ("Nbuckets increased to %d, average items per bucket %.1f\n" ,
811
+ hashtable -> nbuckets , batchTuples / hashtable -> nbuckets );
812
+ #endif
813
+ }
814
+
815
+
706
816
/*
707
817
* ExecHashTableInsert
708
818
* insert a tuple into the hash table depending on the hash value
@@ -736,6 +846,7 @@ ExecHashTableInsert(HashJoinTable hashtable,
736
846
*/
737
847
HashJoinTuple hashTuple ;
738
848
int hashTupleSize ;
849
+ double ntuples = (hashtable -> totalTuples - hashtable -> skewTuples );
739
850
740
851
/* Create the HashJoinTuple */
741
852
hashTupleSize = HJTUPLE_OVERHEAD + tuple -> t_len ;
@@ -756,11 +867,24 @@ ExecHashTableInsert(HashJoinTable hashtable,
756
867
hashTuple -> next = hashtable -> buckets [bucketno ];
757
868
hashtable -> buckets [bucketno ] = hashTuple ;
758
869
870
+ /*
871
+ * Increase the (optimal) number of buckets if we just exceeded the
872
+ * NTUP_PER_BUCKET threshold, but only when there's still a single batch.
873
+ */
874
+ if ((hashtable -> nbatch == 1 ) &&
875
+ (hashtable -> nbuckets_optimal <= INT_MAX /2 ) && /* overflow protection */
876
+ (ntuples >= (hashtable -> nbuckets_optimal * NTUP_PER_BUCKET )))
877
+ {
878
+ hashtable -> nbuckets_optimal *= 2 ;
879
+ hashtable -> log2_nbuckets_optimal += 1 ;
880
+ }
881
+
759
882
/* Account for space used, and back off if we've used too much */
760
883
hashtable -> spaceUsed += hashTupleSize ;
761
884
if (hashtable -> spaceUsed > hashtable -> spacePeak )
762
885
hashtable -> spacePeak = hashtable -> spaceUsed ;
763
- if (hashtable -> spaceUsed + hashtable -> nbuckets * sizeof (HashJoinTuple )
886
+ if (hashtable -> spaceUsed +
887
+ hashtable -> nbuckets_optimal * sizeof (HashJoinTuple )
764
888
> hashtable -> spaceAllowed )
765
889
ExecHashIncreaseNumBatches (hashtable );
766
890
}
@@ -885,7 +1009,10 @@ ExecHashGetHashValue(HashJoinTable hashtable,
885
1009
* functions are good about randomizing all their output bits, else we are
886
1010
* likely to have very skewed bucket or batch occupancy.)
887
1011
*
888
- * nbuckets doesn't change over the course of the join.
1012
+ * nbuckets and log2_nbuckets may change while nbatch == 1 because of dynamic
1013
+ * bucket count growth. Once we start batching, the value is fixed and does
1014
+ * not change over the course of the join (making it possible to compute batch
1015
+ * number the way we do here).
889
1016
*
890
1017
* nbatch is always a power of 2; we increase it only by doubling it. This
891
1018
* effectively adds one more bit to the top of the batchno.
0 commit comments