@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
386
386
*/
387
387
388
388
/* Target bucket loading (tuples per bucket) */
389
- #define NTUP_PER_BUCKET 10
389
+ #define NTUP_PER_BUCKET 1
390
390
391
391
void
392
392
ExecChooseHashTableSize (double ntuples , int tupwidth , bool useskew ,
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
396
396
{
397
397
int tupsize ;
398
398
double inner_rel_bytes ;
399
+ long bucket_bytes ;
399
400
long hash_table_bytes ;
400
401
long skew_table_bytes ;
401
402
long max_pointers ;
402
- int nbatch ;
403
+ int nbatch = 1 ;
403
404
int nbuckets ;
404
- int i ;
405
+ double dbuckets ;
405
406
406
407
/* Force a plausible relation size if no info */
407
408
if (ntuples <= 0.0 )
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
460
461
461
462
/*
462
463
* Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
463
- * memory is filled. Set nbatch to the smallest power of 2 that appears
464
- * sufficient. The Min() steps limit the results so that the pointer
465
- * arrays we'll try to allocate do not exceed work_mem.
464
+ * memory is filled, assuming a single batch. The Min() step limits the
465
+ * results so that the pointer arrays we'll try to allocate do not exceed
466
+ * work_mem.
466
467
*/
467
468
max_pointers = (work_mem * 1024L ) / sizeof (void * );
468
469
/* also ensure we avoid integer overflow in nbatch and nbuckets */
469
470
max_pointers = Min (max_pointers , INT_MAX / 2 );
471
+ dbuckets = ceil (ntuples / NTUP_PER_BUCKET );
472
+ dbuckets = Min (dbuckets , max_pointers );
473
+ nbuckets = Max ((int ) dbuckets , 1024 );
474
+ nbuckets = 1 << my_log2 (nbuckets );
475
+ bucket_bytes = sizeof (HashJoinTuple ) * nbuckets ;
470
476
471
- if (inner_rel_bytes > hash_table_bytes )
477
+ /*
478
+ * If there's not enough space to store the projected number of tuples
479
+ * and the required bucket headers, we will need multiple batches.
480
+ */
481
+ if (inner_rel_bytes + bucket_bytes > hash_table_bytes )
472
482
{
473
483
/* We'll need multiple batches */
474
484
long lbuckets ;
475
485
double dbatch ;
476
486
int minbatch ;
487
+ long bucket_size ;
477
488
478
- lbuckets = (hash_table_bytes / tupsize ) / NTUP_PER_BUCKET ;
489
+ /*
490
+ * Estimate the number of buckets we'll want to have when work_mem
491
+ * is entirely full. Each bucket will contain a bucket pointer plus
492
+ * NTUP_PER_BUCKET tuples, whose projected size already includes
493
+ * overhead for the hash code, pointer to the next tuple, etc.
494
+ */
495
+ bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof (HashJoinTuple ));
496
+ lbuckets = 1 << my_log2 (hash_table_bytes / bucket_size );
479
497
lbuckets = Min (lbuckets , max_pointers );
480
498
nbuckets = (int ) lbuckets ;
499
+ bucket_bytes = nbuckets * sizeof (HashJoinTuple );
500
+
501
+ /*
502
+ * Buckets are simple pointers to hashjoin tuples, while tupsize
503
+ * includes the pointer, hash code, and MinimalTupleData. So buckets
504
+ * should never really exceed 25% of work_mem (even for
505
+ * NTUP_PER_BUCKET=1); except maybe * for work_mem values that are
506
+ * not 2^N bytes, where we might get more * because of doubling.
507
+ * So let's look for 50% here.
508
+ */
509
+ Assert (bucket_bytes <= hash_table_bytes / 2 );
481
510
482
- dbatch = ceil (inner_rel_bytes / hash_table_bytes );
511
+ /* Calculate required number of batches. */
512
+ dbatch = ceil (inner_rel_bytes / (hash_table_bytes - bucket_bytes ));
483
513
dbatch = Min (dbatch , max_pointers );
484
514
minbatch = (int ) dbatch ;
485
515
nbatch = 2 ;
486
516
while (nbatch < minbatch )
487
517
nbatch <<= 1 ;
488
518
}
489
- else
490
- {
491
- /* We expect the hashtable to fit in memory */
492
- double dbuckets ;
493
-
494
- dbuckets = ceil (ntuples / NTUP_PER_BUCKET );
495
- dbuckets = Min (dbuckets , max_pointers );
496
- nbuckets = (int ) dbuckets ;
497
-
498
- nbatch = 1 ;
499
- }
500
-
501
- /*
502
- * Both nbuckets and nbatch must be powers of 2 to make
503
- * ExecHashGetBucketAndBatch fast. We already fixed nbatch; now inflate
504
- * nbuckets to the next larger power of 2. We also force nbuckets to not
505
- * be real small, by starting the search at 2^10. (Note: above we made
506
- * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
507
- * overflow, nor can the final shift to recalculate nbuckets.)
508
- */
509
- i = 10 ;
510
- while ((1 << i ) < nbuckets )
511
- i ++ ;
512
- nbuckets = (1 << i );
513
519
514
520
* numbuckets = nbuckets ;
515
521
* numbatches = nbatch ;
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
754
760
hashtable -> spaceUsed += hashTupleSize ;
755
761
if (hashtable -> spaceUsed > hashtable -> spacePeak )
756
762
hashtable -> spacePeak = hashtable -> spaceUsed ;
757
- if (hashtable -> spaceUsed > hashtable -> spaceAllowed )
763
+ if (hashtable -> spaceUsed + hashtable -> nbuckets * sizeof (HashJoinTuple )
764
+ > hashtable -> spaceAllowed )
758
765
ExecHashIncreaseNumBatches (hashtable );
759
766
}
760
767
else
0 commit comments