
Commit 7351bfe

Fix costing for disk-based hash aggregation.

Report and suggestions from Richard Guo and Tomas Vondra.

Discussion: https://postgr.es/m/CAMbWs4_W8fYbAn8KxgidAaZHON_Oo08OYn9ze=7remJymLqo5g@mail.gmail.com
1 parent 4083f44 commit 7351bfe

2 files changed: +16, -16 lines


src/backend/executor/nodeAgg.c (+2)

@@ -1728,6 +1728,8 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
 	/* if not expected to spill, use all of work_mem */
 	if (input_groups * hashentrysize < work_mem * 1024L)
 	{
+		if (num_partitions != NULL)
+			*num_partitions = 0;
 		*mem_limit = work_mem * 1024L;
 		*ngroups_limit = *mem_limit / hashentrysize;
 		return;
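
The added lines make hash_agg_set_limits() set *num_partitions on the early-return path as well, so a caller that always reads that output (as cost_agg() now does, see below) never sees an uninitialized value. The following standalone sketch shows the same pattern of assigning an optional out-parameter on every return path; set_limits() and all of its numbers are invented for illustration and are not PostgreSQL code:

#include <stddef.h>
#include <stdio.h>

/*
 * Simplified sketch of the pattern fixed above: a function with an
 * optional out-parameter must assign it on every return path, otherwise
 * a caller that reads it afterwards sees an indeterminate value.
 * All names and numbers here are invented for illustration.
 */
static void
set_limits(double entrysize, double ngroups, double mem_bytes,
		   double *mem_limit, int *num_partitions)
{
	if (ngroups * entrysize < mem_bytes)
	{
		/* early-return path: still initialize the optional out-parameter */
		if (num_partitions != NULL)
			*num_partitions = 0;	/* 0 means "no spill expected" */
		*mem_limit = mem_bytes;
		return;
	}

	/* spill path: choose some partition count (placeholder value) */
	if (num_partitions != NULL)
		*num_partitions = 4;
	*mem_limit = mem_bytes / 2.0;	/* arbitrary split for the sketch */
}

int
main(void)
{
	double		mem_limit;
	int			num_partitions;

	/* fits in memory, so num_partitions comes back as 0, not garbage */
	set_limits(100.0, 10.0, 4096.0, &mem_limit, &num_partitions);
	printf("mem_limit = %.0f, num_partitions = %d\n",
		   mem_limit, num_partitions);
	return 0;
}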

src/backend/optimizer/path/costsize.c (+14, -16)

@@ -2257,14 +2257,15 @@ cost_agg(Path *path, PlannerInfo *root,
 	 */
 	if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
 	{
+		double		pages;
 		double		pages_written = 0.0;
 		double		pages_read = 0.0;
 		double		hashentrysize;
 		double		nbatches;
 		Size		mem_limit;
 		uint64		ngroups_limit;
 		int			num_partitions;
-
+		int			depth;
 
 		/*
 		 * Estimate number of batches based on the computed limits. If less
@@ -2279,25 +2280,22 @@ cost_agg(Path *path, PlannerInfo *root,
 		nbatches = Max( (numGroups * hashentrysize) / mem_limit,
 						numGroups / ngroups_limit );
 
+		nbatches = Max(ceil(nbatches), 1.0);
+		num_partitions = Max(num_partitions, 2);
+
+		/*
+		 * The number of partitions can change at different levels of
+		 * recursion; but for the purposes of this calculation assume it stays
+		 * constant.
+		 */
+		depth = ceil( log(nbatches) / log(num_partitions) );
+
 		/*
 		 * Estimate number of pages read and written. For each level of
 		 * recursion, a tuple must be written and then later read.
 		 */
-		if (nbatches > 1.0)
-		{
-			double		depth;
-			double		pages;
-
-			pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
-
-			/*
-			 * The number of partitions can change at different levels of
-			 * recursion; but for the purposes of this calculation assume it
-			 * stays constant.
-			 */
-			depth = ceil( log(nbatches - 1) / log(num_partitions) );
-			pages_written = pages_read = pages * depth;
-		}
+		pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
+		pages_written = pages_read = pages * depth;
 
 		startup_cost += pages_written * random_page_cost;
 		total_cost += pages_written * random_page_cost;
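
With the old if (nbatches > 1.0) guard removed, the spill-cost arithmetic now runs unconditionally: nbatches is rounded up and clamped to at least 1, num_partitions is clamped to at least 2 before being used as a log base, and the recursion depth becomes ceil(log(nbatches) / log(num_partitions)). When nbatches is 1 the depth is 0, so pages_written and pages_read stay 0 and no disk I/O cost is charged. The standalone sketch below replays that arithmetic with made-up planner inputs; relation_byte_size(), BLCKSZ, and random_page_cost are replaced by placeholder values, so the numbers are illustrative only:

#include <math.h>
#include <stdio.h>

/* PostgreSQL's Max() macro, redefined here so the sketch stands alone */
#define Max(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
	/* Made-up planner inputs; real values come from the planner. */
	double		numGroups = 1000000.0;	/* estimated number of groups */
	double		hashentrysize = 200.0;	/* bytes per hash table entry */
	double		mem_limit = 4.0 * 1024.0 * 1024.0;	/* 4MB memory limit */
	double		ngroups_limit = mem_limit / hashentrysize;
	int			num_partitions = 0;		/* 0 = "no spill expected" */
	double		input_bytes = 2.0e8;	/* stand-in for relation_byte_size() */
	double		block_size = 8192.0;	/* stand-in for BLCKSZ */
	double		random_page_cost = 4.0;	/* PostgreSQL's default setting */

	double		nbatches;
	double		depth;
	double		pages;
	double		pages_written;
	double		pages_read;

	/* Same arithmetic as the new cost_agg() code above. */
	nbatches = Max((numGroups * hashentrysize) / mem_limit,
				   numGroups / ngroups_limit);
	nbatches = Max(ceil(nbatches), 1.0);
	num_partitions = Max(num_partitions, 2);
	depth = ceil(log(nbatches) / log(num_partitions));

	pages = input_bytes / block_size;
	pages_written = pages_read = pages * depth;

	printf("nbatches = %.0f, depth = %.0f, pages written/read = %.0f\n",
		   nbatches, depth, pages_written);
	printf("I/O cost added to startup_cost = %.0f\n",
		   pages_written * random_page_cost);
	return 0;
}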
