Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit deb71fa

Browse files
committed
Fix costing for parallel aggregation.
The original patch kind of ignored the fact that we were doing something different from a costing point of view, but nobody noticed. This patch fixes that oversight. Patch by David Rowley.
1 parent 46d73e0 commit deb71fa

File tree

3 files changed

+92
-25
lines changed

3 files changed

+92
-25
lines changed

src/backend/optimizer/plan/planner.c

+31-8
Original file line numberDiff line numberDiff line change
@@ -3262,6 +3262,8 @@ create_grouping_paths(PlannerInfo *root,
32623262
RelOptInfo *grouped_rel;
32633263
PathTarget *partial_grouping_target = NULL;
32643264
AggClauseCosts agg_costs;
3265+
AggClauseCosts agg_partial_costs; /* parallel only */
3266+
AggClauseCosts agg_final_costs; /* parallel only */
32653267
Size hashaggtablesize;
32663268
double dNumGroups;
32673269
double dNumPartialGroups = 0;
@@ -3346,8 +3348,10 @@ create_grouping_paths(PlannerInfo *root,
33463348
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
33473349
if (parse->hasAggs)
33483350
{
3349-
count_agg_clauses(root, (Node *) target->exprs, &agg_costs);
3350-
count_agg_clauses(root, parse->havingQual, &agg_costs);
3351+
count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true,
3352+
false, false);
3353+
count_agg_clauses(root, parse->havingQual, &agg_costs, true, false,
3354+
false);
33513355
}
33523356

33533357
/*
@@ -3422,6 +3426,25 @@ create_grouping_paths(PlannerInfo *root,
34223426
NIL,
34233427
NIL);
34243428

3429+
/*
3430+
* Collect statistics about aggregates for estimating costs of
3431+
* performing aggregation in parallel.
3432+
*/
3433+
MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3434+
MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3435+
if (parse->hasAggs)
3436+
{
3437+
/* partial phase */
3438+
count_agg_clauses(root, (Node *) partial_grouping_target->exprs,
3439+
&agg_partial_costs, false, false, true);
3440+
3441+
/* final phase */
3442+
count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs,
3443+
true, true, true);
3444+
count_agg_clauses(root, parse->havingQual, &agg_final_costs, true,
3445+
true, true);
3446+
}
3447+
34253448
if (can_sort)
34263449
{
34273450
/* Checked in set_grouped_rel_consider_parallel() */
@@ -3457,7 +3480,7 @@ create_grouping_paths(PlannerInfo *root,
34573480
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
34583481
parse->groupClause,
34593482
NIL,
3460-
&agg_costs,
3483+
&agg_partial_costs,
34613484
dNumPartialGroups,
34623485
false,
34633486
false,
@@ -3482,7 +3505,7 @@ create_grouping_paths(PlannerInfo *root,
34823505

34833506
hashaggtablesize =
34843507
estimate_hashagg_tablesize(cheapest_partial_path,
3485-
&agg_costs,
3508+
&agg_partial_costs,
34863509
dNumPartialGroups);
34873510

34883511
/*
@@ -3499,7 +3522,7 @@ create_grouping_paths(PlannerInfo *root,
34993522
AGG_HASHED,
35003523
parse->groupClause,
35013524
NIL,
3502-
&agg_costs,
3525+
&agg_partial_costs,
35033526
dNumPartialGroups,
35043527
false,
35053528
false,
@@ -3631,7 +3654,7 @@ create_grouping_paths(PlannerInfo *root,
36313654
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
36323655
parse->groupClause,
36333656
(List *) parse->havingQual,
3634-
&agg_costs,
3657+
&agg_final_costs,
36353658
dNumGroups,
36363659
true,
36373660
true,
@@ -3691,7 +3714,7 @@ create_grouping_paths(PlannerInfo *root,
36913714
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
36923715

36933716
hashaggtablesize = estimate_hashagg_tablesize(path,
3694-
&agg_costs,
3717+
&agg_final_costs,
36953718
dNumGroups);
36963719

36973720
if (hashaggtablesize < work_mem * 1024L)
@@ -3713,7 +3736,7 @@ create_grouping_paths(PlannerInfo *root,
37133736
AGG_HASHED,
37143737
parse->groupClause,
37153738
(List *) parse->havingQual,
3716-
&agg_costs,
3739+
&agg_final_costs,
37173740
dNumGroups,
37183741
true,
37193742
true,

src/backend/optimizer/util/clauses.c

+59-16
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ typedef struct
6161
{
6262
PlannerInfo *root;
6363
AggClauseCosts *costs;
64+
bool finalizeAggs;
65+
bool combineStates;
66+
bool serialStates;
6467
} count_agg_clauses_context;
6568

6669
typedef struct
@@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context)
540543
* are no subqueries. There mustn't be outer-aggregate references either.
541544
*/
542545
void
543-
count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs)
546+
count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs,
547+
bool finalizeAggs, bool combineStates, bool serialStates)
544548
{
545549
count_agg_clauses_context context;
546550

547551
context.root = root;
548552
context.costs = costs;
553+
context.finalizeAggs = finalizeAggs;
554+
context.combineStates = combineStates;
555+
context.serialStates = serialStates;
549556
(void) count_agg_clauses_walker(clause, &context);
550557
}
551558

@@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
562569
Form_pg_aggregate aggform;
563570
Oid aggtransfn;
564571
Oid aggfinalfn;
572+
Oid aggcombinefn;
573+
Oid aggserialfn;
574+
Oid aggdeserialfn;
565575
Oid aggtranstype;
566576
int32 aggtransspace;
567577
QualCost argcosts;
@@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
583593
aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
584594
aggtransfn = aggform->aggtransfn;
585595
aggfinalfn = aggform->aggfinalfn;
596+
aggcombinefn = aggform->aggcombinefn;
597+
aggserialfn = aggform->aggserialfn;
598+
aggdeserialfn = aggform->aggdeserialfn;
586599
aggtranstype = aggform->aggtranstype;
587600
aggtransspace = aggform->aggtransspace;
588601
ReleaseSysCache(aggTuple);
@@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
592605
if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
593606
costs->numOrderedAggs++;
594607

595-
/* add component function execution costs to appropriate totals */
596-
costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
597-
if (OidIsValid(aggfinalfn))
598-
costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
608+
/*
609+
* Add the appropriate component function execution costs to
610+
* appropriate totals.
611+
*/
612+
if (context->combineStates)
613+
{
614+
/* charge for combining previously aggregated states */
615+
costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
599616

600-
/* also add the input expressions' cost to per-input-row costs */
601-
cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
602-
costs->transCost.startup += argcosts.startup;
603-
costs->transCost.per_tuple += argcosts.per_tuple;
617+
/* charge for deserialization, when appropriate */
618+
if (context->serialStates && OidIsValid(aggdeserialfn))
619+
costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
620+
}
621+
else
622+
costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
623+
624+
if (context->finalizeAggs)
625+
{
626+
if (OidIsValid(aggfinalfn))
627+
costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
628+
}
629+
else if (context->serialStates)
630+
{
631+
if (OidIsValid(aggserialfn))
632+
costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
633+
}
604634

605635
/*
606-
* Add any filter's cost to per-input-row costs.
607-
*
608-
* XXX Ideally we should reduce input expression costs according to
609-
* filter selectivity, but it's not clear it's worth the trouble.
636+
* Some costs will already have been incurred by the initial aggregate
637+
* node, so we mustn't include these again.
610638
*/
611-
if (aggref->aggfilter)
639+
if (!context->combineStates)
612640
{
613-
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
614-
context->root);
641+
/* add the input expressions' cost to per-input-row costs */
642+
cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
615643
costs->transCost.startup += argcosts.startup;
616644
costs->transCost.per_tuple += argcosts.per_tuple;
645+
646+
/*
647+
* Add any filter's cost to per-input-row costs.
648+
*
649+
* XXX Ideally we should reduce input expression costs according
650+
* to filter selectivity, but it's not clear it's worth the
651+
* trouble.
652+
*/
653+
if (aggref->aggfilter)
654+
{
655+
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
656+
context->root);
657+
costs->transCost.startup += argcosts.startup;
658+
costs->transCost.per_tuple += argcosts.per_tuple;
659+
}
617660
}
618661

619662
/*

src/include/optimizer/clauses.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause);
6767
extern PartialAggType aggregates_allow_partial(Node *clause);
6868
extern bool contain_agg_clause(Node *clause);
6969
extern void count_agg_clauses(PlannerInfo *root, Node *clause,
70-
AggClauseCosts *costs);
70+
AggClauseCosts *costs, bool finalizeAggs,
71+
bool combineStates, bool serialStates);
7172

7273
extern bool contain_window_function(Node *clause);
7374
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);

0 commit comments

Comments
 (0)