Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Fix costing for parallel aggregation.
author Robert Haas <rhaas@postgresql.org>
Tue, 12 Apr 2016 20:24:55 +0000 (16:24 -0400)
committer Robert Haas <rhaas@postgresql.org>
Tue, 12 Apr 2016 20:25:55 +0000 (16:25 -0400)
The original parallel aggregation patch ignored the fact that the partial
and final aggregation phases do different work from a costing point of
view, but nobody noticed.  This patch fixes that oversight.

David Rowley

src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/clauses.c
src/include/optimizer/clauses.h

index 7c1e3d6bbfd29ae9c2f8bc916262d7c7c5776381..9377550ad672f2e06f3ebfadf58f72ad2f9fb7fc 100644 (file)
@@ -3262,6 +3262,8 @@ create_grouping_paths(PlannerInfo *root,
    RelOptInfo *grouped_rel;
    PathTarget *partial_grouping_target = NULL;
    AggClauseCosts agg_costs;
+   AggClauseCosts agg_partial_costs;   /* parallel only */
+   AggClauseCosts agg_final_costs;     /* parallel only */
    Size        hashaggtablesize;
    double      dNumGroups;
    double      dNumPartialGroups = 0;
@@ -3346,8 +3348,10 @@ create_grouping_paths(PlannerInfo *root,
    MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
    if (parse->hasAggs)
    {
-       count_agg_clauses(root, (Node *) target->exprs, &agg_costs);
-       count_agg_clauses(root, parse->havingQual, &agg_costs);
+       count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true,
+                         false, false);
+       count_agg_clauses(root, parse->havingQual, &agg_costs, true, false,
+                         false);
    }
 
    /*
@@ -3422,6 +3426,25 @@ create_grouping_paths(PlannerInfo *root,
                                                 NIL,
                                                 NIL);
 
+       /*
+        * Collect statistics about aggregates for estimating costs of
+        * performing aggregation in parallel.
+        */
+       MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
+       MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
+       if (parse->hasAggs)
+       {
+           /* partial phase */
+           count_agg_clauses(root, (Node *) partial_grouping_target->exprs,
+                             &agg_partial_costs, false, false, true);
+
+           /* final phase */
+           count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs,
+                             true, true, true);
+           count_agg_clauses(root, parse->havingQual, &agg_final_costs, true,
+                             true, true);
+       }
+
        if (can_sort)
        {
            /* Checked in set_grouped_rel_consider_parallel() */
@@ -3457,7 +3480,7 @@ create_grouping_paths(PlannerInfo *root,
                                parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                                    parse->groupClause,
                                                    NIL,
-                                                   &agg_costs,
+                                                   &agg_partial_costs,
                                                    dNumPartialGroups,
                                                    false,
                                                    false,
@@ -3482,7 +3505,7 @@ create_grouping_paths(PlannerInfo *root,
 
            hashaggtablesize =
                estimate_hashagg_tablesize(cheapest_partial_path,
-                                          &agg_costs,
+                                          &agg_partial_costs,
                                           dNumPartialGroups);
 
            /*
@@ -3499,7 +3522,7 @@ create_grouping_paths(PlannerInfo *root,
                                            AGG_HASHED,
                                            parse->groupClause,
                                            NIL,
-                                           &agg_costs,
+                                           &agg_partial_costs,
                                            dNumPartialGroups,
                                            false,
                                            false,
@@ -3631,7 +3654,7 @@ create_grouping_paths(PlannerInfo *root,
                                parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                            parse->groupClause,
                                            (List *) parse->havingQual,
-                                           &agg_costs,
+                                           &agg_final_costs,
                                            dNumGroups,
                                            true,
                                            true,
@@ -3691,7 +3714,7 @@ create_grouping_paths(PlannerInfo *root,
            Path   *path = (Path *) linitial(grouped_rel->partial_pathlist);
 
            hashaggtablesize = estimate_hashagg_tablesize(path,
-                                                         &agg_costs,
+                                                         &agg_final_costs,
                                                          dNumGroups);
 
            if (hashaggtablesize < work_mem * 1024L)
@@ -3713,7 +3736,7 @@ create_grouping_paths(PlannerInfo *root,
                                            AGG_HASHED,
                                            parse->groupClause,
                                            (List *) parse->havingQual,
-                                           &agg_costs,
+                                           &agg_final_costs,
                                            dNumGroups,
                                            true,
                                            true,
index 5674a73dfe0f834e4ab73ac2c06052dd8c7e495c..759566ad4616fff050e393d65b54bc2c7b9a0a5c 100644 (file)
@@ -61,6 +61,9 @@ typedef struct
 {
    PlannerInfo *root;
    AggClauseCosts *costs;
+   bool        finalizeAggs;
+   bool        combineStates;
+   bool        serialStates;
 } count_agg_clauses_context;
 
 typedef struct
@@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context)
  * are no subqueries.  There mustn't be outer-aggregate references either.
  */
 void
-count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs)
+count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs,
+                 bool finalizeAggs, bool combineStates, bool serialStates)
 {
    count_agg_clauses_context context;
 
    context.root = root;
    context.costs = costs;
+   context.finalizeAggs = finalizeAggs;
+   context.combineStates = combineStates;
+   context.serialStates = serialStates;
    (void) count_agg_clauses_walker(clause, &context);
 }
 
@@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
        Form_pg_aggregate aggform;
        Oid         aggtransfn;
        Oid         aggfinalfn;
+       Oid         aggcombinefn;
+       Oid         aggserialfn;
+       Oid         aggdeserialfn;
        Oid         aggtranstype;
        int32       aggtransspace;
        QualCost    argcosts;
@@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
        aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
        aggtransfn = aggform->aggtransfn;
        aggfinalfn = aggform->aggfinalfn;
+       aggcombinefn = aggform->aggcombinefn;
+       aggserialfn = aggform->aggserialfn;
+       aggdeserialfn = aggform->aggdeserialfn;
        aggtranstype = aggform->aggtranstype;
        aggtransspace = aggform->aggtransspace;
        ReleaseSysCache(aggTuple);
@@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
        if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
            costs->numOrderedAggs++;
 
-       /* add component function execution costs to appropriate totals */
-       costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
-       if (OidIsValid(aggfinalfn))
-           costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+       /*
+        * Add the appropriate component function execution costs to
+        * appropriate totals.
+        */
+       if (context->combineStates)
+       {
+           /* charge for combining previously aggregated states */
+           costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
 
-       /* also add the input expressions' cost to per-input-row costs */
-       cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
-       costs->transCost.startup += argcosts.startup;
-       costs->transCost.per_tuple += argcosts.per_tuple;
+           /* charge for deserialization, when appropriate */
+           if (context->serialStates && OidIsValid(aggdeserialfn))
+               costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+       }
+       else
+           costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+
+       if (context->finalizeAggs)
+       {
+           if (OidIsValid(aggfinalfn))
+               costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+       }
+       else if (context->serialStates)
+       {
+           if (OidIsValid(aggserialfn))
+               costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+       }
 
        /*
-        * Add any filter's cost to per-input-row costs.
-        *
-        * XXX Ideally we should reduce input expression costs according to
-        * filter selectivity, but it's not clear it's worth the trouble.
+        * Some costs will already have been incurred by the initial aggregate
+        * node, so we mustn't include these again.
         */
-       if (aggref->aggfilter)
+       if (!context->combineStates)
        {
-           cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
-                               context->root);
+           /* add the input expressions' cost to per-input-row costs */
+           cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
            costs->transCost.startup += argcosts.startup;
            costs->transCost.per_tuple += argcosts.per_tuple;
+
+           /*
+            * Add any filter's cost to per-input-row costs.
+            *
+            * XXX Ideally we should reduce input expression costs according
+            * to filter selectivity, but it's not clear it's worth the
+            * trouble.
+            */
+           if (aggref->aggfilter)
+           {
+               cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
+                                   context->root);
+               costs->transCost.startup += argcosts.startup;
+               costs->transCost.per_tuple += argcosts.per_tuple;
+           }
        }
 
        /*
index 3ab57f155d23736683497551c0b1a5997c3e5acc..1eb1eb4a543950fb8cbcf997cbd2e37165c5eb46 100644 (file)
@@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause);
 extern PartialAggType aggregates_allow_partial(Node *clause);
 extern bool contain_agg_clause(Node *clause);
 extern void count_agg_clauses(PlannerInfo *root, Node *clause,
-                 AggClauseCosts *costs);
+                 AggClauseCosts *costs, bool finalizeAggs,
+                 bool combineStates, bool serialStates);
 
 extern bool contain_window_function(Node *clause);
 extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);