Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 616be05

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent eaae83c commit 616be05

File tree

9 files changed

+156
-6
lines changed

9 files changed

+156
-6
lines changed

src/backend/executor/nodeAgg.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -3425,11 +3425,13 @@ ExecReScanAgg(AggState *node)
34253425
return;
34263426

34273427
/*
3428-
* If we do have the hash table and the subplan does not have any
3429-
* parameter changes, then we can just rescan the existing hash table;
3430-
* no need to build it again.
3428+
* If we do have the hash table, and the subplan does not have any
3429+
* parameter changes, and none of our own parameter changes affect
3430+
* input expressions of the aggregated functions, then we can just
3431+
* rescan the existing hash table; no need to build it again.
34313432
*/
3432-
if (outerPlan->chgParam == NULL)
3433+
if (outerPlan->chgParam == NULL &&
3434+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
34333435
{
34343436
ResetTupleHashIterator(node->hashtable, &node->hashiter);
34353437
return;

src/backend/nodes/copyfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ _copyAgg(const Agg *from)
877877
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
878878
}
879879
COPY_SCALAR_FIELD(numGroups);
880+
COPY_BITMAPSET_FIELD(aggParams);
880881
COPY_NODE_FIELD(groupingSets);
881882
COPY_NODE_FIELD(chain);
882883

src/backend/nodes/outfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ _outAgg(StringInfo str, const Agg *node)
716716
appendStringInfo(str, " %u", node->grpOperators[i]);
717717

718718
WRITE_LONG_FIELD(numGroups);
719+
WRITE_BITMAPSET_FIELD(aggParams);
719720
WRITE_NODE_FIELD(groupingSets);
720721
WRITE_NODE_FIELD(chain);
721722
}

src/backend/nodes/readfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,7 @@ _readAgg(void)
19911991
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
19921992
READ_OID_ARRAY(grpOperators, local_node->numCols);
19931993
READ_LONG_FIELD(numGroups);
1994+
READ_BITMAPSET_FIELD(aggParams);
19941995
READ_NODE_FIELD(groupingSets);
19951996
READ_NODE_FIELD(chain);
19961997

src/backend/optimizer/plan/createplan.c

+1
Original file line numberDiff line numberDiff line change
@@ -5664,6 +5664,7 @@ make_agg(List *tlist, List *qual,
56645664
node->grpColIdx = grpColIdx;
56655665
node->grpOperators = grpOperators;
56665666
node->numGroups = numGroups;
5667+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
56675668
node->groupingSets = groupingSets;
56685669
node->chain = chain;
56695670

src/backend/optimizer/plan/subselect.c

+47-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8282
Bitmapset *valid_params,
8383
Bitmapset *scan_params);
8484
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
85+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8586

8687

8788
/*
@@ -2652,6 +2653,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
26522653
locally_added_param);
26532654
break;
26542655

2656+
case T_Agg:
2657+
{
2658+
Agg *agg = (Agg *) plan;
2659+
2660+
/*
2661+
* AGG_HASHED plans need to know which Params are referenced
2662+
* in aggregate calls. Do a separate scan to identify them.
2663+
*/
2664+
if (agg->aggstrategy == AGG_HASHED)
2665+
{
2666+
finalize_primnode_context aggcontext;
2667+
2668+
aggcontext.root = root;
2669+
aggcontext.paramids = NULL;
2670+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2671+
&aggcontext);
2672+
finalize_agg_primnode((Node *) agg->plan.qual,
2673+
&aggcontext);
2674+
agg->aggParams = aggcontext.paramids;
2675+
}
2676+
}
2677+
break;
2678+
26552679
case T_WindowAgg:
26562680
finalize_primnode(((WindowAgg *) plan)->startOffset,
26572681
&context);
@@ -2660,7 +2684,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
26602684
break;
26612685

26622686
case T_Hash:
2663-
case T_Agg:
26642687
case T_Material:
26652688
case T_Sort:
26662689
case T_Unique:
@@ -2811,6 +2834,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
28112834
(void *) context);
28122835
}
28132836

2837+
/*
2838+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2839+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2840+
* arguments to the result set.
2841+
*/
2842+
static bool
2843+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2844+
{
2845+
if (node == NULL)
2846+
return false;
2847+
if (IsA(node, Aggref))
2848+
{
2849+
Aggref *agg = (Aggref *) node;
2850+
2851+
/* we should not consider the direct arguments, if any */
2852+
finalize_primnode((Node *) agg->args, context);
2853+
finalize_primnode((Node *) agg->aggfilter, context);
2854+
return false; /* there can't be any Aggrefs below here */
2855+
}
2856+
return expression_tree_walker(node, finalize_agg_primnode,
2857+
(void *) context);
2858+
}
2859+
28142860
/*
28152861
* SS_make_initplan_output_param - make a Param for an initPlan's output
28162862
*

src/include/nodes/plannodes.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,8 @@ typedef struct Agg
715715
AttrNumber *grpColIdx; /* their indexes in the target list */
716716
Oid *grpOperators; /* equality operators to compare with */
717717
long numGroups; /* estimated number of groups in input */
718-
/* Note: the planner only provides numGroups in AGG_HASHED case */
718+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
719+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
719720
List *groupingSets; /* grouping sets to use */
720721
List *chain; /* chained Agg/Sort nodes */
721722
} Agg;

src/test/regress/expected/aggregates.out

+75
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,81 @@ from tenk1 o;
366366
9999
367367
(1 row)
368368

369+
-- Test handling of Params within aggregate arguments in hashed aggregation.
370+
-- Per bug report from Jeevan Chalke.
371+
explain (verbose, costs off)
372+
select s1, s2, sm
373+
from generate_series(1, 3) s1,
374+
lateral (select s2, sum(s1 + s2) sm
375+
from generate_series(1, 3) s2 group by s2) ss
376+
order by 1, 2;
377+
QUERY PLAN
378+
------------------------------------------------------------------
379+
Sort
380+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
381+
Sort Key: s1.s1, s2.s2
382+
-> Nested Loop
383+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
384+
-> Function Scan on pg_catalog.generate_series s1
385+
Output: s1.s1
386+
Function Call: generate_series(1, 3)
387+
-> HashAggregate
388+
Output: s2.s2, sum((s1.s1 + s2.s2))
389+
Group Key: s2.s2
390+
-> Function Scan on pg_catalog.generate_series s2
391+
Output: s2.s2
392+
Function Call: generate_series(1, 3)
393+
(14 rows)
394+
395+
select s1, s2, sm
396+
from generate_series(1, 3) s1,
397+
lateral (select s2, sum(s1 + s2) sm
398+
from generate_series(1, 3) s2 group by s2) ss
399+
order by 1, 2;
400+
s1 | s2 | sm
401+
----+----+----
402+
1 | 1 | 2
403+
1 | 2 | 3
404+
1 | 3 | 4
405+
2 | 1 | 3
406+
2 | 2 | 4
407+
2 | 3 | 5
408+
3 | 1 | 4
409+
3 | 2 | 5
410+
3 | 3 | 6
411+
(9 rows)
412+
413+
explain (verbose, costs off)
414+
select array(select sum(x+y) s
415+
from generate_series(1,3) y group by y order by s)
416+
from generate_series(1,3) x;
417+
QUERY PLAN
418+
-------------------------------------------------------------------
419+
Function Scan on pg_catalog.generate_series x
420+
Output: (SubPlan 1)
421+
Function Call: generate_series(1, 3)
422+
SubPlan 1
423+
-> Sort
424+
Output: (sum((x.x + y.y))), y.y
425+
Sort Key: (sum((x.x + y.y)))
426+
-> HashAggregate
427+
Output: sum((x.x + y.y)), y.y
428+
Group Key: y.y
429+
-> Function Scan on pg_catalog.generate_series y
430+
Output: y.y
431+
Function Call: generate_series(1, 3)
432+
(13 rows)
433+
434+
select array(select sum(x+y) s
435+
from generate_series(1,3) y group by y order by s)
436+
from generate_series(1,3) x;
437+
array
438+
---------
439+
{2,3,4}
440+
{3,4,5}
441+
{4,5,6}
442+
(3 rows)
443+
369444
--
370445
-- test for bitwise integer aggregates
371446
--

src/test/regress/sql/aggregates.sql

+22
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,28 @@ select
9898
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
9999
from tenk1 o;
100100

101+
-- Test handling of Params within aggregate arguments in hashed aggregation.
102+
-- Per bug report from Jeevan Chalke.
103+
explain (verbose, costs off)
104+
select s1, s2, sm
105+
from generate_series(1, 3) s1,
106+
lateral (select s2, sum(s1 + s2) sm
107+
from generate_series(1, 3) s2 group by s2) ss
108+
order by 1, 2;
109+
select s1, s2, sm
110+
from generate_series(1, 3) s1,
111+
lateral (select s2, sum(s1 + s2) sm
112+
from generate_series(1, 3) s2 group by s2) ss
113+
order by 1, 2;
114+
115+
explain (verbose, costs off)
116+
select array(select sum(x+y) s
117+
from generate_series(1,3) y group by y order by s)
118+
from generate_series(1,3) x;
119+
select array(select sum(x+y) s
120+
from generate_series(1,3) y group by y order by s)
121+
from generate_series(1,3) x;
122+
101123
--
102124
-- test for bitwise integer aggregates
103125
--

0 commit comments

Comments
 (0)