Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1bc16a9

Browse files
committed
Improve make_subplanTargetList to avoid including Vars unnecessarily.
If a Var was used only in a GROUP BY expression, the previous implementation would include the Var by itself (as well as the expression) in the generated targetlist. This wouldn't affect the efficiency of the scan/join part of the plan at all, but it could result in passing unnecessarily-wide rows through sorting and grouping steps. It turns out to take only a little more code, and not noticeably more time, to generate a tlist without such redundancy, so let's do that. Per a recent gripe from HarmeekSingh Bedi.
1 parent 1af37ec commit 1bc16a9

File tree

1 file changed

+110
-52
lines changed

1 file changed

+110
-52
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 110 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
8585
double dNumDistinctRows);
8686
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
8787
AttrNumber **groupColIdx, bool *need_tlist_eval);
88+
static int get_grouping_column_index(Query *parse, TargetEntry *tle);
8889
static void locate_grouping_columns(PlannerInfo *root,
8990
List *tlist,
9091
List *sub_tlist,
@@ -2536,14 +2537,9 @@ choose_hashed_distinct(PlannerInfo *root,
25362537
* For example, given a query like
25372538
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
25382539
* we want to pass this targetlist to the subplan:
2539-
* a,b,c,d,a+b
2540+
* a+b,c,d
25402541
* where the a+b target will be used by the Sort/Group steps, and the
2541-
* other targets will be used for computing the final results. (In the
2542-
* above example we could theoretically suppress the a and b targets and
2543-
* pass down only c,d,a+b, but it's not really worth the trouble to
2544-
* eliminate simple var references from the subplan. We will avoid doing
2545-
* the extra computation to recompute a+b at the outer level; see
2546-
* fix_upper_expr() in setrefs.c.)
2542+
* other targets will be used for computing the final results.
25472543
*
25482544
* If we are grouping or aggregating, *and* there are no non-Var grouping
25492545
* expressions, then the returned tlist is effectively dummy; we do not
@@ -2569,7 +2565,8 @@ make_subplanTargetList(PlannerInfo *root,
25692565
{
25702566
Query *parse = root->parse;
25712567
List *sub_tlist;
2572-
List *extravars;
2568+
List *non_group_cols;
2569+
List *non_group_vars;
25732570
int numCols;
25742571

25752572
*groupColIdx = NULL;
@@ -2586,71 +2583,132 @@ make_subplanTargetList(PlannerInfo *root,
25862583
}
25872584

25882585
/*
2589-
* Otherwise, start with a "flattened" tlist (having just the Vars
2590-
* mentioned in the targetlist and HAVING qual). Note this includes Vars
2591-
* used in resjunk items, so we are covering the needs of ORDER BY and
2592-
* window specifications. Vars used within Aggrefs will be pulled out
2593-
* here, too.
2586+
* Otherwise, we must build a tlist containing all grouping columns,
2587+
* plus any other Vars mentioned in the targetlist and HAVING qual.
25942588
*/
2595-
sub_tlist = flatten_tlist(tlist,
2596-
PVC_RECURSE_AGGREGATES,
2597-
PVC_INCLUDE_PLACEHOLDERS);
2598-
extravars = pull_var_clause(parse->havingQual,
2599-
PVC_RECURSE_AGGREGATES,
2600-
PVC_INCLUDE_PLACEHOLDERS);
2601-
sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
2602-
list_free(extravars);
2589+
sub_tlist = NIL;
2590+
non_group_cols = NIL;
26032591
*need_tlist_eval = false; /* only eval if not flat tlist */
26042592

2605-
/*
2606-
* If grouping, create sub_tlist entries for all GROUP BY expressions
2607-
* (GROUP BY items that are simple Vars should be in the list already),
2608-
* and make an array showing where the group columns are in the sub_tlist.
2609-
*/
26102593
numCols = list_length(parse->groupClause);
26112594
if (numCols > 0)
26122595
{
2613-
int keyno = 0;
2596+
/*
2597+
* If grouping, create sub_tlist entries for all GROUP BY columns, and
2598+
* make an array showing where the group columns are in the sub_tlist.
2599+
*
2600+
* Note: with this implementation, the array entries will always be
2601+
* some permutation of 1..N, but we don't want callers to assume that.
2602+
*/
26142603
AttrNumber *grpColIdx;
2615-
ListCell *gl;
2604+
ListCell *tl;
26162605

2617-
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
2606+
grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
26182607
*groupColIdx = grpColIdx;
26192608

2620-
foreach(gl, parse->groupClause)
2609+
foreach(tl, tlist)
26212610
{
2622-
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
2623-
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
2624-
TargetEntry *te;
2611+
TargetEntry *tle = (TargetEntry *) lfirst(tl);
2612+
int colno;
26252613

2626-
/*
2627-
* Find or make a matching sub_tlist entry. If the groupexpr
2628-
* isn't a Var, no point in searching. (Note that the parser
2629-
* won't make multiple groupClause entries for the same TLE.)
2630-
*/
2631-
if (groupexpr && IsA(groupexpr, Var))
2632-
te = tlist_member(groupexpr, sub_tlist);
2633-
else
2634-
te = NULL;
2614+
colno = get_grouping_column_index(parse, tle);
2615+
if (colno >= 0)
2616+
{
2617+
/*
2618+
* It's a grouping column, so add it to the result tlist and
2619+
* remember its resno in grpColIdx[].
2620+
*/
2621+
TargetEntry *newtle;
26352622

2636-
if (!te)
2623+
newtle = makeTargetEntry(tle->expr,
2624+
list_length(sub_tlist) + 1,
2625+
NULL,
2626+
false);
2627+
sub_tlist = lappend(sub_tlist, newtle);
2628+
2629+
Assert(grpColIdx[colno] == 0); /* no dups expected */
2630+
grpColIdx[colno] = newtle->resno;
2631+
2632+
if (!(newtle->expr && IsA(newtle->expr, Var)))
2633+
*need_tlist_eval = true; /* tlist contains non Vars */
2634+
}
2635+
else
26372636
{
2638-
te = makeTargetEntry((Expr *) groupexpr,
2639-
list_length(sub_tlist) + 1,
2640-
NULL,
2641-
false);
2642-
sub_tlist = lappend(sub_tlist, te);
2643-
*need_tlist_eval = true; /* it's not flat anymore */
2637+
/*
2638+
* Non-grouping column, so just remember the expression
2639+
* for later call to pull_var_clause. There's no need for
2640+
* pull_var_clause to examine the TargetEntry node itself.
2641+
*/
2642+
non_group_cols = lappend(non_group_cols, tle->expr);
26442643
}
2645-
2646-
/* and save its resno */
2647-
grpColIdx[keyno++] = te->resno;
26482644
}
26492645
}
2646+
else
2647+
{
2648+
/*
2649+
* With no grouping columns, just pass whole tlist to pull_var_clause.
2650+
* Need (shallow) copy to avoid damaging input tlist below.
2651+
*/
2652+
non_group_cols = list_copy(tlist);
2653+
}
2654+
2655+
/*
2656+
* If there's a HAVING clause, we'll need the Vars it uses, too.
2657+
*/
2658+
if (parse->havingQual)
2659+
non_group_cols = lappend(non_group_cols, parse->havingQual);
2660+
2661+
/*
2662+
* Pull out all the Vars mentioned in non-group cols (plus HAVING), and
2663+
* add them to the result tlist if not already present. (A Var used
2664+
* directly as a GROUP BY item will be present already.) Note this
2665+
* includes Vars used in resjunk items, so we are covering the needs of
2666+
* ORDER BY and window specifications. Vars used within Aggrefs will be
2667+
* pulled out here, too.
2668+
*/
2669+
non_group_vars = pull_var_clause((Node *) non_group_cols,
2670+
PVC_RECURSE_AGGREGATES,
2671+
PVC_INCLUDE_PLACEHOLDERS);
2672+
sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
2673+
2674+
/* clean up cruft */
2675+
list_free(non_group_vars);
2676+
list_free(non_group_cols);
26502677

26512678
return sub_tlist;
26522679
}
26532680

2681+
/*
2682+
* get_grouping_column_index
2683+
* Get the GROUP BY column position, if any, of a targetlist entry.
2684+
*
2685+
* Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
2686+
* if it's not a grouping column. Note: the result is unique because the
2687+
* parser won't make multiple groupClause entries for the same TLE.
2688+
*/
2689+
static int
2690+
get_grouping_column_index(Query *parse, TargetEntry *tle)
2691+
{
2692+
int colno = 0;
2693+
Index ressortgroupref = tle->ressortgroupref;
2694+
ListCell *gl;
2695+
2696+
/* No need to search groupClause if TLE hasn't got a sortgroupref */
2697+
if (ressortgroupref == 0)
2698+
return -1;
2699+
2700+
foreach(gl, parse->groupClause)
2701+
{
2702+
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
2703+
2704+
if (grpcl->tleSortGroupRef == ressortgroupref)
2705+
return colno;
2706+
colno++;
2707+
}
2708+
2709+
return -1;
2710+
}
2711+
26542712
/*
26552713
* locate_grouping_columns
26562714
* Locate grouping columns in the tlist chosen by create_plan.

0 commit comments

Comments
 (0)