diff options
author | Richard Guo | 2024-09-10 03:36:48 +0000 |
---|---|---|
committer | Richard Guo | 2024-09-10 03:36:48 +0000 |
commit | f5050f795aea67dfc40bbc429c8934e9439e22e7 (patch) | |
tree | de5ea9e20be4c6563165346e8c00dd05892993b0 /src/backend | |
parent | 247dea89f7616fdf06b7272b74abafc29e8e5860 (diff) |
Mark expressions nullable by grouping sets
When generating window_pathkeys, distinct_pathkeys, or sort_pathkeys,
we failed to realize that the grouping/ordering expressions might be
nullable by grouping sets. As a result, we may incorrectly deem that
the PathKeys are redundant by EquivalenceClass processing and thus
remove them from the pathkeys list. That would lead to wrong results
in some cases.
To fix this issue, we mark the grouping expressions nullable by
grouping sets if that is the case. If the grouping expression is a
Var or PlaceHolderVar or constructed from those, we can just add the
RT index of the RTE_GROUP RTE to the existing nullingrels field(s);
otherwise we have to add a PlaceHolderVar to carry on the nullingrel
bit.
However, we have to manually remove this nullingrel bit from
expressions in various cases where these expressions are logically
below the grouping step, such as when we generate groupClause pathkeys
for grouping sets, or when we generate PathTarget for initial input to
grouping nodes.
Furthermore, in set_upper_references, the targetlist and quals of an
Agg node should have nullingrels that include the effects of the
grouping step, ie they will have nullingrels equal to the input
Vars/PHVs' nullingrels plus the nullingrel bit that references the
grouping RTE. In order to perform exact nullingrels matches, we also
need to manually remove this nullingrel bit.
Bump catversion because this changes the querytree produced by the
parser.
Thanks to Tom Lane for the idea to invent a new kind of RTE.
Per reports from Geoff Winkless, Tobias Wendorff, Richard Guo from
various threads.
Author: Richard Guo
Reviewed-by: Ashutosh Bapat, Sutou Kouhei
Discussion: https://postgr.es/m/CAMbWs4_dp7e7oTwaiZeBX8+P1rXw4ThkZxh1QG81rhu9Z47VsQ@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 12 | ||||
-rw-r--r-- | src/backend/optimizer/path/pathkeys.c | 14 | ||||
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 49 | ||||
-rw-r--r-- | src/backend/optimizer/plan/setrefs.c | 23 | ||||
-rw-r--r-- | src/backend/optimizer/util/var.c | 88 | ||||
-rw-r--r-- | src/backend/parser/parse_agg.c | 13 |
7 files changed, 196 insertions, 7 deletions
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index d871396e20c..47644b26c6a 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -726,6 +726,10 @@ get_eclass_for_sort_expr(PlannerInfo *root, { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); @@ -1087,6 +1091,10 @@ generate_base_implied_equalities(PlannerInfo *root) { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); @@ -3354,6 +3362,10 @@ get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids) { RelOptInfo *rel = root->simple_rel_array[i]; + /* ignore the RTE_GROUP RTE */ + if (i == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(i, root->outer_join_rels)); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index e25798972f6..035bbaa3856 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,6 +25,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" /* Consider reordering of GROUP BY keys? */ @@ -1341,6 +1342,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, &sortclauses, tlist, false, + false, &sortable, false); /* It's caller error if not all clauses were sortable */ @@ -1359,6 +1361,9 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, * give rise to redundant pathkeys are removed from the sortclauses list * (which therefore must be pass-by-reference in this version). * + * If remove_group_rtindex is true, then we need to remove the RT index of the + * grouping step from the sort expressions before we make PathKeys for them. + * * *sortable is set to true if all the sort clauses are in fact sortable. * If any are not, they are ignored except for setting *sortable false. * (In that case, the output pathkey list isn't really useful. However, @@ -1375,6 +1380,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, + bool remove_group_rtindex, bool *sortable, bool set_ec_sortref) { @@ -1394,6 +1400,14 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, *sortable = false; continue; } + if (remove_group_rtindex) + { + Assert(root->group_rtindex > 0); + sortkey = (Expr *) + remove_nulling_relids((Node *) sortkey, + bms_make_singleton(root->group_rtindex), + NULL); + } pathkey = make_pathkey_from_sortop(root, sortkey, sortcl->sortop, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e2c68fe6f99..f3b9821498e 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1328,6 +1328,10 @@ mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid, { RelOptInfo *rel = root->simple_rel_array[relid]; + /* ignore the RTE_GROUP RTE */ + if (relid == root->group_rtindex) + continue; + if (rel == NULL) /* must be an outer join */ { Assert(bms_is_member(relid, root->outer_join_rels)); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index bd4b652f7a3..df35d1ff9c4 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -58,6 +58,7 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "partitioning/partdesc.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/selfuncs.h" @@ -3506,9 +3507,23 @@ standard_qp_callback(PlannerInfo *root, void *extra) if (grouping_is_sortable(groupClause)) { - root->group_pathkeys = make_pathkeys_for_sortclauses(root, - groupClause, - tlist); + bool sortable; + + /* + * The groupClause is logically below the grouping step. So if + * there is an RTE entry for the grouping step, we need to remove + * its RT index from the sort expressions before we make PathKeys + * for them. + */ + root->group_pathkeys = + make_pathkeys_for_sortclauses_extended(root, + &groupClause, + tlist, + false, + parse->hasGroupRTE, + &sortable, + false); + Assert(sortable); root->num_groupby_pathkeys = list_length(root->group_pathkeys); } else @@ -3538,6 +3553,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_groupClause, tlist, true, + false, &sortable, true); if (!sortable) @@ -3589,6 +3605,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_distinctClause, tlist, true, + false, &sortable, false); if (!sortable) @@ -3616,6 +3633,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &groupClauses, tlist, false, + false, &sortable, false); if (!sortable) @@ -5526,7 +5544,19 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) { /* * It's a grouping column, so add it to the input target as-is. + * + * Note that the target is logically below the grouping step. So + * with grouping sets we need to remove the RT index of the + * grouping step if there is any from the target expression. */ + if (parse->hasGroupRTE && parse->groupingSets != NIL) + { + Assert(root->group_rtindex > 0); + expr = (Expr *) + remove_nulling_relids((Node *) expr, + bms_make_singleton(root->group_rtindex), + NULL); + } add_column_to_pathtarget(input_target, expr, sgref); } else @@ -5554,11 +5584,23 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) * includes Vars used in resjunk items, so we are covering the needs of * ORDER BY and window specifications. Vars used within Aggrefs and * WindowFuncs will be pulled out here, too. + * + * Note that the target is logically below the grouping step. So with + * grouping sets we need to remove the RT index of the grouping step if + * there is any from the non-group Vars. */ non_group_vars = pull_var_clause((Node *) non_group_cols, PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_INCLUDE_PLACEHOLDERS); + if (parse->hasGroupRTE && parse->groupingSets != NIL) + { + Assert(root->group_rtindex > 0); + non_group_vars = (List *) + remove_nulling_relids((Node *) non_group_vars, + bms_make_singleton(root->group_rtindex), + NULL); + } add_new_columns_to_pathtarget(input_target, non_group_vars); /* clean up cruft */ @@ -6207,6 +6249,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, &wc->partitionClause, tlist, true, + false, &sortable, false); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 8caf094f7d6..91c7c4fe2fe 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -26,6 +26,7 @@ #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" #include "tcop/utility.h" #include "utils/syscache.h" @@ -2426,6 +2427,28 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) subplan_itlist = build_tlist_index(subplan->targetlist); + /* + * If it's a grouping node with grouping sets, any Vars and PHVs appearing + * in the targetlist and quals should have nullingrels that include the + * effects of the grouping step, ie they will have nullingrels equal to + * the input Vars/PHVs' nullingrels plus the RT index of the grouping + * step. In order to perform exact nullingrels matches, we remove the RT + * index of the grouping step first. + */ + if (IsA(plan, Agg) && + root->group_rtindex > 0 && + ((Agg *) plan)->groupingSets) + { + plan->targetlist = (List *) + remove_nulling_relids((Node *) plan->targetlist, + bms_make_singleton(root->group_rtindex), + NULL); + plan->qual = (List *) + remove_nulling_relids((Node *) plan->qual, + bms_make_singleton(root->group_rtindex), + NULL); + } + output_targetlist = NIL; foreach(l, plan->targetlist) { diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index b189185fca2..f7534ad53d6 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "optimizer/placeholder.h" #include "optimizer/prep.h" @@ -83,6 +84,8 @@ static Node *flatten_join_alias_vars_mutator(Node *node, flatten_join_alias_vars_context *context); static Node *flatten_group_exprs_mutator(Node *node, flatten_join_alias_vars_context *context); +static Node *mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, + Var *oldvar); static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar); static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar); @@ -909,6 +912,18 @@ flatten_join_alias_vars_mutator(Node *node, * flatten_group_exprs * Replace Vars that reference GROUP outputs with the underlying grouping * expressions. + * + * We have to preserve any varnullingrels info attached to the group Vars we're + * replacing. If the replacement expression is a Var or PlaceHolderVar or + * constructed from those, we can just add the varnullingrels bits to the + * existing nullingrels field(s); otherwise we have to add a PlaceHolderVar + * wrapper. + * + * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back + * to source text. For that use-case, root will be NULL, which is why we have + * to pass the Query separately. We need the root itself only for preserving + * varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's + * usage because it does not make any difference to the deparsed source text. */ Node * flatten_group_exprs(PlannerInfo *root, Query *query, Node *node) @@ -973,7 +988,8 @@ flatten_group_exprs_mutator(Node *node, if (context->possible_sublink && !context->inserted_sublink) context->inserted_sublink = checkExprHasSubLink(newvar); - return newvar; + /* Lastly, add any varnullingrels to the replacement expression */ + return mark_nullable_by_grouping(context->root, newvar, var); } if (IsA(node, Aggref)) @@ -1041,6 +1057,76 @@ flatten_group_exprs_mutator(Node *node, } /* + * Add oldvar's varnullingrels, if any, to a flattened grouping expression. + * The newnode has been copied, so we can modify it freely. + */ +static Node * +mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, Var *oldvar) +{ + Relids relids; + + if (root == NULL) + return newnode; + if (oldvar->varnullingrels == NULL) + return newnode; /* nothing to do */ + + Assert(bms_equal(oldvar->varnullingrels, + bms_make_singleton(root->group_rtindex))); + + relids = pull_varnos_of_level(root, newnode, oldvar->varlevelsup); + + if (!bms_is_empty(relids)) + { + /* + * If the newnode is not variable-free, we set the nullingrels of Vars + * or PHVs that are contained in the expression. This is not really + * 'correct' in theory, because it is the whole expression that can be + * nullable by grouping sets, not its individual vars. But it works + * in practice, because what we need is that the expression can be + * somehow distinguished from the same expression in ECs, and marking + * its vars is sufficient for this purpose. + */ + newnode = add_nulling_relids(newnode, + relids, + oldvar->varnullingrels); + } + else /* variable-free? */ + { + /* + * If the newnode is variable-free and does not contain volatile + * functions or set-returning functions, it can be treated as a member + * of EC that is redundant. So wrap it in a new PlaceHolderVar to + * carry the nullingrels. Otherwise we do not bother to make any + * changes. + * + * Aggregate functions and window functions are not allowed in + * grouping expressions. + */ + Assert(!contain_agg_clause(newnode)); + Assert(!contain_window_function(newnode)); + + if (!contain_volatile_functions(newnode) && + !expression_returns_set(newnode)) + { + PlaceHolderVar *newphv; + Relids phrels; + + phrels = get_relids_in_jointree((Node *) root->parse->jointree, + true, false); + Assert(!bms_is_empty(phrels)); + + newphv = make_placeholder_expr(root, (Expr *) newnode, phrels); + /* newphv has zero phlevelsup and NULL phnullingrels; fix it */ + newphv->phlevelsup = oldvar->varlevelsup; + newphv->phnullingrels = bms_copy(oldvar->varnullingrels); + newnode = (Node *) newphv; + } + } + + return newnode; +} + +/* * Add oldvar's varnullingrels, if any, to a flattened join alias expression. * The newnode has been copied, so we can modify it freely. */ diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index bd095d05c0b..102accd0716 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -1333,9 +1333,6 @@ substitute_grouped_columns_mutator(Node *node, if (node == NULL) return NULL; - if (IsA(node, Const) || - IsA(node, Param)) - return node; /* constants are always acceptable */ if (IsA(node, Aggref)) { @@ -1410,6 +1407,16 @@ substitute_grouped_columns_mutator(Node *node, } /* + * Constants are always acceptable. We have to do this after we checked + * the subexpression as a whole for a match, because it is possible that + * we have GROUP BY items that are constants, and the constants would + * become not so constant after the grouping step. + */ + if (IsA(node, Const) || + IsA(node, Param)) + return node; + + /* * If we have an ungrouped Var of the original query level, we have a * failure. Vars below the original query level are not a problem, and * neither are Vars from above it. (If such Vars are ungrouped as far as |