diff options
author | Tom Lane | 2023-01-30 18:16:20 +0000 |
---|---|---|
committer | Tom Lane | 2023-01-30 18:16:20 +0000 |
commit | 2489d76c4906f4461a364ca8ad7e0751ead8aa0d (patch) | |
tree | 145ebc28d5ea8f5a5ba340b9e353a11de786adae /src/backend/optimizer/path | |
parent | ec7e053a98f39a9e3c7e6d35f0d2e83933882399 (diff) |
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value
of a table column everywhere in parse and plan trees. This choice
predates our support for SQL outer joins, and it's really a pretty
bad idea with outer joins, because the Var's value can depend on
where it is in the tree: it might go to NULL above an outer join.
So expression nodes that are equal() per equalfuncs.c might not
represent the same value, which is a huge correctness hazard for
the planner.
To improve this, decorate Var nodes with a bitmapset showing
which outer joins (identified by RTE indexes) may have nulled
them at the point in the parse tree where the Var appears.
This allows us to trust that equal() Vars represent the same value.
A certain amount of klugery is still needed to cope with cases
where we re-order two outer joins, but it's possible to make it
work without sacrificing that core principle. PlaceHolderVars
receive similar decoration for the same reason.
In the planner, we include these outer join bitmapsets into the relids
that an expression is considered to depend on, and in consequence also
add outer-join relids to the relids of join RelOptInfos. This allows
us to correctly perceive whether an expression can be calculated above
or below a particular outer join.
This change affects FDWs that want to plan foreign joins. They *must*
follow suit when labeling foreign joins in order to match with the
core planner, but for many purposes (if postgres_fdw is any guide)
they'd prefer to consider only base relations within the join.
To support both requirements, redefine ForeignScan.fs_relids as
base+OJ relids, and add a new field fs_base_relids that's set up by
the core planner.
Large though it is, this commit just does the minimum necessary to
install the new mechanisms and get check-world passing again.
Follow-up patches will perform some cleanup. (The README additions
and comments mention some stuff that will appear in the follow-up.)
Patch by me; thanks to Richard Guo for review.
Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 36 | ||||
-rw-r--r-- | src/backend/optimizer/path/clausesel.c | 45 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 8 | ||||
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 132 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 9 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 65 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 30 | ||||
-rw-r--r-- | src/backend/optimizer/path/tidpath.c | 1 |
8 files changed, 221 insertions, 105 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c2fc568dc8a..26b294d5d07 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -159,27 +159,6 @@ make_one_rel(PlannerInfo *root, List *joinlist) Index rti; double total_pages; - /* - * Construct the all_baserels Relids set. - */ - root->all_baserels = NULL; - for (rti = 1; rti < root->simple_rel_array_size; rti++) - { - RelOptInfo *brel = root->simple_rel_array[rti]; - - /* there may be empty slots corresponding to non-baserel RTEs */ - if (brel == NULL) - continue; - - Assert(brel->relid == rti); /* sanity check on array */ - - /* ignore RTEs that are "other rels" */ - if (brel->reloptkind != RELOPT_BASEREL) - continue; - - root->all_baserels = bms_add_member(root->all_baserels, brel->relid); - } - /* Mark base rels as to whether we care about fast-start plans */ set_base_rel_consider_startup(root); @@ -207,6 +186,7 @@ make_one_rel(PlannerInfo *root, List *joinlist) { RelOptInfo *brel = root->simple_rel_array[rti]; + /* there may be empty slots corresponding to non-baserel RTEs */ if (brel == NULL) continue; @@ -231,9 +211,9 @@ make_one_rel(PlannerInfo *root, List *joinlist) rel = make_rel_from_joinlist(root, joinlist); /* - * The result should join all and only the query's base rels. + * The result should join all and only the query's base + outer-join rels. */ - Assert(bms_equal(rel->relids, root->all_baserels)); + Assert(bms_equal(rel->relids, root->all_query_rels)); return rel; } @@ -558,7 +538,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * the final scan/join targetlist is available (see grouping_planner). */ if (rel->reloptkind == RELOPT_BASEREL && - !bms_equal(rel->relids, root->all_baserels)) + !bms_equal(rel->relids, root->all_query_rels)) generate_useful_gather_paths(root, rel, false); /* Now find the cheapest of the paths for this rel */ @@ -879,7 +859,7 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry * * to support an uncommon usage of second-rate sampling methods. Instead, * if there is a risk that the query might perform an unsafe join, just * wrap the SampleScan in a Materialize node. We can check for joins by - * counting the membership of all_baserels (note that this correctly + * counting the membership of all_query_rels (note that this correctly * counts inheritance trees as single rels). If we're inside a subquery, * we can't easily check whether a join might occur in the outer query, so * just assume one is possible. @@ -888,7 +868,7 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry * * so check repeatable_across_scans last, even though that's a bit odd. */ if ((root->query_level > 1 || - bms_membership(root->all_baserels) != BMS_SINGLETON) && + bms_membership(root->all_query_rels) != BMS_SINGLETON) && !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans)) { path = (Path *) create_material_path(rel, path); @@ -970,7 +950,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, if (enable_partitionwise_join && rel->reloptkind == RELOPT_BASEREL && rte->relkind == RELKIND_PARTITIONED_TABLE && - rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL) + bms_is_empty(rel->attr_needed[InvalidAttrNumber - rel->min_attr])) rel->consider_partitionwise_join = true; /* @@ -3429,7 +3409,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * partial paths. We'll do the same for the topmost scan/join rel * once we know the final targetlist (see grouping_planner). */ - if (!bms_equal(rel->relids, root->all_baserels)) + if (!bms_equal(rel->relids, root->all_query_rels)) generate_useful_gather_paths(root, rel, false); /* Find and save the cheapest paths for this rel */ diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 929a2311121..61db6ad951b 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -218,7 +218,7 @@ clauselist_selectivity_ext(PlannerInfo *root, if (rinfo) { - ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && + ok = (rinfo->num_base_rels == 1) && (is_pseudo_constant_clause_relids(lsecond(expr->args), rinfo->right_relids) || (varonleft = false, @@ -580,30 +580,6 @@ find_single_rel_for_clauses(PlannerInfo *root, List *clauses) } /* - * bms_is_subset_singleton - * - * Same result as bms_is_subset(s, bms_make_singleton(x)), - * but a little faster and doesn't leak memory. - * - * Is this of use anywhere else? If so move to bitmapset.c ... - */ -static bool -bms_is_subset_singleton(const Bitmapset *s, int x) -{ - switch (bms_membership(s)) - { - case BMS_EMPTY_SET: - return true; - case BMS_SINGLETON: - return bms_is_member(x, s); - case BMS_MULTIPLE: - return false; - } - /* can't get here... */ - return false; -} - -/* * treat_as_join_clause - * Decide whether an operator clause is to be handled by the * restriction or join estimator. Subroutine for clause_selectivity(). @@ -631,17 +607,20 @@ treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, else { /* - * Otherwise, it's a join if there's more than one relation used. We - * can optimize this calculation if an rinfo was passed. + * Otherwise, it's a join if there's more than one base relation used. + * We can optimize this calculation if an rinfo was passed. * * XXX Since we know the clause is being evaluated at a join, the * only way it could be single-relation is if it was delayed by outer - * joins. Although we can make use of the restriction qual estimators - * anyway, it seems likely that we ought to account for the - * probability of injected nulls somehow. + * joins. We intentionally count only baserels here, not OJs that + * might be present in rinfo->clause_relids, so that we direct such + * cases to the restriction qual estimators not join estimators. + * Eventually some notice should be taken of the possibility of + * injected nulls, but we'll likely want to do that in the restriction + * estimators rather than starting to treat such cases as join quals. */ if (rinfo) - return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + return (rinfo->num_base_rels > 1); else return (NumRelids(root, clause) > 1); } @@ -754,7 +733,9 @@ clause_selectivity_ext(PlannerInfo *root, * for all non-JOIN_INNER cases. */ if (varRelid == 0 || - bms_is_subset_singleton(rinfo->clause_relids, varRelid)) + rinfo->num_base_rels == 0 || + (rinfo->num_base_rels == 1 && + bms_is_member(varRelid, rinfo->clause_relids))) { /* Cacheable --- do we already have the result? */ if (jointype == JOIN_INNER) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 29ae32d9603..7d957a47a49 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4781,6 +4781,10 @@ compute_semi_anti_join_factors(PlannerInfo *root, norm_sjinfo.syn_lefthand = outerrel->relids; norm_sjinfo.syn_righthand = innerrel->relids; norm_sjinfo.jointype = JOIN_INNER; + norm_sjinfo.ojrelid = 0; + norm_sjinfo.commute_above_l = NULL; + norm_sjinfo.commute_above_r = NULL; + norm_sjinfo.commute_below = NULL; /* we don't bother trying to make the remaining fields valid */ norm_sjinfo.lhs_strict = false; norm_sjinfo.delay_upper_joins = false; @@ -4946,6 +4950,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) sjinfo.syn_lefthand = path->outerjoinpath->parent->relids; sjinfo.syn_righthand = path->innerjoinpath->parent->relids; sjinfo.jointype = JOIN_INNER; + sjinfo.ojrelid = 0; + sjinfo.commute_above_l = NULL; + sjinfo.commute_above_r = NULL; + sjinfo.commute_below = NULL; /* we don't bother trying to make the remaining fields valid */ sjinfo.lhs_strict = false; sjinfo.delay_upper_joins = false; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7d7e6facdfd..490953f2ff5 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -29,12 +29,14 @@ #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" static EquivalenceMember *add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, Relids nullable_relids, - bool is_child, Oid datatype); + EquivalenceMember *parent, + Oid datatype); static bool is_exprlist_member(Expr *node, List *exprs); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); @@ -61,10 +63,10 @@ static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceMember *rightem, EquivalenceClass *parent_ec); static bool reconsider_outer_join_clause(PlannerInfo *root, - RestrictInfo *rinfo, + OuterJoinClauseInfo *ojcinfo, bool outer_on_left); static bool reconsider_full_join_clause(PlannerInfo *root, - RestrictInfo *rinfo); + OuterJoinClauseInfo *ojcinfo); static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids); static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1, @@ -399,7 +401,7 @@ process_equivalence(PlannerInfo *root, { /* Case 3: add item2 to ec1 */ em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids, - false, item2_type); + NULL, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, @@ -417,7 +419,7 @@ process_equivalence(PlannerInfo *root, { /* Case 3: add item1 to ec2 */ em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids, - false, item1_type); + NULL, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; ec2->ec_min_security = Min(ec2->ec_min_security, @@ -451,9 +453,9 @@ process_equivalence(PlannerInfo *root, ec->ec_max_security = restrictinfo->security_level; ec->ec_merged = NULL; em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids, - false, item1_type); + NULL, item1_type); em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids, - false, item2_type); + NULL, item2_type); root->eq_classes = lappend(root->eq_classes, ec); @@ -543,7 +545,7 @@ canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation) */ static EquivalenceMember * add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, - Relids nullable_relids, bool is_child, Oid datatype) + Relids nullable_relids, EquivalenceMember *parent, Oid datatype) { EquivalenceMember *em = makeNode(EquivalenceMember); @@ -551,8 +553,9 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, em->em_relids = relids; em->em_nullable_relids = nullable_relids; em->em_is_const = false; - em->em_is_child = is_child; + em->em_is_child = (parent != NULL); em->em_datatype = datatype; + em->em_parent = parent; if (bms_is_empty(relids)) { @@ -564,12 +567,12 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * get_eclass_for_sort_expr() has to work harder. We put the tests * there not here to save cycles in the equivalence case. */ - Assert(!is_child); + Assert(!parent); em->em_is_const = true; ec->ec_has_const = true; /* it can't affect ec_relids */ } - else if (!is_child) /* child members don't add to ec_relids */ + else if (!parent) /* child members don't add to ec_relids */ { ec->ec_relids = bms_add_members(ec->ec_relids, relids); } @@ -722,7 +725,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, nullable_relids = bms_intersect(nullable_relids, expr_relids); newem = add_eq_member(newec, copyObject(expr), expr_relids, - nullable_relids, false, opcintype); + nullable_relids, NULL, opcintype); /* * add_eq_member doesn't check for volatile functions, set-returning @@ -757,6 +760,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, { RelOptInfo *rel = root->simple_rel_array[i]; + if (rel == NULL) /* must be an outer join */ + { + Assert(bms_is_member(i, root->outer_join_rels)); + continue; + } + Assert(rel->reloptkind == RELOPT_BASEREL || rel->reloptkind == RELOPT_DEADREL); @@ -1113,6 +1122,12 @@ generate_base_implied_equalities(PlannerInfo *root) { RelOptInfo *rel = root->simple_rel_array[i]; + if (rel == NULL) /* must be an outer join */ + { + Assert(bms_is_member(i, root->outer_join_rels)); + continue; + } + Assert(rel->reloptkind == RELOPT_BASEREL); rel->eclass_indexes = bms_add_member(rel->eclass_indexes, @@ -1808,6 +1823,7 @@ create_join_clause(PlannerInfo *root, EquivalenceClass *parent_ec) { RestrictInfo *rinfo; + RestrictInfo *parent_rinfo = NULL; ListCell *lc; MemoryContext oldcontext; @@ -1852,6 +1868,20 @@ create_join_clause(PlannerInfo *root, */ oldcontext = MemoryContextSwitchTo(root->planner_cxt); + /* + * If either EM is a child, recursively create the corresponding + * parent-to-parent clause, so that we can duplicate its rinfo_serial. + */ + if (leftem->em_is_child || rightem->em_is_child) + { + EquivalenceMember *leftp = leftem->em_parent ? leftem->em_parent : leftem; + EquivalenceMember *rightp = rightem->em_parent ? rightem->em_parent : rightem; + + parent_rinfo = create_join_clause(root, ec, opno, + leftp, rightp, + parent_ec); + } + rinfo = build_implied_join_equality(root, opno, ec->ec_collation, @@ -1863,6 +1893,10 @@ create_join_clause(PlannerInfo *root, rightem->em_nullable_relids), ec->ec_min_security); + /* If it's a child clause, copy the parent's rinfo_serial */ + if (parent_rinfo) + rinfo->rinfo_serial = parent_rinfo->rinfo_serial; + /* Mark the clause as redundant, or not */ rinfo->parent_ec = parent_ec; @@ -1977,10 +2011,12 @@ reconsider_outer_join_clauses(PlannerInfo *root) /* Process the LEFT JOIN clauses */ foreach(cell, root->left_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - if (reconsider_outer_join_clause(root, rinfo, true)) + if (reconsider_outer_join_clause(root, ojcinfo, true)) { + RestrictInfo *rinfo = ojcinfo->rinfo; + found = true; /* remove it from the list */ root->left_join_clauses = @@ -1996,10 +2032,12 @@ reconsider_outer_join_clauses(PlannerInfo *root) /* Process the RIGHT JOIN clauses */ foreach(cell, root->right_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - if (reconsider_outer_join_clause(root, rinfo, false)) + if (reconsider_outer_join_clause(root, ojcinfo, false)) { + RestrictInfo *rinfo = ojcinfo->rinfo; + found = true; /* remove it from the list */ root->right_join_clauses = @@ -2015,10 +2053,12 @@ reconsider_outer_join_clauses(PlannerInfo *root) /* Process the FULL JOIN clauses */ foreach(cell, root->full_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - if (reconsider_full_join_clause(root, rinfo)) + if (reconsider_full_join_clause(root, ojcinfo)) { + RestrictInfo *rinfo = ojcinfo->rinfo; + found = true; /* remove it from the list */ root->full_join_clauses = @@ -2035,21 +2075,21 @@ reconsider_outer_join_clauses(PlannerInfo *root) /* Now, any remaining clauses have to be thrown back */ foreach(cell, root->left_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - distribute_restrictinfo_to_rels(root, rinfo); + distribute_restrictinfo_to_rels(root, ojcinfo->rinfo); } foreach(cell, root->right_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - distribute_restrictinfo_to_rels(root, rinfo); + distribute_restrictinfo_to_rels(root, ojcinfo->rinfo); } foreach(cell, root->full_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + OuterJoinClauseInfo *ojcinfo = (OuterJoinClauseInfo *) lfirst(cell); - distribute_restrictinfo_to_rels(root, rinfo); + distribute_restrictinfo_to_rels(root, ojcinfo->rinfo); } } @@ -2059,9 +2099,10 @@ reconsider_outer_join_clauses(PlannerInfo *root) * Returns true if we were able to propagate a constant through the clause. */ static bool -reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, +reconsider_outer_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo, bool outer_on_left) { + RestrictInfo *rinfo = ojcinfo->rinfo; Expr *outervar, *innervar; Oid opno, @@ -2185,8 +2226,11 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, * Returns true if we were able to propagate a constant through the clause. */ static bool -reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) +reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo) { + RestrictInfo *rinfo = ojcinfo->rinfo; + SpecialJoinInfo *sjinfo = ojcinfo->sjinfo; + Relids fjrelids = bms_make_singleton(sjinfo->ojrelid); Expr *leftvar; Expr *rightvar; Oid opno, @@ -2268,6 +2312,18 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) cfirst = (Node *) linitial(cexpr->args); csecond = (Node *) lsecond(cexpr->args); + /* + * The COALESCE arguments will be marked as possibly nulled by + * the full join, while we wish to generate clauses that apply + * to the join's inputs. So we must strip the join from the + * nullingrels fields of cfirst/csecond before comparing them + * to leftvar/rightvar. (Perhaps with a less hokey + * representation for FULL JOIN USING output columns, this + * wouldn't be needed?) + */ + cfirst = remove_nulling_relids(cfirst, fjrelids, NULL); + csecond = remove_nulling_relids(csecond, fjrelids, NULL); + if (equal(leftvar, cfirst) && equal(rightvar, csecond)) { coal_idx = foreach_current_index(lc2); @@ -2605,10 +2661,18 @@ add_child_rel_equivalences(PlannerInfo *root, if (cur_em->em_is_child) continue; /* ignore children here */ - /* Does this member reference child's topmost parent rel? */ - if (bms_overlap(cur_em->em_relids, top_parent_relids)) + /* + * Consider only members that reference and can be computed at + * child's topmost parent rel. In particular we want to exclude + * parent-rel Vars that have nonempty varnullingrels. Translating + * those might fail, if the transformed expression wouldn't be a + * simple Var; and in any case it wouldn't produce a member that + * has any use in creating plans for the child rel. + */ + if (bms_is_subset(cur_em->em_relids, top_parent_relids) && + !bms_is_empty(cur_em->em_relids)) { - /* Yes, generate transformed child version */ + /* OK, generate transformed child version */ Expr *child_expr; Relids new_relids; Relids new_nullable_relids; @@ -2656,7 +2720,7 @@ add_child_rel_equivalences(PlannerInfo *root, (void) add_eq_member(cur_ec, child_expr, new_relids, new_nullable_relids, - true, cur_em->em_datatype); + cur_em, cur_em->em_datatype); /* Record this EC index for the child rel */ child_rel->eclass_indexes = bms_add_member(child_rel->eclass_indexes, i); @@ -2797,7 +2861,7 @@ add_child_join_rel_equivalences(PlannerInfo *root, (void) add_eq_member(cur_ec, child_expr, new_relids, new_nullable_relids, - true, cur_em->em_datatype); + cur_em, cur_em->em_datatype); } } } @@ -3204,6 +3268,12 @@ get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids) { RelOptInfo *rel = root->simple_rel_array[i]; + if (rel == NULL) /* must be an outer join */ + { + Assert(bms_is_member(i, root->outer_join_rels)); + continue; + } + ec_indexes = bms_add_members(ec_indexes, rel->eclass_indexes); } return ec_indexes; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index e13c8f19149..e9b784bcab9 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3352,13 +3352,13 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) * Add on any equivalence-derivable join clauses. Computing the correct * relid sets for generate_join_implied_equalities is slightly tricky * because the rel could be a child rel rather than a true baserel, and in - * that case we must remove its parents' relid(s) from all_baserels. + * that case we must subtract its parents' relid(s) from all_query_rels. */ if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) - otherrels = bms_difference(root->all_baserels, + otherrels = bms_difference(root->all_query_rels, find_childrel_parents(root, rel)); else - otherrels = bms_difference(root->all_baserels, rel->relids); + otherrels = bms_difference(root->all_query_rels, rel->relids); if (!bms_is_empty(otherrels)) clauselist = @@ -3736,7 +3736,8 @@ match_index_to_operand(Node *operand, */ if (operand && IsA(operand, Var) && index->rel->relid == ((Var *) operand)->varno && - indkey == ((Var *) operand)->varattno) + indkey == ((Var *) operand)->varattno && + ((Var *) operand)->varnullingrels == NULL) return true; } else diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index d345c0437a4..dfbb839be16 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -234,7 +234,9 @@ add_paths_to_joinrel(PlannerInfo *root, * reduces the number of parameterized paths we have to deal with at * higher join levels, without compromising the quality of the resulting * plan. We express the restriction as a Relids set that must overlap the - * parameterization of any proposed join path. + * parameterization of any proposed join path. Note: param_source_rels + * should contain only baserels, not OJ relids, so starting from + * all_baserels not all_query_rels is correct. */ foreach(lc, root->join_info_list) { @@ -366,6 +368,60 @@ allow_star_schema_join(PlannerInfo *root, } /* + * If the parameterization is only partly satisfied by the outer rel, + * the unsatisfied part can't include any outer-join relids that could + * null rels of the satisfied part. That would imply that we're trying + * to use a clause involving a Var with nonempty varnullingrels at + * a join level where that value isn't yet computable. + * + * In practice, this test never finds a problem because earlier join order + * restrictions prevent us from attempting a join that would cause a problem. + * (That's unsurprising, because the code worked before we ever added + * outer-join relids to expression relids.) It still seems worth checking + * as a backstop, but we don't go to a lot of trouble: just reject if the + * unsatisfied part includes any outer-join relids at all. + */ +static inline bool +have_unsafe_outer_join_ref(PlannerInfo *root, + Relids outerrelids, + Relids inner_paramrels) +{ + bool result = false; + Relids unsatisfied = bms_difference(inner_paramrels, outerrelids); + + if (unlikely(bms_overlap(unsatisfied, root->outer_join_rels))) + { +#ifdef NOT_USED + /* If we ever weaken the join order restrictions, we might need this */ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + if (!bms_is_member(sjinfo->ojrelid, unsatisfied)) + continue; /* not relevant */ + if (bms_overlap(inner_paramrels, sjinfo->min_righthand) || + (sjinfo->jointype == JOIN_FULL && + bms_overlap(inner_paramrels, sjinfo->min_lefthand))) + { + result = true; /* doesn't work */ + break; + } + } +#else + /* For now, if we do see an overlap, just assume it's trouble */ + result = true; +#endif + } + + /* Waste no memory when we reject a path here */ + bms_free(unsatisfied); + + return result; +} + +/* * paraminfo_get_equal_hashops * Determine if param_info and innerrel's lateral_vars can be hashed. * Returns true the hashing is possible, otherwise return false. @@ -657,15 +713,16 @@ try_nestloop_path(PlannerInfo *root, /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible --- unless allow_star_schema_join - * says to allow it anyway. Also, we must reject if have_dangerous_phv - * doesn't like the look of it, which could only happen if the nestloop is - * still parameterized. + * says to allow it anyway. Also, we must reject if either + * have_unsafe_outer_join_ref or have_dangerous_phv don't like the look of + * it, which could only happen if the nestloop is still parameterized. */ required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels, innerrelids, inner_paramrels); if (required_outer && ((!bms_overlap(required_outer, extra->param_source_rels) && !allow_star_schema_join(root, outerrelids, inner_paramrels)) || + have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels) || have_dangerous_phv(root, outerrelids, inner_paramrels))) { /* Waste no memory when we reject a path here */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 9a5930ce86c..56dd1073c5d 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -353,7 +353,10 @@ make_rels_by_clauseless_joins(PlannerInfo *root, * * Caller must supply not only the two rels, but the union of their relids. * (We could simplify the API by computing joinrelids locally, but this - * would be redundant work in the normal path through make_join_rel.) + * would be redundant work in the normal path through make_join_rel. + * Note that this value does NOT include the RT index of any outer join that + * might need to be performed here, so it's not the canonical identifier + * of the join relation.) * * On success, *sjinfo_p is set to NULL if this is to be a plain inner join, * else it's set to point to the associated SpecialJoinInfo node. Also, @@ -695,7 +698,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) /* We should never try to join two overlapping sets of rels. */ Assert(!bms_overlap(rel1->relids, rel2->relids)); - /* Construct Relids set that identifies the joinrel. */ + /* Construct Relids set that identifies the joinrel (without OJ as yet). */ joinrelids = bms_union(rel1->relids, rel2->relids); /* Check validity and determine join type. */ @@ -707,6 +710,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) return NULL; } + /* If we have an outer join, add its RTI to form the canonical relids. */ + if (sjinfo && sjinfo->ojrelid != 0) + joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid); + /* Swap rels if needed to match the join info. */ if (reversed) { @@ -730,6 +737,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) sjinfo->syn_lefthand = rel1->relids; sjinfo->syn_righthand = rel2->relids; sjinfo->jointype = JOIN_INNER; + sjinfo->ojrelid = 0; + sjinfo->commute_above_l = NULL; + sjinfo->commute_above_r = NULL; + sjinfo->commute_below = NULL; /* we don't bother trying to make the remaining fields valid */ sjinfo->lhs_strict = false; sjinfo->delay_upper_joins = false; @@ -1510,8 +1521,6 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* We should never try to join two overlapping sets of rels. */ Assert(!bms_overlap(child_rel1->relids, child_rel2->relids)); - child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids); - appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos); /* * Construct SpecialJoinInfo from parent join relations's @@ -1521,6 +1530,15 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, child_rel1->relids, child_rel2->relids); + /* Build correct join relids for child join */ + child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids); + if (child_sjinfo->ojrelid != 0) + child_joinrelids = bms_add_member(child_joinrelids, + child_sjinfo->ojrelid); + + /* Find the AppendRelInfo structures */ + appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos); + /* * Construct restrictions applicable to the child join from those * applicable to the parent join. @@ -1536,8 +1554,7 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, { child_joinrel = build_child_join_rel(root, child_rel1, child_rel2, joinrel, child_restrictlist, - child_sjinfo, - child_sjinfo->jointype); + child_sjinfo); joinrel->part_rels[cnt_parts] = child_joinrel; joinrel->live_parts = bms_add_member(joinrel->live_parts, cnt_parts); joinrel->all_partrels = bms_add_members(joinrel->all_partrels, @@ -1583,6 +1600,7 @@ build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo, sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand, right_nappinfos, right_appinfos); + /* outer-join relids need no adjustment */ sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root, (Node *) sjinfo->semi_rhs_exprs, right_nappinfos, diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 6de994480b9..05ad585a8fe 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -59,6 +59,7 @@ IsCTIDVar(Var *var, RelOptInfo *rel) if (var->varattno == SelfItemPointerAttributeNumber && var->vartype == TIDOID && var->varno == rel->relid && + var->varnullingrels == NULL && var->varlevelsup == 0) return true; return false; |