Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f15821e

Browse files
committed
Allow join removal in some cases involving a left join to a subquery.
We can remove a left join to a relation if the relation's output is provably distinct for the columns involved in the join clause (considering only equijoin clauses) and the relation supplies no variables needed above the join. Previously, the join removal logic could only prove distinctness by reference to unique indexes of a table. This patch extends the logic to consider subquery relations, wherein distinctness might be proven by reference to GROUP BY, DISTINCT, etc. We actually already had some code to check that a subquery's output was provably distinct, but it was hidden inside pathnode.c; which was a pretty bad place for it really, since that file is mostly boilerplate Path construction and comparison. Move that code to analyzejoins.c, which is arguably a more appropriate location, and is certainly the site of the new usage for it. David Rowley, reviewed by Simon Riggs
1 parent 5571caf commit f15821e

File tree

5 files changed

+417
-186
lines changed

5 files changed

+417
-186
lines changed

src/backend/optimizer/plan/analyzejoins.c

+281-14
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,21 @@
2222
*/
2323
#include "postgres.h"
2424

25+
#include "nodes/nodeFuncs.h"
26+
#include "optimizer/clauses.h"
2527
#include "optimizer/joininfo.h"
2628
#include "optimizer/pathnode.h"
2729
#include "optimizer/paths.h"
2830
#include "optimizer/planmain.h"
29-
#include "optimizer/var.h"
31+
#include "optimizer/tlist.h"
32+
#include "utils/lsyscache.h"
3033

3134
/* local functions */
3235
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
3336
static void remove_rel_from_query(PlannerInfo *root, int relid,
3437
Relids joinrelids);
3538
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
39+
static Oid distinct_col_search(int colno, List *colnos, List *opids);
3640

3741

3842
/*
@@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
147151
{
148152
int innerrelid;
149153
RelOptInfo *innerrel;
154+
Query *subquery = NULL;
150155
Relids joinrelids;
151156
List *clause_list = NIL;
152157
ListCell *l;
153158
int attroff;
154159

155160
/*
156-
* Currently, we only know how to remove left joins to a baserel with
157-
* unique indexes. We can check most of these criteria pretty trivially
158-
* to avoid doing useless extra work. But checking whether any of the
159-
* indexes are unique would require iterating over the indexlist, so for
160-
* now we just make sure there are indexes of some sort or other. If none
161-
* of them are unique, join removal will still fail, just slightly later.
161+
* Must be a non-delaying left join to a single baserel, else we aren't
162+
* going to be able to do anything with it.
162163
*/
163164
if (sjinfo->jointype != JOIN_LEFT ||
164165
sjinfo->delay_upper_joins ||
@@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
168169
innerrelid = bms_singleton_member(sjinfo->min_righthand);
169170
innerrel = find_base_rel(root, innerrelid);
170171

171-
if (innerrel->reloptkind != RELOPT_BASEREL ||
172-
innerrel->rtekind != RTE_RELATION ||
173-
innerrel->indexlist == NIL)
172+
if (innerrel->reloptkind != RELOPT_BASEREL)
174173
return false;
175174

175+
/*
176+
* Before we go to the effort of checking whether any innerrel variables
177+
* are needed above the join, make a quick check to eliminate cases in
178+
* which we will surely be unable to prove uniqueness of the innerrel.
179+
*/
180+
if (innerrel->rtekind == RTE_RELATION)
181+
{
182+
/*
183+
* For a plain-relation innerrel, we only know how to prove uniqueness
184+
* by reference to unique indexes. If there are no indexes then
185+
* there's certainly no unique indexes so there's no point in going
186+
* further.
187+
*/
188+
if (innerrel->indexlist == NIL)
189+
return false;
190+
}
191+
else if (innerrel->rtekind == RTE_SUBQUERY)
192+
{
193+
subquery = root->simple_rte_array[innerrelid]->subquery;
194+
195+
/*
196+
* If the subquery has no qualities that support distinctness proofs
197+
* then there's no point in going further.
198+
*/
199+
if (!query_supports_distinctness(subquery))
200+
return false;
201+
}
202+
else
203+
return false; /* unsupported rtekind */
204+
176205
/* Compute the relid set for the join we are considering */
177206
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
178207

@@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
272301

273302
/*
274303
* relation_has_unique_index_for automatically adds any usable restriction
275-
* clauses for the innerrel, so we needn't do that here.
304+
* clauses for the innerrel, so we needn't do that here. (XXX we are not
305+
* considering restriction clauses for subqueries; is that worth doing?)
276306
*/
277307

278-
/* Now examine the indexes to see if we have a matching unique index */
279-
if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
280-
return true;
308+
if (innerrel->rtekind == RTE_RELATION)
309+
{
310+
/* Now examine the indexes to see if we have a matching unique index */
311+
if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
312+
return true;
313+
}
314+
else /* innerrel->rtekind == RTE_SUBQUERY */
315+
{
316+
List *colnos = NIL;
317+
List *opids = NIL;
318+
319+
/*
320+
* Build the argument lists for query_is_distinct_for: a list of
321+
* output column numbers that the query needs to be distinct over, and
322+
* a list of equality operators that the output columns need to be
323+
* distinct according to.
324+
*/
325+
foreach(l, clause_list)
326+
{
327+
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
328+
Oid op;
329+
Var *var;
330+
331+
/*
332+
* Get the equality operator we need uniqueness according to.
333+
* (This might be a cross-type operator and thus not exactly the
334+
* same operator the subquery would consider; that's all right
335+
* since query_is_distinct_for can resolve such cases.) The
336+
* mergejoinability test above should have selected only OpExprs.
337+
*/
338+
Assert(IsA(rinfo->clause, OpExpr));
339+
op = ((OpExpr *) rinfo->clause)->opno;
340+
341+
/* clause_sides_match_join identified the inner side for us */
342+
if (rinfo->outer_is_left)
343+
var = (Var *) get_rightop(rinfo->clause);
344+
else
345+
var = (Var *) get_leftop(rinfo->clause);
346+
347+
/*
348+
* If inner side isn't a Var referencing a subquery output column,
349+
* this clause doesn't help us.
350+
*/
351+
if (!var || !IsA(var, Var) ||
352+
var->varno != innerrelid || var->varlevelsup != 0)
353+
continue;
354+
355+
colnos = lappend_int(colnos, var->varattno);
356+
opids = lappend_oid(opids, op);
357+
}
358+
359+
if (query_is_distinct_for(subquery, colnos, opids))
360+
return true;
361+
}
281362

282363
/*
283364
* Some day it would be nice to check for other methods of establishing
@@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
481562

482563
return result;
483564
}
565+
566+
567+
/*
568+
* query_supports_distinctness - could the query possibly be proven distinct
569+
* on some set of output columns?
570+
*
571+
* This is effectively a pre-checking function for query_is_distinct_for().
572+
* It must return TRUE if query_is_distinct_for() could possibly return TRUE
573+
* with this query, but it should not expend a lot of cycles. The idea is
574+
* that callers can avoid doing possibly-expensive processing to compute
575+
* query_is_distinct_for()'s argument lists if the call could not possibly
576+
* succeed.
577+
*/
578+
bool
579+
query_supports_distinctness(Query *query)
580+
{
581+
if (query->distinctClause != NIL ||
582+
query->groupClause != NIL ||
583+
query->hasAggs ||
584+
query->havingQual ||
585+
query->setOperations)
586+
return true;
587+
588+
return false;
589+
}
590+
591+
/*
592+
* query_is_distinct_for - does query never return duplicates of the
593+
* specified columns?
594+
*
595+
* query is a not-yet-planned subquery (in current usage, it's always from
596+
* a subquery RTE, which the planner avoids scribbling on).
597+
*
598+
* colnos is an integer list of output column numbers (resno's). We are
599+
* interested in whether rows consisting of just these columns are certain
600+
* to be distinct. "Distinctness" is defined according to whether the
601+
* corresponding upper-level equality operators listed in opids would think
602+
* the values are distinct. (Note: the opids entries could be cross-type
603+
* operators, and thus not exactly the equality operators that the subquery
604+
* would use itself. We use equality_ops_are_compatible() to check
605+
* compatibility. That looks at btree or hash opfamily membership, and so
606+
* should give trustworthy answers for all operators that we might need
607+
* to deal with here.)
608+
*/
609+
bool
610+
query_is_distinct_for(Query *query, List *colnos, List *opids)
611+
{
612+
ListCell *l;
613+
Oid opid;
614+
615+
Assert(list_length(colnos) == list_length(opids));
616+
617+
/*
618+
* A set-returning function in the query's targetlist can result in
619+
* returning duplicate rows, if the SRF is evaluated after the
620+
* de-duplication step; so we play it safe and say "no" if there are any
621+
* SRFs. (We could be certain that it's okay if SRFs appear only in the
622+
* specified columns, since those must be evaluated before de-duplication;
623+
* but it doesn't presently seem worth the complication to check that.)
624+
*/
625+
if (expression_returns_set((Node *) query->targetList))
626+
return false;
627+
628+
/*
629+
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
630+
* columns in the DISTINCT clause appear in colnos and operator semantics
631+
* match.
632+
*/
633+
if (query->distinctClause)
634+
{
635+
foreach(l, query->distinctClause)
636+
{
637+
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
638+
TargetEntry *tle = get_sortgroupclause_tle(sgc,
639+
query->targetList);
640+
641+
opid = distinct_col_search(tle->resno, colnos, opids);
642+
if (!OidIsValid(opid) ||
643+
!equality_ops_are_compatible(opid, sgc->eqop))
644+
break; /* exit early if no match */
645+
}
646+
if (l == NULL) /* had matches for all? */
647+
return true;
648+
}
649+
650+
/*
651+
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
652+
* appear in colnos and operator semantics match.
653+
*/
654+
if (query->groupClause)
655+
{
656+
foreach(l, query->groupClause)
657+
{
658+
SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
659+
TargetEntry *tle = get_sortgroupclause_tle(sgc,
660+
query->targetList);
661+
662+
opid = distinct_col_search(tle->resno, colnos, opids);
663+
if (!OidIsValid(opid) ||
664+
!equality_ops_are_compatible(opid, sgc->eqop))
665+
break; /* exit early if no match */
666+
}
667+
if (l == NULL) /* had matches for all? */
668+
return true;
669+
}
670+
else
671+
{
672+
/*
673+
* If we have no GROUP BY, but do have aggregates or HAVING, then the
674+
* result is at most one row so it's surely unique, for any operators.
675+
*/
676+
if (query->hasAggs || query->havingQual)
677+
return true;
678+
}
679+
680+
/*
681+
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
682+
* except with ALL.
683+
*/
684+
if (query->setOperations)
685+
{
686+
SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
687+
688+
Assert(IsA(topop, SetOperationStmt));
689+
Assert(topop->op != SETOP_NONE);
690+
691+
if (!topop->all)
692+
{
693+
ListCell *lg;
694+
695+
/* We're good if all the nonjunk output columns are in colnos */
696+
lg = list_head(topop->groupClauses);
697+
foreach(l, query->targetList)
698+
{
699+
TargetEntry *tle = (TargetEntry *) lfirst(l);
700+
SortGroupClause *sgc;
701+
702+
if (tle->resjunk)
703+
continue; /* ignore resjunk columns */
704+
705+
/* non-resjunk columns should have grouping clauses */
706+
Assert(lg != NULL);
707+
sgc = (SortGroupClause *) lfirst(lg);
708+
lg = lnext(lg);
709+
710+
opid = distinct_col_search(tle->resno, colnos, opids);
711+
if (!OidIsValid(opid) ||
712+
!equality_ops_are_compatible(opid, sgc->eqop))
713+
break; /* exit early if no match */
714+
}
715+
if (l == NULL) /* had matches for all? */
716+
return true;
717+
}
718+
}
719+
720+
/*
721+
* XXX Are there any other cases in which we can easily see the result
722+
* must be distinct?
723+
*
724+
* If you do add more smarts to this function, be sure to update
725+
* query_supports_distinctness() to match.
726+
*/
727+
728+
return false;
729+
}
730+
731+
/*
732+
* distinct_col_search - subroutine for query_is_distinct_for
733+
*
734+
* If colno is in colnos, return the corresponding element of opids,
735+
* else return InvalidOid. (Ordinarily colnos would not contain duplicates,
736+
* but if it does, we arbitrarily select the first match.)
737+
*/
738+
static Oid
739+
distinct_col_search(int colno, List *colnos, List *opids)
740+
{
741+
ListCell *lc1,
742+
*lc2;
743+
744+
forboth(lc1, colnos, lc2, opids)
745+
{
746+
if (colno == lfirst_int(lc1))
747+
return lfirst_oid(lc2);
748+
}
749+
return InvalidOid;
750+
}

0 commit comments

Comments
 (0)