22
22
*/
23
23
#include "postgres.h"
24
24
25
+ #include "nodes/nodeFuncs.h"
26
+ #include "optimizer/clauses.h"
25
27
#include "optimizer/joininfo.h"
26
28
#include "optimizer/pathnode.h"
27
29
#include "optimizer/paths.h"
28
30
#include "optimizer/planmain.h"
29
- #include "optimizer/var.h"
31
+ #include "optimizer/tlist.h"
32
+ #include "utils/lsyscache.h"
30
33
31
34
/* local functions */
32
35
static bool join_is_removable (PlannerInfo * root , SpecialJoinInfo * sjinfo );
33
36
static void remove_rel_from_query (PlannerInfo * root , int relid ,
34
37
Relids joinrelids );
35
38
static List * remove_rel_from_joinlist (List * joinlist , int relid , int * nremoved );
39
+ static Oid distinct_col_search (int colno , List * colnos , List * opids );
36
40
37
41
38
42
/*
@@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
147
151
{
148
152
int innerrelid ;
149
153
RelOptInfo * innerrel ;
154
+ Query * subquery = NULL ;
150
155
Relids joinrelids ;
151
156
List * clause_list = NIL ;
152
157
ListCell * l ;
153
158
int attroff ;
154
159
155
160
/*
156
- * Currently, we only know how to remove left joins to a baserel with
157
- * unique indexes. We can check most of these criteria pretty trivially
158
- * to avoid doing useless extra work. But checking whether any of the
159
- * indexes are unique would require iterating over the indexlist, so for
160
- * now we just make sure there are indexes of some sort or other. If none
161
- * of them are unique, join removal will still fail, just slightly later.
161
+ * Must be a non-delaying left join to a single baserel, else we aren't
162
+ * going to be able to do anything with it.
162
163
*/
163
164
if (sjinfo -> jointype != JOIN_LEFT ||
164
165
sjinfo -> delay_upper_joins ||
@@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
168
169
innerrelid = bms_singleton_member (sjinfo -> min_righthand );
169
170
innerrel = find_base_rel (root , innerrelid );
170
171
171
- if (innerrel -> reloptkind != RELOPT_BASEREL ||
172
- innerrel -> rtekind != RTE_RELATION ||
173
- innerrel -> indexlist == NIL )
172
+ if (innerrel -> reloptkind != RELOPT_BASEREL )
174
173
return false;
175
174
175
+ /*
176
+ * Before we go to the effort of checking whether any innerrel variables
177
+ * are needed above the join, make a quick check to eliminate cases in
178
+ * which we will surely be unable to prove uniqueness of the innerrel.
179
+ */
180
+ if (innerrel -> rtekind == RTE_RELATION )
181
+ {
182
+ /*
183
+ * For a plain-relation innerrel, we only know how to prove uniqueness
184
+ * by reference to unique indexes. If there are no indexes then
185
+ * there's certainly no unique indexes so there's no point in going
186
+ * further.
187
+ */
188
+ if (innerrel -> indexlist == NIL )
189
+ return false;
190
+ }
191
+ else if (innerrel -> rtekind == RTE_SUBQUERY )
192
+ {
193
+ subquery = root -> simple_rte_array [innerrelid ]-> subquery ;
194
+
195
+ /*
196
+ * If the subquery has no qualities that support distinctness proofs
197
+ * then there's no point in going further.
198
+ */
199
+ if (!query_supports_distinctness (subquery ))
200
+ return false;
201
+ }
202
+ else
203
+ return false; /* unsupported rtekind */
204
+
176
205
/* Compute the relid set for the join we are considering */
177
206
joinrelids = bms_union (sjinfo -> min_lefthand , sjinfo -> min_righthand );
178
207
@@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
272
301
273
302
/*
274
303
* relation_has_unique_index_for automatically adds any usable restriction
275
- * clauses for the innerrel, so we needn't do that here.
304
+ * clauses for the innerrel, so we needn't do that here. (XXX we are not
305
+ * considering restriction clauses for subqueries; is that worth doing?)
276
306
*/
277
307
278
- /* Now examine the indexes to see if we have a matching unique index */
279
- if (relation_has_unique_index_for (root , innerrel , clause_list , NIL , NIL ))
280
- return true;
308
+ if (innerrel -> rtekind == RTE_RELATION )
309
+ {
310
+ /* Now examine the indexes to see if we have a matching unique index */
311
+ if (relation_has_unique_index_for (root , innerrel , clause_list , NIL , NIL ))
312
+ return true;
313
+ }
314
+ else /* innerrel->rtekind == RTE_SUBQUERY */
315
+ {
316
+ List * colnos = NIL ;
317
+ List * opids = NIL ;
318
+
319
+ /*
320
+ * Build the argument lists for query_is_distinct_for: a list of
321
+ * output column numbers that the query needs to be distinct over, and
322
+ * a list of equality operators that the output columns need to be
323
+ * distinct according to.
324
+ */
325
+ foreach (l , clause_list )
326
+ {
327
+ RestrictInfo * rinfo = (RestrictInfo * ) lfirst (l );
328
+ Oid op ;
329
+ Var * var ;
330
+
331
+ /*
332
+ * Get the equality operator we need uniqueness according to.
333
+ * (This might be a cross-type operator and thus not exactly the
334
+ * same operator the subquery would consider; that's all right
335
+ * since query_is_distinct_for can resolve such cases.) The
336
+ * mergejoinability test above should have selected only OpExprs.
337
+ */
338
+ Assert (IsA (rinfo -> clause , OpExpr ));
339
+ op = ((OpExpr * ) rinfo -> clause )-> opno ;
340
+
341
+ /* clause_sides_match_join identified the inner side for us */
342
+ if (rinfo -> outer_is_left )
343
+ var = (Var * ) get_rightop (rinfo -> clause );
344
+ else
345
+ var = (Var * ) get_leftop (rinfo -> clause );
346
+
347
+ /*
348
+ * If inner side isn't a Var referencing a subquery output column,
349
+ * this clause doesn't help us.
350
+ */
351
+ if (!var || !IsA (var , Var ) ||
352
+ var -> varno != innerrelid || var -> varlevelsup != 0 )
353
+ continue ;
354
+
355
+ colnos = lappend_int (colnos , var -> varattno );
356
+ opids = lappend_oid (opids , op );
357
+ }
358
+
359
+ if (query_is_distinct_for (subquery , colnos , opids ))
360
+ return true;
361
+ }
281
362
282
363
/*
283
364
* Some day it would be nice to check for other methods of establishing
@@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
481
562
482
563
return result ;
483
564
}
565
+
566
+
567
+ /*
568
+ * query_supports_distinctness - could the query possibly be proven distinct
569
+ * on some set of output columns?
570
+ *
571
+ * This is effectively a pre-checking function for query_is_distinct_for().
572
+ * It must return TRUE if query_is_distinct_for() could possibly return TRUE
573
+ * with this query, but it should not expend a lot of cycles. The idea is
574
+ * that callers can avoid doing possibly-expensive processing to compute
575
+ * query_is_distinct_for()'s argument lists if the call could not possibly
576
+ * succeed.
577
+ */
578
+ bool
579
+ query_supports_distinctness (Query * query )
580
+ {
581
+ if (query -> distinctClause != NIL ||
582
+ query -> groupClause != NIL ||
583
+ query -> hasAggs ||
584
+ query -> havingQual ||
585
+ query -> setOperations )
586
+ return true;
587
+
588
+ return false;
589
+ }
590
+
591
+ /*
592
+ * query_is_distinct_for - does query never return duplicates of the
593
+ * specified columns?
594
+ *
595
+ * query is a not-yet-planned subquery (in current usage, it's always from
596
+ * a subquery RTE, which the planner avoids scribbling on).
597
+ *
598
+ * colnos is an integer list of output column numbers (resno's). We are
599
+ * interested in whether rows consisting of just these columns are certain
600
+ * to be distinct. "Distinctness" is defined according to whether the
601
+ * corresponding upper-level equality operators listed in opids would think
602
+ * the values are distinct. (Note: the opids entries could be cross-type
603
+ * operators, and thus not exactly the equality operators that the subquery
604
+ * would use itself. We use equality_ops_are_compatible() to check
605
+ * compatibility. That looks at btree or hash opfamily membership, and so
606
+ * should give trustworthy answers for all operators that we might need
607
+ * to deal with here.)
608
+ */
609
+ bool
610
+ query_is_distinct_for (Query * query , List * colnos , List * opids )
611
+ {
612
+ ListCell * l ;
613
+ Oid opid ;
614
+
615
+ Assert (list_length (colnos ) == list_length (opids ));
616
+
617
+ /*
618
+ * A set-returning function in the query's targetlist can result in
619
+ * returning duplicate rows, if the SRF is evaluated after the
620
+ * de-duplication step; so we play it safe and say "no" if there are any
621
+ * SRFs. (We could be certain that it's okay if SRFs appear only in the
622
+ * specified columns, since those must be evaluated before de-duplication;
623
+ * but it doesn't presently seem worth the complication to check that.)
624
+ */
625
+ if (expression_returns_set ((Node * ) query -> targetList ))
626
+ return false;
627
+
628
+ /*
629
+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
630
+ * columns in the DISTINCT clause appear in colnos and operator semantics
631
+ * match.
632
+ */
633
+ if (query -> distinctClause )
634
+ {
635
+ foreach (l , query -> distinctClause )
636
+ {
637
+ SortGroupClause * sgc = (SortGroupClause * ) lfirst (l );
638
+ TargetEntry * tle = get_sortgroupclause_tle (sgc ,
639
+ query -> targetList );
640
+
641
+ opid = distinct_col_search (tle -> resno , colnos , opids );
642
+ if (!OidIsValid (opid ) ||
643
+ !equality_ops_are_compatible (opid , sgc -> eqop ))
644
+ break ; /* exit early if no match */
645
+ }
646
+ if (l == NULL ) /* had matches for all? */
647
+ return true;
648
+ }
649
+
650
+ /*
651
+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
652
+ * appear in colnos and operator semantics match.
653
+ */
654
+ if (query -> groupClause )
655
+ {
656
+ foreach (l , query -> groupClause )
657
+ {
658
+ SortGroupClause * sgc = (SortGroupClause * ) lfirst (l );
659
+ TargetEntry * tle = get_sortgroupclause_tle (sgc ,
660
+ query -> targetList );
661
+
662
+ opid = distinct_col_search (tle -> resno , colnos , opids );
663
+ if (!OidIsValid (opid ) ||
664
+ !equality_ops_are_compatible (opid , sgc -> eqop ))
665
+ break ; /* exit early if no match */
666
+ }
667
+ if (l == NULL ) /* had matches for all? */
668
+ return true;
669
+ }
670
+ else
671
+ {
672
+ /*
673
+ * If we have no GROUP BY, but do have aggregates or HAVING, then the
674
+ * result is at most one row so it's surely unique, for any operators.
675
+ */
676
+ if (query -> hasAggs || query -> havingQual )
677
+ return true;
678
+ }
679
+
680
+ /*
681
+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
682
+ * except with ALL.
683
+ */
684
+ if (query -> setOperations )
685
+ {
686
+ SetOperationStmt * topop = (SetOperationStmt * ) query -> setOperations ;
687
+
688
+ Assert (IsA (topop , SetOperationStmt ));
689
+ Assert (topop -> op != SETOP_NONE );
690
+
691
+ if (!topop -> all )
692
+ {
693
+ ListCell * lg ;
694
+
695
+ /* We're good if all the nonjunk output columns are in colnos */
696
+ lg = list_head (topop -> groupClauses );
697
+ foreach (l , query -> targetList )
698
+ {
699
+ TargetEntry * tle = (TargetEntry * ) lfirst (l );
700
+ SortGroupClause * sgc ;
701
+
702
+ if (tle -> resjunk )
703
+ continue ; /* ignore resjunk columns */
704
+
705
+ /* non-resjunk columns should have grouping clauses */
706
+ Assert (lg != NULL );
707
+ sgc = (SortGroupClause * ) lfirst (lg );
708
+ lg = lnext (lg );
709
+
710
+ opid = distinct_col_search (tle -> resno , colnos , opids );
711
+ if (!OidIsValid (opid ) ||
712
+ !equality_ops_are_compatible (opid , sgc -> eqop ))
713
+ break ; /* exit early if no match */
714
+ }
715
+ if (l == NULL ) /* had matches for all? */
716
+ return true;
717
+ }
718
+ }
719
+
720
+ /*
721
+ * XXX Are there any other cases in which we can easily see the result
722
+ * must be distinct?
723
+ *
724
+ * If you do add more smarts to this function, be sure to update
725
+ * query_supports_distinctness() to match.
726
+ */
727
+
728
+ return false;
729
+ }
730
+
731
+ /*
732
+ * distinct_col_search - subroutine for query_is_distinct_for
733
+ *
734
+ * If colno is in colnos, return the corresponding element of opids,
735
+ * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
736
+ * but if it does, we arbitrarily select the first match.)
737
+ */
738
+ static Oid
739
+ distinct_col_search (int colno , List * colnos , List * opids )
740
+ {
741
+ ListCell * lc1 ,
742
+ * lc2 ;
743
+
744
+ forboth (lc1 , colnos , lc2 , opids )
745
+ {
746
+ if (colno == lfirst_int (lc1 ))
747
+ return lfirst_oid (lc2 );
748
+ }
749
+ return InvalidOid ;
750
+ }
0 commit comments