8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.122 2005/06/05 22:32:56 tgl Exp $
11
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.123 2005/07/15 17:09:25 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
34
34
#include "utils/syscache.h"
35
35
36
36
37
- static bool is_distinct_query (Query * query );
37
+ static List * translate_sub_tlist (List * tlist , int relid );
38
+ static bool query_is_distinct_for (Query * query , List * colnos );
38
39
static bool hash_safe_tlist (List * tlist );
39
40
40
41
@@ -800,14 +801,41 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
800
801
pathnode -> subpath = subpath ;
801
802
802
803
/*
803
- * If the input is a subquery whose output must be unique already, we
804
- * don't need to do anything.
804
+ * Try to identify the targetlist that will actually be unique-ified.
805
+ * In current usage, this routine is only used for sub-selects of IN
806
+ * clauses, so we should be able to find the tlist in in_info_list.
807
+ */
808
+ sub_targetlist = NIL ;
809
+ foreach (l , root -> in_info_list )
810
+ {
811
+ InClauseInfo * ininfo = (InClauseInfo * ) lfirst (l );
812
+
813
+ if (bms_equal (ininfo -> righthand , rel -> relids ))
814
+ {
815
+ sub_targetlist = ininfo -> sub_targetlist ;
816
+ break ;
817
+ }
818
+ }
819
+
820
+ /*
821
+ * If the input is a subquery whose output must be unique already,
822
+ * then we don't need to do anything. The test for uniqueness has
823
+ * to consider exactly which columns we are extracting; for example
824
+ * "SELECT DISTINCT x,y" doesn't guarantee that x alone is distinct.
825
+ * So we cannot check for this optimization unless we found our own
826
+ * targetlist above, and it consists only of simple Vars referencing
827
+ * subquery outputs. (Possibly we could do something with expressions
828
+ * in the subquery outputs, too, but for now keep it simple.)
805
829
*/
806
- if (rel -> rtekind == RTE_SUBQUERY )
830
+ if (sub_targetlist && rel -> rtekind == RTE_SUBQUERY )
807
831
{
808
832
RangeTblEntry * rte = rt_fetch (rel -> relid , root -> parse -> rtable );
833
+ List * sub_tlist_colnos ;
834
+
835
+ sub_tlist_colnos = translate_sub_tlist (sub_targetlist , rel -> relid );
809
836
810
- if (is_distinct_query (rte -> subquery ))
837
+ if (sub_tlist_colnos &&
838
+ query_is_distinct_for (rte -> subquery , sub_tlist_colnos ))
811
839
{
812
840
pathnode -> umethod = UNIQUE_PATH_NOOP ;
813
841
pathnode -> rows = rel -> rows ;
@@ -821,23 +849,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
821
849
}
822
850
}
823
851
824
- /*
825
- * Try to identify the targetlist that will actually be unique-ified.
826
- * In current usage, this routine is only used for sub-selects of IN
827
- * clauses, so we should be able to find the tlist in in_info_list.
828
- */
829
- sub_targetlist = NIL ;
830
- foreach (l , root -> in_info_list )
831
- {
832
- InClauseInfo * ininfo = (InClauseInfo * ) lfirst (l );
833
-
834
- if (bms_equal (ininfo -> righthand , rel -> relids ))
835
- {
836
- sub_targetlist = ininfo -> sub_targetlist ;
837
- break ;
838
- }
839
- }
840
-
841
852
/*
842
853
* If we know the targetlist, try to estimate number of result rows;
843
854
* otherwise punt.
@@ -913,46 +924,83 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
913
924
}
914
925
915
926
/*
916
- * is_distinct_query - does query never return duplicate rows?
927
+ * translate_sub_tlist - get subquery column numbers represented by tlist
928
+ *
929
+ * The given targetlist should contain only Vars referencing the given relid.
930
+ * Extract their varattnos (ie, the column numbers of the subquery) and return
931
+ * as an integer List.
932
+ *
933
+ * If any of the tlist items is not a simple Var, we cannot determine whether
934
+ * the subquery's uniqueness condition (if any) matches ours, so punt and
935
+ * return NIL.
917
936
*/
918
- static bool
919
- is_distinct_query ( Query * query )
937
+ static List *
938
+ translate_sub_tlist ( List * tlist , int relid )
920
939
{
921
- /* DISTINCT (but not DISTINCT ON) guarantees uniqueness */
922
- if (has_distinct_clause (query ))
923
- return true;
940
+ List * result = NIL ;
941
+ ListCell * l ;
924
942
925
- /* UNION, INTERSECT, EXCEPT guarantee uniqueness, except with ALL */
926
- if (query -> setOperations )
943
+ foreach (l , tlist )
927
944
{
928
- SetOperationStmt * topop = (SetOperationStmt * ) query -> setOperations ;
945
+ Var * var = (Var * ) lfirst ( l ) ;
929
946
930
- Assert (IsA (topop , SetOperationStmt ));
931
- Assert (topop -> op != SETOP_NONE );
947
+ if (!var || !IsA (var , Var ) ||
948
+ var -> varno != relid )
949
+ return NIL ; /* punt */
932
950
933
- if (!topop -> all )
951
+ result = lappend_int (result , var -> varattno );
952
+ }
953
+ return result ;
954
+ }
955
+
956
+ /*
957
+ * query_is_distinct_for - does query never return duplicates of the
958
+ * specified columns?
959
+ *
960
+ * colnos is an integer list of output column numbers (resno's). We are
961
+ * interested in whether rows consisting of just these columns are certain
962
+ * to be distinct.
963
+ */
964
+ static bool
965
+ query_is_distinct_for (Query * query , List * colnos )
966
+ {
967
+ ListCell * l ;
968
+
969
+ /*
970
+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
971
+ * columns in the DISTINCT clause appear in colnos.
972
+ */
973
+ if (query -> distinctClause )
974
+ {
975
+ foreach (l , query -> distinctClause )
976
+ {
977
+ SortClause * scl = (SortClause * ) lfirst (l );
978
+ TargetEntry * tle = get_sortgroupclause_tle (scl ,
979
+ query -> targetList );
980
+
981
+ if (!list_member_int (colnos , tle -> resno ))
982
+ break ; /* exit early if no match */
983
+ }
984
+ if (l == NULL ) /* had matches for all? */
934
985
return true;
935
986
}
936
987
937
988
/*
938
- * GROUP BY guarantees uniqueness if all the grouped columns appear in
939
- * the output. In our implementation this means checking they are non
940
- * resjunk columns.
989
+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
990
+ * appear in colnos.
941
991
*/
942
992
if (query -> groupClause )
943
993
{
944
- ListCell * gl ;
945
-
946
- foreach (gl , query -> groupClause )
994
+ foreach (l , query -> groupClause )
947
995
{
948
- GroupClause * grpcl = (GroupClause * ) lfirst (gl );
996
+ GroupClause * grpcl = (GroupClause * ) lfirst (l );
949
997
TargetEntry * tle = get_sortgroupclause_tle (grpcl ,
950
998
query -> targetList );
951
999
952
- if (tle -> resjunk )
953
- break ;
1000
+ if (! list_member_int ( colnos , tle -> resno ) )
1001
+ break ; /* exit early if no match */
954
1002
}
955
- if (! gl ) /* got to the end ? */
1003
+ if (l == NULL ) /* had matches for all ? */
956
1004
return true;
957
1005
}
958
1006
else
@@ -965,6 +1013,33 @@ is_distinct_query(Query *query)
965
1013
return true;
966
1014
}
967
1015
1016
+ /*
1017
+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1018
+ * except with ALL
1019
+ */
1020
+ if (query -> setOperations )
1021
+ {
1022
+ SetOperationStmt * topop = (SetOperationStmt * ) query -> setOperations ;
1023
+
1024
+ Assert (IsA (topop , SetOperationStmt ));
1025
+ Assert (topop -> op != SETOP_NONE );
1026
+
1027
+ if (!topop -> all )
1028
+ {
1029
+ /* We're good if all the nonjunk output columns are in colnos */
1030
+ foreach (l , query -> targetList )
1031
+ {
1032
+ TargetEntry * tle = (TargetEntry * ) lfirst (l );
1033
+
1034
+ if (!tle -> resjunk &&
1035
+ !list_member_int (colnos , tle -> resno ))
1036
+ break ; /* exit early if no match */
1037
+ }
1038
+ if (l == NULL ) /* had matches for all? */
1039
+ return true;
1040
+ }
1041
+ }
1042
+
968
1043
/*
969
1044
* XXX Are there any other cases in which we can easily see the result
970
1045
* must be distinct?
0 commit comments