@@ -44,6 +44,12 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44
44
bool varonleft , bool isLTsel , Selectivity s2 );
45
45
static RelOptInfo * find_single_rel_for_clauses (PlannerInfo * root ,
46
46
List * clauses );
47
+ static Selectivity clauselist_selectivity_or (PlannerInfo * root ,
48
+ List * clauses ,
49
+ int varRelid ,
50
+ JoinType jointype ,
51
+ SpecialJoinInfo * sjinfo ,
52
+ bool use_extended_stats );
47
53
48
54
/****************************************************************************
49
55
* ROUTINES TO COMPUTE SELECTIVITIES
@@ -61,64 +67,10 @@ static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
61
67
*
62
68
* The basic approach is to apply extended statistics first, on as many
63
69
* clauses as possible, in order to capture cross-column dependencies etc.
64
- * The remaining clauses are then estimated using regular statistics tracked
65
- * for individual columns. This is done by simply passing the clauses to
66
- * clauselist_selectivity_simple.
67
- */
68
- Selectivity
69
- clauselist_selectivity (PlannerInfo * root ,
70
- List * clauses ,
71
- int varRelid ,
72
- JoinType jointype ,
73
- SpecialJoinInfo * sjinfo )
74
- {
75
- Selectivity s1 = 1.0 ;
76
- RelOptInfo * rel ;
77
- Bitmapset * estimatedclauses = NULL ;
78
-
79
- /*
80
- * Determine if these clauses reference a single relation. If so, and if
81
- * it has extended statistics, try to apply those.
82
- */
83
- rel = find_single_rel_for_clauses (root , clauses );
84
- if (rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
85
- {
86
- /*
87
- * Estimate as many clauses as possible using extended statistics.
88
- *
89
- * 'estimatedclauses' tracks the 0-based list position index of
90
- * clauses that we've estimated using extended statistics, and that
91
- * should be ignored.
92
- */
93
- s1 *= statext_clauselist_selectivity (root , clauses , varRelid ,
94
- jointype , sjinfo , rel ,
95
- & estimatedclauses );
96
- }
97
-
98
- /*
99
- * Apply normal selectivity estimates for the remaining clauses, passing
100
- * 'estimatedclauses' so that it skips already estimated ones.
101
- */
102
- return s1 * clauselist_selectivity_simple (root , clauses , varRelid ,
103
- jointype , sjinfo ,
104
- estimatedclauses );
105
- }
106
-
107
- /*
108
- * clauselist_selectivity_simple -
109
- * Compute the selectivity of an implicitly-ANDed list of boolean
110
- * expression clauses. The list can be empty, in which case 1.0
111
- * must be returned. List elements may be either RestrictInfos
112
- * or bare expression clauses --- the former is preferred since
113
- * it allows caching of results. The estimatedclauses bitmap tracks
114
- * clauses that have already been estimated by other means.
115
- *
116
- * See clause_selectivity() for the meaning of the additional parameters.
117
- *
118
- * Our basic approach is to take the product of the selectivities of the
119
- * subclauses. However, that's only right if the subclauses have independent
120
- * probabilities, and in reality they are often NOT independent. So,
121
- * we want to be smarter where we can.
70
+ * The remaining clauses are then estimated by taking the product of their
71
+ * selectivities, but that's only right if they have independent
72
+ * probabilities, and in reality they are often NOT independent even if they
73
+ * only refer to a single column. So, we want to be smarter where we can.
122
74
*
123
75
* We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
124
76
* are recognized as possible range query components if they are restriction
@@ -147,28 +99,68 @@ clauselist_selectivity(PlannerInfo *root,
147
99
* selectivity functions; perhaps some day we can generalize the approach.
148
100
*/
149
101
Selectivity
150
- clauselist_selectivity_simple (PlannerInfo * root ,
151
- List * clauses ,
152
- int varRelid ,
153
- JoinType jointype ,
154
- SpecialJoinInfo * sjinfo ,
155
- Bitmapset * estimatedclauses )
102
+ clauselist_selectivity (PlannerInfo * root ,
103
+ List * clauses ,
104
+ int varRelid ,
105
+ JoinType jointype ,
106
+ SpecialJoinInfo * sjinfo )
107
+ {
108
+ return clauselist_selectivity_ext (root , clauses , varRelid ,
109
+ jointype , sjinfo , true); /* true = use_extended_stats: extended statistics are not ignored */
110
+ }
111
+
112
+ /*
113
+ * clauselist_selectivity_ext -
114
+ * Extended version of clauselist_selectivity(). If "use_extended_stats"
115
+ * is false, all extended statistics will be ignored, and only per-column
116
+ * statistics will be used.
117
+ */
118
+ Selectivity
119
+ clauselist_selectivity_ext (PlannerInfo * root ,
120
+ List * clauses ,
121
+ int varRelid ,
122
+ JoinType jointype ,
123
+ SpecialJoinInfo * sjinfo ,
124
+ bool use_extended_stats )
156
125
{
157
126
Selectivity s1 = 1.0 ;
127
+ RelOptInfo * rel ;
128
+ Bitmapset * estimatedclauses = NULL ;
158
129
RangeQueryClause * rqlist = NULL ;
159
130
ListCell * l ;
160
131
int listidx ;
161
132
162
133
/*
163
- * If there's exactly one clause (and it was not estimated yet), just go
164
- * directly to clause_selectivity(). None of what we might do below is
165
- * relevant.
134
+ * If there's exactly one clause, just go directly to
135
+ * clause_selectivity_ext(). None of what we might do below is relevant.
166
136
*/
167
- if (list_length (clauses ) == 1 && bms_is_empty (estimatedclauses ))
168
- return clause_selectivity (root , (Node * ) linitial (clauses ),
169
- varRelid , jointype , sjinfo );
137
+ if (list_length (clauses ) == 1 )
138
+ return clause_selectivity_ext (root , (Node * ) linitial (clauses ),
139
+ varRelid , jointype , sjinfo ,
140
+ use_extended_stats );
141
+
142
+ /*
143
+ * Determine if these clauses reference a single relation. If so, and if
144
+ * it has extended statistics, try to apply those.
145
+ */
146
+ rel = find_single_rel_for_clauses (root , clauses );
147
+ if (use_extended_stats && rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
148
+ {
149
+ /*
150
+ * Estimate as many clauses as possible using extended statistics.
151
+ *
152
+ * 'estimatedclauses' is populated with the 0-based list position
153
+ * indexes of the clauses estimated here, which must be skipped below.
154
+ */
155
+ s1 = statext_clauselist_selectivity (root , clauses , varRelid ,
156
+ jointype , sjinfo , rel ,
157
+ & estimatedclauses , false);
158
+ }
170
159
171
160
/*
161
+ * Apply normal selectivity estimates for remaining clauses. We'll be
162
+ * careful to skip any clauses which were already estimated above.
163
+ *
172
164
* Anything that doesn't look like a potential rangequery clause gets
173
165
* multiplied into s1 and forgotten. Anything that does gets inserted into
174
166
* an rqlist entry.
@@ -189,8 +181,9 @@ clauselist_selectivity_simple(PlannerInfo *root,
189
181
if (bms_is_member (listidx , estimatedclauses ))
190
182
continue ;
191
183
192
- /* Always compute the selectivity using clause_selectivity */
193
- s2 = clause_selectivity (root , clause , varRelid , jointype , sjinfo );
184
+ /* Compute the selectivity of this clause in isolation */
185
+ s2 = clause_selectivity_ext (root , clause , varRelid , jointype , sjinfo ,
186
+ use_extended_stats );
194
187
195
188
/*
196
189
* Check for being passed a RestrictInfo.
@@ -350,6 +343,83 @@ clauselist_selectivity_simple(PlannerInfo *root,
350
343
return s1 ;
351
344
}
352
345
346
+ /*
347
+ * clauselist_selectivity_or -
348
+ * Compute the selectivity of an implicitly-ORed list of boolean
349
+ * expression clauses. The list can be empty, in which case 0.0
350
+ * must be returned. List elements may be either RestrictInfos
351
+ * or bare expression clauses --- the former is preferred since
352
+ * it allows caching of results.
353
+ *
354
+ * See clause_selectivity() for the meaning of varRelid, jointype and sjinfo.
+ * If use_extended_stats is false, the extended-statistics step below is
+ * skipped; the flag is also passed on unchanged to clause_selectivity_ext()
+ * for every clause that is estimated individually.
355
+ *
356
+ * The basic approach is to apply extended statistics first, on as many
357
+ * clauses as possible, in order to capture cross-column dependencies etc.
358
+ * The remaining clauses are then estimated as if they were independent,
+ * each one being folded into the running result as s1 + s2 - s1 * s2.
359
+ */
360
+ static Selectivity
361
+ clauselist_selectivity_or (PlannerInfo * root ,
362
+ List * clauses ,
363
+ int varRelid ,
364
+ JoinType jointype ,
365
+ SpecialJoinInfo * sjinfo ,
366
+ bool use_extended_stats )
367
+ {
368
+ Selectivity s1 = 0.0 ;
369
+ RelOptInfo * rel ;
370
+ Bitmapset * estimatedclauses = NULL ;
371
+ ListCell * lc ;
372
+ int listidx ;
373
+
374
+ /*
375
+ * Determine if these clauses reference a single relation. If so, and if
376
+ * it has extended statistics, try to apply those.
+ *
+ * Nothing is attempted here when use_extended_stats is false, when no
+ * single relation is found, when the relation is not an RTE_RELATION, or
+ * when its statlist is NIL; s1 then stays at 0.0 and estimatedclauses
+ * stays NULL.
377
+ */
378
+ rel = find_single_rel_for_clauses (root , clauses );
379
+ if (use_extended_stats && rel && rel -> rtekind == RTE_RELATION && rel -> statlist != NIL )
380
+ {
381
+ /*
382
+ * Estimate as many clauses as possible using extended statistics.
383
+ *
384
+ * 'estimatedclauses' is populated with the 0-based list position
385
+ * indexes of the clauses estimated here, which must be skipped below.
386
+ */
387
+ s1 = statext_clauselist_selectivity (root , clauses , varRelid ,
388
+ jointype , sjinfo , rel ,
389
+ & estimatedclauses , true); /* NOTE(review): last argument presumably selects OR semantics (the AND-list caller passes false) -- confirm against statext_clauselist_selectivity() */
390
+ }
391
+
392
+ /*
393
+ * Estimate the remaining clauses as if they were independent.
394
+ *
395
+ * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396
+ * for the probable overlap of selected tuple sets.
397
+ *
+ * s1 starts out as the extended-statistics estimate assigned above, or
+ * as 0.0 if that step was not run.
+ *
398
+ * XXX is this too conservative?
399
+ */
400
+ listidx = -1 ; /* incremented at the top of each iteration, so the first clause has index 0 */
401
+ foreach (lc , clauses )
402
+ {
403
+ Selectivity s2 ;
404
+
405
+ listidx ++ ;
406
+
407
+ /*
408
+ * Skip this clause if it's already been estimated by some other
409
+ * statistics above.
410
+ */
411
+ if (bms_is_member (listidx , estimatedclauses ))
412
+ continue ;
413
+
414
+ s2 = clause_selectivity_ext (root , (Node * ) lfirst (lc ), varRelid ,
415
+ jointype , sjinfo , use_extended_stats );
416
+
417
+ s1 = s1 + s2 - s1 * s2 ;
418
+ }
419
+
420
+ return s1 ;
421
+ }
422
+
353
423
/*
354
424
* addRangeClause --- add a new range clause for clauselist_selectivity
355
425
*
@@ -601,6 +671,24 @@ clause_selectivity(PlannerInfo *root,
601
671
int varRelid ,
602
672
JoinType jointype ,
603
673
SpecialJoinInfo * sjinfo )
674
+ {
675
+ return clause_selectivity_ext (root , clause , varRelid ,
676
+ jointype , sjinfo , true);
677
+ }
678
+
679
+ /*
680
+ * clause_selectivity_ext -
681
+ * Extended version of clause_selectivity(). If "use_extended_stats" is
682
+ * false, all extended statistics will be ignored, and only per-column
683
+ * statistics will be used.
684
+ */
685
+ Selectivity
686
+ clause_selectivity_ext (PlannerInfo * root ,
687
+ Node * clause ,
688
+ int varRelid ,
689
+ JoinType jointype ,
690
+ SpecialJoinInfo * sjinfo ,
691
+ bool use_extended_stats )
604
692
{
605
693
Selectivity s1 = 0.5 ; /* default for any unhandled clause type */
606
694
RestrictInfo * rinfo = NULL ;
@@ -716,42 +804,35 @@ clause_selectivity(PlannerInfo *root,
716
804
else if (is_notclause (clause ))
717
805
{
718
806
/* inverse of the selectivity of the underlying clause */
719
- s1 = 1.0 - clause_selectivity (root ,
720
- (Node * ) get_notclausearg ((Expr * ) clause ),
721
- varRelid ,
722
- jointype ,
723
- sjinfo );
807
+ s1 = 1.0 - clause_selectivity_ext (root ,
808
+ (Node * ) get_notclausearg ((Expr * ) clause ),
809
+ varRelid ,
810
+ jointype ,
811
+ sjinfo ,
812
+ use_extended_stats );
724
813
}
725
814
else if (is_andclause (clause ))
726
815
{
727
816
/* share code with clauselist_selectivity() */
728
- s1 = clauselist_selectivity (root ,
729
- ((BoolExpr * ) clause )-> args ,
730
- varRelid ,
731
- jointype ,
732
- sjinfo );
817
+ s1 = clauselist_selectivity_ext (root ,
818
+ ((BoolExpr * ) clause )-> args ,
819
+ varRelid ,
820
+ jointype ,
821
+ sjinfo ,
822
+ use_extended_stats );
733
823
}
734
824
else if (is_orclause (clause ))
735
825
{
736
826
/*
737
- * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
738
- * account for the probable overlap of selected tuple sets.
739
- *
740
- * XXX is this too conservative?
827
+ * Almost the same thing as clauselist_selectivity, but with the
828
+ * clauses connected by OR.
741
829
*/
742
- ListCell * arg ;
743
-
744
- s1 = 0.0 ;
745
- foreach (arg , ((BoolExpr * ) clause )-> args )
746
- {
747
- Selectivity s2 = clause_selectivity (root ,
748
- (Node * ) lfirst (arg ),
749
- varRelid ,
750
- jointype ,
751
- sjinfo );
752
-
753
- s1 = s1 + s2 - s1 * s2 ;
754
- }
830
+ s1 = clauselist_selectivity_or (root ,
831
+ ((BoolExpr * ) clause )-> args ,
832
+ varRelid ,
833
+ jointype ,
834
+ sjinfo ,
835
+ use_extended_stats );
755
836
}
756
837
else if (is_opclause (clause ) || IsA (clause , DistinctExpr ))
757
838
{
@@ -852,20 +933,22 @@ clause_selectivity(PlannerInfo *root,
852
933
else if (IsA (clause , RelabelType ))
853
934
{
854
935
/* Not sure this case is needed, but it can't hurt */
855
- s1 = clause_selectivity (root ,
856
- (Node * ) ((RelabelType * ) clause )-> arg ,
857
- varRelid ,
858
- jointype ,
859
- sjinfo );
936
+ s1 = clause_selectivity_ext (root ,
937
+ (Node * ) ((RelabelType * ) clause )-> arg ,
938
+ varRelid ,
939
+ jointype ,
940
+ sjinfo ,
941
+ use_extended_stats );
860
942
}
861
943
else if (IsA (clause , CoerceToDomain ))
862
944
{
863
945
/* Not sure this case is needed, but it can't hurt */
864
- s1 = clause_selectivity (root ,
865
- (Node * ) ((CoerceToDomain * ) clause )-> arg ,
866
- varRelid ,
867
- jointype ,
868
- sjinfo );
946
+ s1 = clause_selectivity_ext (root ,
947
+ (Node * ) ((CoerceToDomain * ) clause )-> arg ,
948
+ varRelid ,
949
+ jointype ,
950
+ sjinfo ,
951
+ use_extended_stats );
869
952
}
870
953
else
871
954
{
0 commit comments