8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.21 2000/01/26 05:56:39 momjian Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
@@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
32
32
static Expr * and_normalize (List * andlist );
33
33
static Expr * qual_cleanup (Expr * qual );
34
34
static List * remove_duplicates (List * list );
35
- static int count_bool_nodes (Expr * qual );
35
+ static void count_bool_nodes (Expr * qual , double * nodes ,
36
+ double * cnfnodes , double * dnfnodes );
36
37
37
38
/*****************************************************************************
38
39
*
@@ -84,12 +85,12 @@ static int count_bool_nodes(Expr *qual);
84
85
List *
85
86
canonicalize_qual (Expr * qual , bool removeAndFlag )
86
87
{
87
- Expr * newqual ,
88
- * cnfqual ,
89
- * dnfqual ;
90
- int qualcnt ,
91
- cnfcnt ,
92
- dnfcnt ;
88
+ Expr * newqual ;
89
+ double nodes ,
90
+ cnfnodes ,
91
+ dnfnodes ;
92
+ bool cnfok ,
93
+ dnfok ;
93
94
94
95
if (qual == NULL )
95
96
return NIL ;
@@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
98
99
* This improvement is always worthwhile, so do it unconditionally.
99
100
*/
100
101
qual = flatten_andors (qual );
102
+
101
103
/* Push down NOTs. We do this only in the top-level boolean
102
104
* expression, without examining arguments of operators/functions.
103
105
* Even so, it might not be a win if we are unable to find negators
104
- * for all the operators involved; so we keep the flattened-but-not -
105
- * NOT-pushed qual as the reference point for comparsions.
106
+ * for all the operators involved; perhaps we should compare before -
107
+ * and-after tree sizes?
106
108
*/
107
109
newqual = find_nots (qual );
108
- /*
109
- * Generate both CNF and DNF forms from newqual.
110
- */
111
- /* Normalize into conjunctive normal form, and clean up the result. */
112
- cnfqual = qual_cleanup (find_ors (newqual ));
113
- /* Likewise for DNF */
114
- dnfqual = qual_cleanup (find_ands (newqual ));
115
110
116
111
/*
117
- * Now, choose whether to return qual, cnfqual , or dnfqual .
112
+ * Choose whether to convert to CNF, or DNF , or leave well enough alone .
118
113
*
119
- * First heuristic is to forget about either CNF or DNF if it shows
114
+ * We make an approximate estimate of the number of bottom-level nodes
115
+ * that will appear in the CNF and DNF forms of the query.
116
+ */
117
+ count_bool_nodes (newqual , & nodes , & cnfnodes , & dnfnodes );
118
+ /*
119
+ * First heuristic is to forget about *both* normal forms if there are
120
+ * a huge number of terms in the qual clause. This would only happen
121
+ * with machine-generated queries, presumably; and most likely such
122
+ * a query is already in either CNF or DNF.
123
+ */
124
+ cnfok = dnfok = true;
125
+ if (nodes >= 500.0 )
126
+ cnfok = dnfok = false;
127
+ /*
128
+ * Second heuristic is to forget about either CNF or DNF if it shows
120
129
* unreasonable growth compared to the original form of the qual,
121
130
* where we define "unreasonable" a tad arbitrarily as 4x more
122
131
* operators.
123
132
*/
124
- qualcnt = count_bool_nodes (qual );
125
- cnfcnt = count_bool_nodes (cnfqual );
126
- dnfcnt = count_bool_nodes (dnfqual );
127
- if (cnfcnt >= 4 * qualcnt )
128
- cnfqual = NULL ; /* mark CNF not usable */
129
- if (dnfcnt >= 4 * qualcnt )
130
- dnfqual = NULL ; /* mark DNF not usable */
131
-
133
+ if (cnfnodes >= 4.0 * nodes )
134
+ cnfok = false;
135
+ if (dnfnodes >= 4.0 * nodes )
136
+ dnfok = false;
132
137
/*
133
- * Second heuristic is to prefer DNF if only one relation is mentioned
134
- * and it is smaller than the CNF representation.
138
+ * Third heuristic is to prefer DNF if top level is already an OR,
139
+ * and only one relation is mentioned, and DNF is no larger than
140
+ * the CNF representation. (Pretty shaky; can we improve on this?)
135
141
*/
136
- if (dnfqual && dnfcnt < cnfcnt && NumRelids ((Node * ) dnfqual ) == 1 )
137
- cnfqual = NULL ;
138
-
142
+ if (dnfok && dnfnodes <= cnfnodes && or_clause ((Node * ) newqual ) &&
143
+ NumRelids (( Node * ) newqual ) == 1 )
144
+ cnfok = false;
139
145
/*
140
146
* Otherwise, we prefer CNF.
141
147
*
142
148
* XXX obviously, these rules could be improved upon.
143
149
*/
144
-
145
- /* pick preferred survivor */
146
- if (cnfqual )
147
- newqual = cnfqual ;
148
- else if (dnfqual )
149
- newqual = dnfqual ;
150
- else
151
- newqual = qual ;
150
+ if (cnfok )
151
+ {
152
+ /* Normalize into conjunctive normal form, and clean up the result. */
153
+ newqual = qual_cleanup (find_ors (newqual ));
154
+ }
155
+ else if (dnfok )
156
+ {
157
+ /* Normalize into disjunctive normal form, and clean up the result. */
158
+ newqual = qual_cleanup (find_ands (newqual ));
159
+ }
152
160
153
161
/* Convert to implicit-AND list if requested */
154
162
if (removeAndFlag )
@@ -828,27 +836,72 @@ remove_duplicates(List *list)
828
836
/*
829
837
* count_bool_nodes
830
838
* Support for heuristics in canonicalize_qual(): count the
831
- * number of nodes in the top level AND/OR/NOT part of a qual tree
839
+ * number of nodes that are inputs to the top level AND/OR/NOT
840
+ * part of a qual tree, and estimate how many nodes will appear
841
+ * in the CNF'ified or DNF'ified equivalent of the expression.
842
+ *
843
+ * This is just an approximate calculation; it doesn't deal with NOTs
844
+ * very well, and of course it cannot detect possible simplifications
845
+ * from eliminating duplicate subclauses. The idea is just to cheaply
846
+ * determine whether CNF will be markedly worse than DNF or vice versa.
847
+ *
848
+ * The counts/estimates are represented as doubles to avoid risk of overflow.
832
849
*/
833
- static int
834
- count_bool_nodes (Expr * qual )
850
+ static void
851
+ count_bool_nodes (Expr * qual ,
852
+ double * nodes ,
853
+ double * cnfnodes ,
854
+ double * dnfnodes )
835
855
{
836
- if ( qual == NULL )
837
- return 0 ;
856
+ List * temp ;
857
+ double subnodes , subcnfnodes , subdnfnodes ;
838
858
839
- if (and_clause ((Node * ) qual ) ||
840
- or_clause ((Node * ) qual ))
859
+ if (and_clause ((Node * ) qual ))
841
860
{
842
- int sum = 1 ; /* for the and/or itself */
843
- List * temp ;
861
+ * nodes = * cnfnodes = 0.0 ;
862
+ * dnfnodes = 1.0 ; /* DNF nodes will be product of sub-counts */
844
863
845
864
foreach (temp , qual -> args )
846
- sum += count_bool_nodes (lfirst (temp ));
865
+ {
866
+ count_bool_nodes (lfirst (temp ),
867
+ & subnodes , & subcnfnodes , & subdnfnodes );
868
+ * nodes += subnodes ;
869
+ * cnfnodes += subcnfnodes ;
870
+ * dnfnodes *= subdnfnodes ;
871
+ }
872
+ /* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
873
+ * simple ones with count 1. Make sure dnfnodes isn't too small.
874
+ */
875
+ if (* dnfnodes < * cnfnodes )
876
+ * dnfnodes = * cnfnodes ;
877
+ }
878
+ else if (or_clause ((Node * ) qual ))
879
+ {
880
+ * nodes = * dnfnodes = 0.0 ;
881
+ * cnfnodes = 1.0 ; /* CNF nodes will be product of sub-counts */
847
882
848
- return sum ;
883
+ foreach (temp , qual -> args )
884
+ {
885
+ count_bool_nodes (lfirst (temp ),
886
+ & subnodes , & subcnfnodes , & subdnfnodes );
887
+ * nodes += subnodes ;
888
+ * cnfnodes *= subcnfnodes ;
889
+ * dnfnodes += subdnfnodes ;
890
+ }
891
+ /* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
892
+ * simple ones with count 1. Make sure cnfnodes isn't too small.
893
+ */
894
+ if (* cnfnodes < * dnfnodes )
895
+ * cnfnodes = * dnfnodes ;
849
896
}
850
897
else if (not_clause ((Node * ) qual ))
851
- return count_bool_nodes (get_notclausearg (qual )) + 1 ;
898
+ {
899
+ count_bool_nodes (get_notclausearg (qual ),
900
+ nodes , cnfnodes , dnfnodes );
901
+ }
852
902
else
853
- return 1 ; /* anything else counts 1 for my purposes */
903
+ {
904
+ /* anything else counts 1 for my purposes */
905
+ * nodes = * cnfnodes = * dnfnodes = 1.0 ;
906
+ }
854
907
}
0 commit comments