Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 003dd96

Browse files
committed
Apply the heuristic proposed by Taral (see pgsql-general archives for
2-Oct-98 or TODO.detail/cnfify) to decide whether we want to reduce WHERE clause to CNF form, DNF form, or neither. This is a HUGE win. The heuristic conditions could probably still use a little tweaking to make sure we don't pick CNF when DNF would be better, or vice versa, but the risk of exponential explosion in cnfify() is gone. I was able to run ten-thousand-AND-subclause queries through the planner in a reasonable amount of time.
1 parent b53955f commit 003dd96

File tree

1 file changed

+106
-53
lines changed

1 file changed

+106
-53
lines changed

src/backend/optimizer/prep/prepqual.c

Lines changed: 106 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.21 2000/01/26 05:56:39 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
3232
static Expr *and_normalize(List *andlist);
3333
static Expr *qual_cleanup(Expr *qual);
3434
static List *remove_duplicates(List *list);
35-
static int count_bool_nodes(Expr *qual);
35+
static void count_bool_nodes(Expr *qual, double *nodes,
36+
double *cnfnodes, double *dnfnodes);
3637

3738
/*****************************************************************************
3839
*
@@ -84,12 +85,12 @@ static int count_bool_nodes(Expr *qual);
8485
List *
8586
canonicalize_qual(Expr *qual, bool removeAndFlag)
8687
{
87-
Expr *newqual,
88-
*cnfqual,
89-
*dnfqual;
90-
int qualcnt,
91-
cnfcnt,
92-
dnfcnt;
88+
Expr *newqual;
89+
double nodes,
90+
cnfnodes,
91+
dnfnodes;
92+
bool cnfok,
93+
dnfok;
9394

9495
if (qual == NULL)
9596
return NIL;
@@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
9899
* This improvement is always worthwhile, so do it unconditionally.
99100
*/
100101
qual = flatten_andors(qual);
102+
101103
/* Push down NOTs. We do this only in the top-level boolean
102104
* expression, without examining arguments of operators/functions.
103105
* Even so, it might not be a win if we are unable to find negators
104-
* for all the operators involved; so we keep the flattened-but-not-
105-
* NOT-pushed qual as the reference point for comparsions.
106+
* for all the operators involved; perhaps we should compare before-
107+
* and-after tree sizes?
106108
*/
107109
newqual = find_nots(qual);
108-
/*
109-
* Generate both CNF and DNF forms from newqual.
110-
*/
111-
/* Normalize into conjunctive normal form, and clean up the result. */
112-
cnfqual = qual_cleanup(find_ors(newqual));
113-
/* Likewise for DNF */
114-
dnfqual = qual_cleanup(find_ands(newqual));
115110

116111
/*
117-
* Now, choose whether to return qual, cnfqual, or dnfqual.
112+
* Choose whether to convert to CNF, or DNF, or leave well enough alone.
118113
*
119-
* First heuristic is to forget about either CNF or DNF if it shows
114+
* We make an approximate estimate of the number of bottom-level nodes
115+
* that will appear in the CNF and DNF forms of the query.
116+
*/
117+
count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
118+
/*
119+
* First heuristic is to forget about *both* normal forms if there are
120+
* a huge number of terms in the qual clause. This would only happen
121+
* with machine-generated queries, presumably; and most likely such
122+
* a query is already in either CNF or DNF.
123+
*/
124+
cnfok = dnfok = true;
125+
if (nodes >= 500.0)
126+
cnfok = dnfok = false;
127+
/*
128+
* Second heuristic is to forget about either CNF or DNF if it shows
120129
* unreasonable growth compared to the original form of the qual,
121130
* where we define "unreasonable" a tad arbitrarily as 4x more
122131
* operators.
123132
*/
124-
qualcnt = count_bool_nodes(qual);
125-
cnfcnt = count_bool_nodes(cnfqual);
126-
dnfcnt = count_bool_nodes(dnfqual);
127-
if (cnfcnt >= 4 * qualcnt)
128-
cnfqual = NULL; /* mark CNF not usable */
129-
if (dnfcnt >= 4 * qualcnt)
130-
dnfqual = NULL; /* mark DNF not usable */
131-
133+
if (cnfnodes >= 4.0 * nodes)
134+
cnfok = false;
135+
if (dnfnodes >= 4.0 * nodes)
136+
dnfok = false;
132137
/*
133-
* Second heuristic is to prefer DNF if only one relation is mentioned
134-
* and it is smaller than the CNF representation.
138+
* Third heuristic is to prefer DNF if top level is already an OR,
139+
* and only one relation is mentioned, and DNF is no larger than
140+
* the CNF representation. (Pretty shaky; can we improve on this?)
135141
*/
136-
if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1)
137-
cnfqual = NULL;
138-
142+
if (dnfok && dnfnodes <= cnfnodes && or_clause((Node *) newqual) &&
143+
NumRelids((Node *) newqual) == 1)
144+
cnfok = false;
139145
/*
140146
* Otherwise, we prefer CNF.
141147
*
142148
* XXX obviously, these rules could be improved upon.
143149
*/
144-
145-
/* pick preferred survivor */
146-
if (cnfqual)
147-
newqual = cnfqual;
148-
else if (dnfqual)
149-
newqual = dnfqual;
150-
else
151-
newqual = qual;
150+
if (cnfok)
151+
{
152+
/* Normalize into conjunctive normal form, and clean up the result. */
153+
newqual = qual_cleanup(find_ors(newqual));
154+
}
155+
else if (dnfok)
156+
{
157+
/* Normalize into disjunctive normal form, and clean up the result. */
158+
newqual = qual_cleanup(find_ands(newqual));
159+
}
152160

153161
/* Convert to implicit-AND list if requested */
154162
if (removeAndFlag)
@@ -828,27 +836,72 @@ remove_duplicates(List *list)
828836
/*
829837
* count_bool_nodes
830838
* Support for heuristics in canonicalize_qual(): count the
831-
* number of nodes in the top level AND/OR/NOT part of a qual tree
839+
* number of nodes that are inputs to the top level AND/OR/NOT
840+
* part of a qual tree, and estimate how many nodes will appear
841+
* in the CNF'ified or DNF'ified equivalent of the expression.
842+
*
843+
* This is just an approximate calculation; it doesn't deal with NOTs
844+
* very well, and of course it cannot detect possible simplifications
845+
* from eliminating duplicate subclauses. The idea is just to cheaply
846+
* determine whether CNF will be markedly worse than DNF or vice versa.
847+
*
848+
* The counts/estimates are represented as doubles to avoid risk of overflow.
832849
*/
833-
static int
834-
count_bool_nodes(Expr *qual)
850+
static void
851+
count_bool_nodes(Expr *qual,
852+
double *nodes,
853+
double *cnfnodes,
854+
double *dnfnodes)
835855
{
836-
if (qual == NULL)
837-
return 0;
856+
List *temp;
857+
double subnodes, subcnfnodes, subdnfnodes;
838858

839-
if (and_clause((Node *) qual) ||
840-
or_clause((Node *) qual))
859+
if (and_clause((Node *) qual))
841860
{
842-
int sum = 1; /* for the and/or itself */
843-
List *temp;
861+
*nodes = *cnfnodes = 0.0;
862+
*dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
844863

845864
foreach(temp, qual->args)
846-
sum += count_bool_nodes(lfirst(temp));
865+
{
866+
count_bool_nodes(lfirst(temp),
867+
&subnodes, &subcnfnodes, &subdnfnodes);
868+
*nodes += subnodes;
869+
*cnfnodes += subcnfnodes;
870+
*dnfnodes *= subdnfnodes;
871+
}
872+
/* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
873+
* simple ones with count 1. Make sure dnfnodes isn't too small.
874+
*/
875+
if (*dnfnodes < *cnfnodes)
876+
*dnfnodes = *cnfnodes;
877+
}
878+
else if (or_clause((Node *) qual))
879+
{
880+
*nodes = *dnfnodes = 0.0;
881+
*cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
847882

848-
return sum;
883+
foreach(temp, qual->args)
884+
{
885+
count_bool_nodes(lfirst(temp),
886+
&subnodes, &subcnfnodes, &subdnfnodes);
887+
*nodes += subnodes;
888+
*cnfnodes *= subcnfnodes;
889+
*dnfnodes += subdnfnodes;
890+
}
891+
/* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
892+
* simple ones with count 1. Make sure cnfnodes isn't too small.
893+
*/
894+
if (*cnfnodes < *dnfnodes)
895+
*cnfnodes = *dnfnodes;
849896
}
850897
else if (not_clause((Node *) qual))
851-
return count_bool_nodes(get_notclausearg(qual)) + 1;
898+
{
899+
count_bool_nodes(get_notclausearg(qual),
900+
nodes, cnfnodes, dnfnodes);
901+
}
852902
else
853-
return 1; /* anything else counts 1 for my purposes */
903+
{
904+
/* anything else counts 1 for my purposes */
905+
*nodes = *cnfnodes = *dnfnodes = 1.0;
906+
}
854907
}

0 commit comments

Comments
 (0)