Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0dbffa7

Browse files
committed
First cut at making useful selectivity estimates for range queries
(ie, WHERE x > lowbound AND x < highbound). It's not very bright yet but it does something useful. Also, rename intltsel/intgtsel to scalarltsel/scalargtsel to reflect usage better. Extend convert_to_scalar to do something a little bit useful with string data types. Still need to make it do something with date/time datatypes, but I'll wait for Thomas's datetime unification dust to settle first. Eventually the routine ought not have any type-specific knowledge at all; it ought to be calling a type-dependent routine found via a pg_type column; but that's a task for another day.
1 parent 8bcac56 commit 0dbffa7

File tree

9 files changed

+528
-247
lines changed

9 files changed

+528
-247
lines changed

doc/src/sgml/xindex.sgml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.6 2000/01/22 23:50:08 tgl Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.7 2000/01/24 07:16:49 tgl Exp $
33
Postgres documentation
44
-->
55

@@ -542,25 +542,25 @@ CREATE OPERATOR = (
542542
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
543543

544544
UPDATE pg_operator
545-
SET oprrest = 'intltsel'::regproc, oprjoin = 'intltjoinsel'
545+
SET oprrest = 'scalarltsel'::regproc, oprjoin = 'scalarltjoinsel'
546546
WHERE oprname = '<' AND
547547
oprleft = oprright AND
548548
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
549549

550550
UPDATE pg_operator
551-
SET oprrest = 'intltsel'::regproc, oprjoin = 'intltjoinsel'
551+
SET oprrest = 'scalarltsel'::regproc, oprjoin = 'scalarltjoinsel'
552552
WHERE oprname = '<=' AND
553553
oprleft = oprright AND
554554
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
555555

556556
UPDATE pg_operator
557-
SET oprrest = 'intgtsel'::regproc, oprjoin = 'intgtjoinsel'
557+
SET oprrest = 'scalargtsel'::regproc, oprjoin = 'scalargtjoinsel'
558558
WHERE oprname = '>' AND
559559
oprleft = oprright AND
560560
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
561561

562562
UPDATE pg_operator
563-
SET oprrest = 'intgtsel'::regproc, oprjoin = 'intgtjoinsel'
563+
SET oprrest = 'scalargtsel'::regproc, oprjoin = 'scalargtjoinsel'
564564
WHERE oprname = '>=' AND
565565
oprleft = oprright AND
566566
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');

doc/src/sgml/xoper.sgml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ SELECT (a + b) AS c FROM test_complex;
231231
<ProgramListing>
232232
eqsel for =
233233
neqsel for &lt;&gt;
234-
intltsel for &lt; or &lt;=
235-
intgtsel for &gt; or &gt;=
234+
scalarltsel for &lt; or &lt;=
235+
scalargtsel for &gt; or &gt;=
236236
</ProgramListing>
237237
It might seem a little odd that these are the categories, but they
238238
make sense if you think about it. '=' will typically accept only
@@ -254,6 +254,17 @@ SELECT (a + b) AS c FROM test_complex;
254254
matching operators (~, ~*, etc) use eqsel on the assumption that they'll
255255
usually only match a small fraction of the entries in a table.
256256
</para>
257+
258+
<para>
259+
You can use scalarltsel and scalargtsel for comparisons on datatypes that
260+
have some sensible means of being converted into numeric scalars for
261+
range comparisons. If possible, add the datatype to those understood
262+
by the routine convert_to_scalar() in src/backend/utils/adt/selfuncs.c.
263+
(Eventually, this routine should be replaced by per-datatype functions
264+
identified through a column of the pg_type table; but that hasn't happened
265+
yet.) If you do not do this, things will still work, but the optimizer's
266+
estimates won't be as good as they could be.
267+
</para>
257268
</sect2>
258269

259270
<sect2>
@@ -281,8 +292,8 @@ SELECT (a + b) AS c FROM test_complex;
281292
<ProgramListing>
282293
eqjoinsel for =
283294
neqjoinsel for &lt;&gt;
284-
intltjoinsel for &lt; or &lt;=
285-
intgtjoinsel for &gt; or &gt;=
295+
scalarltjoinsel for &lt; or &lt;=
296+
scalargtjoinsel for &gt; or &gt;=
286297
</ProgramListing>
287298
</para>
288299
</sect2>

src/backend/optimizer/path/clausesel.c

Lines changed: 235 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,6 +23,23 @@
2323
#include "utils/lsyscache.h"
2424

2525

26+
/*
27+
* Data structure for accumulating info about possible range-query
28+
* clause pairs in clauselist_selectivity.
29+
*/
30+
typedef struct RangeQueryClause {
31+
struct RangeQueryClause *next; /* next in linked list */
32+
Node *var; /* The common variable of the clauses */
33+
bool have_lobound; /* found a low-bound clause yet? */
34+
bool have_hibound; /* found a high-bound clause yet? */
35+
Selectivity lobound; /* Selectivity of a var > something clause */
36+
Selectivity hibound; /* Selectivity of a var < something clause */
37+
} RangeQueryClause;
38+
39+
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
40+
int flag, bool isLTsel, Selectivity s2);
41+
42+
2643
/****************************************************************************
2744
* ROUTINES TO COMPUTE SELECTIVITIES
2845
****************************************************************************/
@@ -55,29 +72,237 @@ restrictlist_selectivity(Query *root,
5572
* must be returned.
5673
*
5774
* See clause_selectivity() for the meaning of the varRelid parameter.
75+
*
76+
* Our basic approach is to take the product of the selectivities of the
77+
* subclauses. However, that's only right if the subclauses have independent
78+
* probabilities, and in reality they are often NOT independent. So,
79+
* we want to be smarter where we can.
80+
81+
* Currently, the only extra smarts we have is to recognize "range queries",
82+
* such as "x > 34 AND x < 42". Clauses are recognized as possible range
83+
* query components if they are restriction opclauses whose operators have
84+
* scalarltsel() or scalargtsel() as their restriction selectivity estimator.
85+
* We pair up clauses of this form that refer to the same variable. An
86+
* unpairable clause of this kind is simply multiplied into the selectivity
87+
* product in the normal way. But when we find a pair, we know that the
88+
* selectivities represent the relative positions of the low and high bounds
89+
* within the column's range, so instead of figuring the selectivity as
90+
* hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
91+
* see that hisel is the fraction of the range below the high bound, while
92+
* losel is the fraction above the low bound; so hisel can be interpreted
93+
* directly as a 0..1 value but we need to convert losel to 1-losel before
94+
* interpreting it as a value. Then the available range is 1-losel to hisel.)
95+
* If the calculation yields zero or negative, however, we chicken out and
96+
* use the default interpretation; that probably means that one or both
97+
* selectivities is a default estimate rather than an actual range value.
98+
* Of course this is all very dependent on the behavior of
99+
* scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
58100
*/
59101
Selectivity
60102
clauselist_selectivity(Query *root,
61103
List *clauses,
62104
int varRelid)
63105
{
64-
Selectivity s1 = 1.0;
65-
List *clause;
106+
Selectivity s1 = 1.0;
107+
RangeQueryClause *rqlist = NULL;
108+
List *clist;
66109

67-
/* Use the product of the selectivities of the subclauses.
68-
* XXX this is too optimistic, since the subclauses
69-
* are very likely not independent...
110+
/*
111+
* Initial scan over clauses. Anything that doesn't look like a
112+
* potential rangequery clause gets multiplied into s1 and forgotten.
113+
* Anything that does gets inserted into an rqlist entry.
70114
*/
71-
foreach(clause, clauses)
115+
foreach(clist, clauses)
72116
{
73-
Selectivity s2 = clause_selectivity(root,
74-
(Node *) lfirst(clause),
75-
varRelid);
117+
Node *clause = (Node *) lfirst(clist);
118+
Selectivity s2;
119+
120+
/*
121+
* See if it looks like a restriction clause with a constant.
122+
* (If it's not a constant we can't really trust the selectivity!)
123+
* NB: for consistency of results, this fragment of code had
124+
* better match what clause_selectivity() would do.
125+
*/
126+
if (varRelid != 0 || NumRelids(clause) == 1)
127+
{
128+
int relidx;
129+
AttrNumber attno;
130+
Datum constval;
131+
int flag;
132+
133+
get_relattval(clause, varRelid,
134+
&relidx, &attno, &constval, &flag);
135+
if (relidx != 0 && (flag & SEL_CONSTANT))
136+
{
137+
/* if get_relattval succeeded, it must be an opclause */
138+
Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
139+
RegProcedure oprrest = get_oprrest(opno);
140+
141+
if (!oprrest)
142+
s2 = (Selectivity) 0.5;
143+
else
144+
s2 = restriction_selectivity(oprrest, opno,
145+
getrelid(relidx,
146+
root->rtable),
147+
attno,
148+
constval, flag);
149+
/*
150+
* If we reach here, we have computed the same result
151+
* that clause_selectivity would. If the operator's estimator is
152+
* neither scalarltsel nor scalargtsel, just multiply s2 into s1.
153+
* Otherwise, add the clause to rqlist for later pairing.
154+
*/
155+
switch (oprrest)
156+
{
157+
case F_SCALARLTSEL:
158+
addRangeClause(&rqlist, clause, flag, true, s2);
159+
break;
160+
case F_SCALARGTSEL:
161+
addRangeClause(&rqlist, clause, flag, false, s2);
162+
break;
163+
default:
164+
/* Just merge the selectivity in generically */
165+
s1 = s1 * s2;
166+
break;
167+
}
168+
continue; /* drop to loop bottom */
169+
}
170+
}
171+
/* Not the right form, so treat it generically. */
172+
s2 = clause_selectivity(root, clause, varRelid);
76173
s1 = s1 * s2;
77174
}
175+
176+
/*
177+
* Now scan the rangequery pair list.
178+
*/
179+
while (rqlist != NULL)
180+
{
181+
RangeQueryClause *rqnext;
182+
183+
if (rqlist->have_lobound && rqlist->have_hibound)
184+
{
185+
/* Successfully matched a pair of range clauses */
186+
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
187+
188+
if (s2 > 0.0)
189+
{
190+
/* All our hard work has paid off! */
191+
s1 *= s2;
192+
}
193+
else
194+
{
195+
/* One or both is probably a default estimate,
196+
* so punt and just merge them in generically.
197+
*/
198+
s1 *= rqlist->hibound * rqlist->lobound;
199+
}
200+
}
201+
else
202+
{
203+
/* Only found one of a pair, merge it in generically */
204+
if (rqlist->have_lobound)
205+
s1 *= rqlist->lobound;
206+
else
207+
s1 *= rqlist->hibound;
208+
}
209+
/* release storage and advance */
210+
rqnext = rqlist->next;
211+
pfree(rqlist);
212+
rqlist = rqnext;
213+
}
214+
78215
return s1;
79216
}
80217

218+
/*
219+
* addRangeClause --- add a new range clause for clauselist_selectivity
220+
*
221+
* Here is where we try to match up pairs of range-query clauses
222+
*/
223+
static void
224+
addRangeClause(RangeQueryClause **rqlist, Node *clause,
225+
int flag, bool isLTsel, Selectivity s2)
226+
{
227+
RangeQueryClause *rqelem;
228+
Node *var;
229+
bool is_lobound;
230+
231+
/* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */
232+
if (flag & SEL_RIGHT)
233+
{
234+
var = (Node *) get_leftop((Expr *) clause);
235+
is_lobound = ! isLTsel; /* x < something is high bound */
236+
}
237+
else
238+
{
239+
var = (Node *) get_rightop((Expr *) clause);
240+
is_lobound = isLTsel; /* something < x is low bound */
241+
}
242+
243+
for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
244+
{
245+
/* We use full equal() here because the "var" might be a function
246+
* of one or more attributes of the same relation...
247+
*/
248+
if (! equal(var, rqelem->var))
249+
continue;
250+
/* Found the right group to put this clause in */
251+
if (is_lobound)
252+
{
253+
if (! rqelem->have_lobound)
254+
{
255+
rqelem->have_lobound = true;
256+
rqelem->lobound = s2;
257+
}
258+
else
259+
{
260+
/* We have found two similar clauses, such as
261+
* x < y AND x < z. Keep only the more restrictive one.
262+
*/
263+
if (rqelem->lobound > s2)
264+
rqelem->lobound = s2;
265+
}
266+
}
267+
else
268+
{
269+
if (! rqelem->have_hibound)
270+
{
271+
rqelem->have_hibound = true;
272+
rqelem->hibound = s2;
273+
}
274+
else
275+
{
276+
/* We have found two similar clauses, such as
277+
* x > y AND x > z. Keep only the more restrictive one.
278+
*/
279+
if (rqelem->hibound > s2)
280+
rqelem->hibound = s2;
281+
}
282+
}
283+
return;
284+
}
285+
286+
/* No matching var found, so make a new clause-pair data structure */
287+
rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
288+
rqelem->var = var;
289+
if (is_lobound)
290+
{
291+
rqelem->have_lobound = true;
292+
rqelem->have_hibound = false;
293+
rqelem->lobound = s2;
294+
}
295+
else
296+
{
297+
rqelem->have_lobound = false;
298+
rqelem->have_hibound = true;
299+
rqelem->hibound = s2;
300+
}
301+
rqelem->next = *rqlist;
302+
*rqlist = rqelem;
303+
}
304+
305+
81306
/*
82307
* clause_selectivity -
83308
* Compute the selectivity of a general boolean expression clause.

0 commit comments

Comments
 (0)