|
7 | 7 | *
|
8 | 8 | *
|
9 | 9 | * IDENTIFICATION
|
10 |
| - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $ |
| 10 | + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $ |
11 | 11 | *
|
12 | 12 | *-------------------------------------------------------------------------
|
13 | 13 | */
|
|
23 | 23 | #include "utils/lsyscache.h"
|
24 | 24 |
|
25 | 25 |
|
| 26 | +/* |
| 27 | + * Data structure for accumulating info about possible range-query |
| 28 | + * clause pairs in clauselist_selectivity. |
| 29 | + */ |
| 30 | +typedef struct RangeQueryClause { |
| 31 | + struct RangeQueryClause *next; /* next in linked list */ |
| 32 | + Node *var; /* The common variable of the clauses */ |
| 33 | + bool have_lobound; /* found a low-bound clause yet? */ |
| 34 | + bool have_hibound; /* found a high-bound clause yet? */ |
| 35 | + Selectivity lobound; /* Selectivity of a var > something clause */ |
| 36 | + Selectivity hibound; /* Selectivity of a var < something clause */ |
| 37 | +} RangeQueryClause; |
| 38 | + |
| 39 | +static void addRangeClause(RangeQueryClause **rqlist, Node *clause, |
| 40 | + int flag, bool isLTsel, Selectivity s2); |
| 41 | + |
| 42 | + |
26 | 43 | /****************************************************************************
|
27 | 44 | * ROUTINES TO COMPUTE SELECTIVITIES
|
28 | 45 | ****************************************************************************/
|
@@ -55,29 +72,237 @@ restrictlist_selectivity(Query *root,
|
55 | 72 | * must be returned.
|
56 | 73 | *
|
57 | 74 | * See clause_selectivity() for the meaning of the varRelid parameter.
|
| 75 | + * |
| 76 | + * Our basic approach is to take the product of the selectivities of the |
| 77 | + * subclauses. However, that's only right if the subclauses have independent |
| 78 | + * probabilities, and in reality they are often NOT independent. So, |
| 79 | + * we want to be smarter where we can. |
| 80 | +
|
| 81 | + * Currently, the only extra smarts we have is to recognize "range queries", |
| 82 | + * such as "x > 34 AND x < 42". Clauses are recognized as possible range |
| 83 | + * query components if they are restriction opclauses whose operators have |
| 84 | + * scalarltsel() or scalargtsel() as their restriction selectivity estimator. |
| 85 | + * We pair up clauses of this form that refer to the same variable. An |
| 86 | + * unpairable clause of this kind is simply multiplied into the selectivity |
| 87 | + * product in the normal way. But when we find a pair, we know that the |
| 88 | + * selectivities represent the relative positions of the low and high bounds |
| 89 | + * within the column's range, so instead of figuring the selectivity as |
| 90 | + * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, |
| 91 | + * see that hisel is the fraction of the range below the high bound, while |
| 92 | + * losel is the fraction above the low bound; so hisel can be interpreted |
| 93 | + * directly as a 0..1 value but we need to convert losel to 1-losel before |
| 94 | + * interpreting it as a value. Then the available range is 1-losel to hisel.) |
| 95 | + * If the calculation yields zero or negative, however, we chicken out and |
| 96 | + * use the default interpretation; that probably means that one or both |
| 97 | + * selectivities is a default estimate rather than an actual range value. |
| 98 | + * Of course this is all very dependent on the behavior of |
| 99 | + * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. |
58 | 100 | */
|
59 | 101 | Selectivity
|
60 | 102 | clauselist_selectivity(Query *root,
|
61 | 103 | List *clauses,
|
62 | 104 | int varRelid)
|
63 | 105 | {
|
64 |
| - Selectivity s1 = 1.0; |
65 |
| - List *clause; |
| 106 | + Selectivity s1 = 1.0; |
| 107 | + RangeQueryClause *rqlist = NULL; |
| 108 | + List *clist; |
66 | 109 |
|
67 |
| - /* Use the product of the selectivities of the subclauses. |
68 |
| - * XXX this is too optimistic, since the subclauses |
69 |
| - * are very likely not independent... |
| 110 | + /* |
| 111 | + * Initial scan over clauses. Anything that doesn't look like a |
| 112 | + * potential rangequery clause gets multiplied into s1 and forgotten. |
| 113 | + * Anything that does gets inserted into an rqlist entry. |
70 | 114 | */
|
71 |
| - foreach(clause, clauses) |
| 115 | + foreach(clist, clauses) |
72 | 116 | {
|
73 |
| - Selectivity s2 = clause_selectivity(root, |
74 |
| - (Node *) lfirst(clause), |
75 |
| - varRelid); |
| 117 | + Node *clause = (Node *) lfirst(clist); |
| 118 | + Selectivity s2; |
| 119 | + |
| 120 | + /* |
| 121 | + * See if it looks like a restriction clause with a constant. |
| 122 | + * (If it's not a constant we can't really trust the selectivity!) |
| 123 | + * NB: for consistency of results, this fragment of code had |
| 124 | + * better match what clause_selectivity() would do. |
| 125 | + */ |
| 126 | + if (varRelid != 0 || NumRelids(clause) == 1) |
| 127 | + { |
| 128 | + int relidx; |
| 129 | + AttrNumber attno; |
| 130 | + Datum constval; |
| 131 | + int flag; |
| 132 | + |
| 133 | + get_relattval(clause, varRelid, |
| 134 | + &relidx, &attno, &constval, &flag); |
| 135 | + if (relidx != 0 && (flag & SEL_CONSTANT)) |
| 136 | + { |
| 137 | + /* if get_relattval succeeded, it must be an opclause */ |
| 138 | + Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; |
| 139 | + RegProcedure oprrest = get_oprrest(opno); |
| 140 | + |
| 141 | + if (!oprrest) |
| 142 | + s2 = (Selectivity) 0.5; |
| 143 | + else |
| 144 | + s2 = restriction_selectivity(oprrest, opno, |
| 145 | + getrelid(relidx, |
| 146 | + root->rtable), |
| 147 | + attno, |
| 148 | + constval, flag); |
| 149 | + /* |
| 150 | + * If we reach here, we have computed the same result |
| 151 | + * that clause_selectivity would, so we can just use s2 |
| 152 | + * if it's the wrong oprrest. But if it's the right |
| 153 | + * oprrest, add the clause to rqlist for later processing. |
| 154 | + */ |
| 155 | + switch (oprrest) |
| 156 | + { |
| 157 | + case F_SCALARLTSEL: |
| 158 | + addRangeClause(&rqlist, clause, flag, true, s2); |
| 159 | + break; |
| 160 | + case F_SCALARGTSEL: |
| 161 | + addRangeClause(&rqlist, clause, flag, false, s2); |
| 162 | + break; |
| 163 | + default: |
| 164 | + /* Just merge the selectivity in generically */ |
| 165 | + s1 = s1 * s2; |
| 166 | + break; |
| 167 | + } |
| 168 | + continue; /* drop to loop bottom */ |
| 169 | + } |
| 170 | + } |
| 171 | + /* Not the right form, so treat it generically. */ |
| 172 | + s2 = clause_selectivity(root, clause, varRelid); |
76 | 173 | s1 = s1 * s2;
|
77 | 174 | }
|
| 175 | + |
| 176 | + /* |
| 177 | + * Now scan the rangequery pair list. |
| 178 | + */ |
| 179 | + while (rqlist != NULL) |
| 180 | + { |
| 181 | + RangeQueryClause *rqnext; |
| 182 | + |
| 183 | + if (rqlist->have_lobound && rqlist->have_hibound) |
| 184 | + { |
| 185 | + /* Successfully matched a pair of range clauses */ |
| 186 | + Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; |
| 187 | + |
| 188 | + if (s2 > 0.0) |
| 189 | + { |
| 190 | + /* All our hard work has paid off! */ |
| 191 | + s1 *= s2; |
| 192 | + } |
| 193 | + else |
| 194 | + { |
| 195 | + /* One or both is probably a default estimate, |
| 196 | + * so punt and just merge them in generically. |
| 197 | + */ |
| 198 | + s1 *= rqlist->hibound * rqlist->lobound; |
| 199 | + } |
| 200 | + } |
| 201 | + else |
| 202 | + { |
| 203 | + /* Only found one of a pair, merge it in generically */ |
| 204 | + if (rqlist->have_lobound) |
| 205 | + s1 *= rqlist->lobound; |
| 206 | + else |
| 207 | + s1 *= rqlist->hibound; |
| 208 | + } |
| 209 | + /* release storage and advance */ |
| 210 | + rqnext = rqlist->next; |
| 211 | + pfree(rqlist); |
| 212 | + rqlist = rqnext; |
| 213 | + } |
| 214 | + |
78 | 215 | return s1;
|
79 | 216 | }
|
80 | 217 |
|
| 218 | +/* |
| 219 | + * addRangeClause --- add a new range clause for clauselist_selectivity |
| 220 | + * |
| 221 | + * Here is where we try to match up pairs of range-query clauses |
| 222 | + */ |
| 223 | +static void |
| 224 | +addRangeClause(RangeQueryClause **rqlist, Node *clause, |
| 225 | + int flag, bool isLTsel, Selectivity s2) |
| 226 | +{ |
| 227 | + RangeQueryClause *rqelem; |
| 228 | + Node *var; |
| 229 | + bool is_lobound; |
| 230 | + |
| 231 | + /* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */ |
| 232 | + if (flag & SEL_RIGHT) |
| 233 | + { |
| 234 | + var = (Node *) get_leftop((Expr *) clause); |
| 235 | + is_lobound = ! isLTsel; /* x < something is high bound */ |
| 236 | + } |
| 237 | + else |
| 238 | + { |
| 239 | + var = (Node *) get_rightop((Expr *) clause); |
| 240 | + is_lobound = isLTsel; /* something < x is low bound */ |
| 241 | + } |
| 242 | + |
| 243 | + for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) |
| 244 | + { |
| 245 | + /* We use full equal() here because the "var" might be a function |
| 246 | + * of one or more attributes of the same relation... |
| 247 | + */ |
| 248 | + if (! equal(var, rqelem->var)) |
| 249 | + continue; |
| 250 | + /* Found the right group to put this clause in */ |
| 251 | + if (is_lobound) |
| 252 | + { |
| 253 | + if (! rqelem->have_lobound) |
| 254 | + { |
| 255 | + rqelem->have_lobound = true; |
| 256 | + rqelem->lobound = s2; |
| 257 | + } |
| 258 | + else |
| 259 | + { |
| 260 | + /* We have found two similar clauses, such as |
| 261 | + * x < y AND x < z. Keep only the more restrictive one. |
| 262 | + */ |
| 263 | + if (rqelem->lobound > s2) |
| 264 | + rqelem->lobound = s2; |
| 265 | + } |
| 266 | + } |
| 267 | + else |
| 268 | + { |
| 269 | + if (! rqelem->have_hibound) |
| 270 | + { |
| 271 | + rqelem->have_hibound = true; |
| 272 | + rqelem->hibound = s2; |
| 273 | + } |
| 274 | + else |
| 275 | + { |
| 276 | + /* We have found two similar clauses, such as |
| 277 | + * x > y AND x > z. Keep only the more restrictive one. |
| 278 | + */ |
| 279 | + if (rqelem->hibound > s2) |
| 280 | + rqelem->hibound = s2; |
| 281 | + } |
| 282 | + } |
| 283 | + return; |
| 284 | + } |
| 285 | + |
| 286 | + /* No matching var found, so make a new clause-pair data structure */ |
| 287 | + rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); |
| 288 | + rqelem->var = var; |
| 289 | + if (is_lobound) |
| 290 | + { |
| 291 | + rqelem->have_lobound = true; |
| 292 | + rqelem->have_hibound = false; |
| 293 | + rqelem->lobound = s2; |
| 294 | + } |
| 295 | + else |
| 296 | + { |
| 297 | + rqelem->have_lobound = false; |
| 298 | + rqelem->have_hibound = true; |
| 299 | + rqelem->hibound = s2; |
| 300 | + } |
| 301 | + rqelem->next = *rqlist; |
| 302 | + *rqlist = rqelem; |
| 303 | +} |
| 304 | + |
| 305 | + |
81 | 306 | /*
|
82 | 307 | * clause_selectivity -
|
83 | 308 | * Compute the selectivity of a general boolean expression clause.
|
|
0 commit comments