Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 39df0f1

Browse files
committed
Allow planner to use expression-index stats for function calls in WHERE.
Previously, a function call appearing at the top level of WHERE had a hard-wired selectivity estimate of 0.3333333, a kludge conveniently dated in the source code itself to July 1992. The expectation at the time was that somebody would soon implement estimator support functions analogous to those for operators; but no such code has appeared, nor does it seem likely to in the near future. We do have an alternative solution though, at least for immutable functions on single relations: creating an expression index on the function call will allow ANALYZE to gather stats about the function's selectivity. But the code in clause_selectivity() failed to make use of such data even if it exists. Refactor so that that will happen. I chose to make it try this technique for any clause type for which clause_selectivity() doesn't have a special case, not just functions. To avoid adding unnecessary overhead in the common case where we don't learn anything new, make selfuncs.c provide an API that hooks directly to examine_variable() and then var_eq_const(), rather than the previous coding which laboriously constructed an OpExpr only so that it could be expensively deconstructed again. I preserved the behavior that the default estimate for a function call is 0.3333333. (For any other expression node type, it's 0.5, as before.) I had originally thought to make the default be 0.5 across the board, but changing a default estimate that's survived for twenty-three years seems like something not to do without a lot more testing than I care to put into it right now. Per a complaint from Jehan-Guillaume de Rorthais. Back-patch into 9.5, but not further, at least for the moment.
1 parent 76f965f commit 39df0f1

File tree

3 files changed

+59
-32
lines changed

3 files changed

+59
-32
lines changed

src/backend/optimizer/path/clausesel.c

+13-32
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
*/
1515
#include "postgres.h"
1616

17-
#include "catalog/pg_operator.h"
1817
#include "nodes/makefuncs.h"
1918
#include "optimizer/clauses.h"
2019
#include "optimizer/cost.h"
@@ -568,18 +567,8 @@ clause_selectivity(PlannerInfo *root,
568567
if (var->varlevelsup == 0 &&
569568
(varRelid == 0 || varRelid == (int) var->varno))
570569
{
571-
/*
572-
* A Var at the top of a clause must be a bool Var. This is
573-
* equivalent to the clause reln.attribute = 't', so we compute
574-
* the selectivity as if that is what we have.
575-
*/
576-
s1 = restriction_selectivity(root,
577-
BooleanEqualOperator,
578-
list_make2(var,
579-
makeBoolConst(true,
580-
false)),
581-
InvalidOid,
582-
varRelid);
570+
/* Use the restriction selectivity function for a bool Var */
571+
s1 = boolvarsel(root, (Node *) var, varRelid);
583572
}
584573
}
585574
else if (IsA(clause, Const))
@@ -680,25 +669,6 @@ clause_selectivity(PlannerInfo *root,
680669
if (IsA(clause, DistinctExpr))
681670
s1 = 1.0 - s1;
682671
}
683-
else if (is_funcclause(clause))
684-
{
685-
/*
686-
* This is not an operator, so we guess at the selectivity. THIS IS A
687-
* HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE
688-
* SELECTIVITIES THEMSELVES. -- JMH 7/9/92
689-
*/
690-
s1 = (Selectivity) 0.3333333;
691-
}
692-
#ifdef NOT_USED
693-
else if (IsA(clause, SubPlan) ||
694-
IsA(clause, AlternativeSubPlan))
695-
{
696-
/*
697-
* Just for the moment! FIX ME! - vadim 02/04/98
698-
*/
699-
s1 = (Selectivity) 0.5;
700-
}
701-
#endif
702672
else if (IsA(clause, ScalarArrayOpExpr))
703673
{
704674
/* Use node specific selectivity calculation function */
@@ -766,6 +736,17 @@ clause_selectivity(PlannerInfo *root,
766736
jointype,
767737
sjinfo);
768738
}
739+
else
740+
{
741+
/*
742+
* For anything else, see if we can consider it as a boolean variable.
743+
* This only works if it's an immutable expression in Vars of a single
744+
* relation; but there's no point in us checking that here because
745+
* boolvarsel() will do it internally, and return a suitable default
746+
* selectivity if not.
747+
*/
748+
s1 = boolvarsel(root, clause, varRelid);
749+
}
769750

770751
/* Cache the result if possible */
771752
if (cacheable)

src/backend/utils/adt/selfuncs.c

+45
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
#include "access/sysattr.h"
106106
#include "catalog/index.h"
107107
#include "catalog/pg_collation.h"
108+
#include "catalog/pg_operator.h"
108109
#include "catalog/pg_opfamily.h"
109110
#include "catalog/pg_statistic.h"
110111
#include "catalog/pg_type.h"
@@ -1439,6 +1440,50 @@ icnlikesel(PG_FUNCTION_ARGS)
14391440
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
14401441
}
14411442

1443+
/*
1444+
* boolvarsel - Selectivity of Boolean variable.
1445+
*
1446+
* This can actually be called on any boolean-valued expression. If it
1447+
* involves only Vars of the specified relation, and if there are statistics
1448+
* about the Var or expression (the latter is possible if it's indexed) then
1449+
* we'll produce a real estimate; otherwise it's just a default.
1450+
*/
1451+
Selectivity
1452+
boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1453+
{
1454+
VariableStatData vardata;
1455+
double selec;
1456+
1457+
examine_variable(root, arg, varRelid, &vardata);
1458+
if (HeapTupleIsValid(vardata.statsTuple))
1459+
{
1460+
/*
1461+
* A boolean variable V is equivalent to the clause V = 't', so we
1462+
* compute the selectivity as if that is what we have.
1463+
*/
1464+
selec = var_eq_const(&vardata, BooleanEqualOperator,
1465+
BoolGetDatum(true), false, true);
1466+
}
1467+
else if (is_funcclause(arg))
1468+
{
1469+
/*
1470+
* If we have no stats and it's a function call, estimate 0.3333333.
1471+
* This seems a pretty unprincipled choice, but Postgres has been
1472+
* using that estimate for function calls since 1992. The hoariness
1473+
* of this behavior suggests that we should not be in too much hurry
1474+
* to use another value.
1475+
*/
1476+
selec = 0.3333333;
1477+
}
1478+
else
1479+
{
1480+
/* Otherwise, the default estimate is 0.5 */
1481+
selec = 0.5;
1482+
}
1483+
ReleaseVariableStats(vardata);
1484+
return selec;
1485+
}
1486+
14421487
/*
14431488
* booltestsel - Selectivity of BooleanTest Node.
14441489
*/

src/include/utils/selfuncs.h

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
164164
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
165165
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
166166

167+
extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
167168
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
168169
Node *arg, int varRelid,
169170
JoinType jointype, SpecialJoinInfo *sjinfo);

0 commit comments

Comments
 (0)