Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a391ff3

Browse files
committed
Build out the planner support function infrastructure.
Add support function requests for estimating the selectivity, cost, and number of result rows (if it is a set-returning function, or SRF) of the target function. The lack of a way to estimate selectivity of a boolean-returning function in WHERE has been a recognized deficiency of the planner since Berkeley days. This commit finally fixes it. In addition, non-constant estimates of cost and number of output rows are now possible. We still fall back to looking at procost and prorows if the support function doesn't service the request, of course. To make concrete use of the possibility of estimating output rowcount for SRFs, this commit adds support functions for array_unnest(anyarray) and the integer variants of generate_series; the lack of plausible rowcount estimates for those, even when it's obvious to a human, has been a repeated subject of complaints. Obviously, much more could now be done in this line, but I'm mostly just trying to get the infrastructure in place. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
1 parent 1fb57af commit a391ff3

27 files changed

+792
-90
lines changed

contrib/postgres_fdw/postgres_fdw.c

+2-1
Original file line number | Diff line number | Diff line change
@@ -2779,6 +2779,7 @@ estimate_path_cost_size(PlannerInfo *root,
27792779
startup_cost = ofpinfo->rel_startup_cost;
27802780
startup_cost += aggcosts.transCost.startup;
27812781
startup_cost += aggcosts.transCost.per_tuple * input_rows;
2782+
startup_cost += aggcosts.finalCost.startup;
27822783
startup_cost += (cpu_operator_cost * numGroupCols) * input_rows;
27832784

27842785
/*-----
@@ -2788,7 +2789,7 @@ estimate_path_cost_size(PlannerInfo *root,
27882789
*-----
27892790
*/
27902791
run_cost = ofpinfo->rel_total_cost - ofpinfo->rel_startup_cost;
2791-
run_cost += aggcosts.finalCost * numGroups;
2792+
run_cost += aggcosts.finalCost.per_tuple * numGroups;
27922793
run_cost += cpu_tuple_cost * numGroups;
27932794

27942795
/* Account for the eval cost of HAVING quals, if any */

doc/src/sgml/xfunc.sgml

+21
Original file line number | Diff line number | Diff line change
@@ -3439,4 +3439,25 @@ supportfn(internal) returns internal
34393439
simplify. Ensure rigorous equivalence between the simplified
34403440
expression and an actual execution of the target function.
34413441
</para>
3442+
3443+
<para>
3444+
For target functions that return boolean, it is often useful to estimate
3445+
the fraction of rows that will be selected by a WHERE clause using that
3446+
function. This can be done by a support function that implements
3447+
the <literal>SupportRequestSelectivity</literal> request type.
3448+
</para>
3449+
3450+
<para>
3451+
If the target function's runtime is highly dependent on its inputs,
3452+
it may be useful to provide a non-constant cost estimate for it.
3453+
This can be done by a support function that implements
3454+
the <literal>SupportRequestCost</literal> request type.
3455+
</para>
3456+
3457+
<para>
3458+
For target functions that return sets, it is often useful to provide
3459+
a non-constant estimate for the number of rows that will be returned.
3460+
This can be done by a support function that implements
3461+
the <literal>SupportRequestRows</literal> request type.
3462+
</para>
34423463
</sect1>

src/backend/optimizer/path/clausesel.c

+15
Original file line number | Diff line number | Diff line change
@@ -762,6 +762,21 @@ clause_selectivity(PlannerInfo *root,
762762
if (IsA(clause, DistinctExpr))
763763
s1 = 1.0 - s1;
764764
}
765+
else if (is_funcclause(clause))
766+
{
767+
FuncExpr *funcclause = (FuncExpr *) clause;
768+
769+
/* Try to get an estimate from the support function, if any */
770+
s1 = function_selectivity(root,
771+
funcclause->funcid,
772+
funcclause->args,
773+
funcclause->inputcollid,
774+
treat_as_join_clause(clause, rinfo,
775+
varRelid, sjinfo),
776+
varRelid,
777+
jointype,
778+
sjinfo);
779+
}
765780
else if (IsA(clause, ScalarArrayOpExpr))
766781
{
767782
/* Use node specific selectivity calculation function */

src/backend/optimizer/path/costsize.c

+32-18
Original file line number | Diff line number | Diff line change
@@ -2112,9 +2112,9 @@ cost_agg(Path *path, PlannerInfo *root,
21122112
/*
21132113
* The transCost.per_tuple component of aggcosts should be charged once
21142114
* per input tuple, corresponding to the costs of evaluating the aggregate
2115-
* transfns and their input expressions (with any startup cost of course
2116-
* charged but once). The finalCost component is charged once per output
2117-
* tuple, corresponding to the costs of evaluating the finalfns.
2115+
* transfns and their input expressions. The finalCost.per_tuple component
2116+
* is charged once per output tuple, corresponding to the costs of
2117+
* evaluating the finalfns. Startup costs are of course charged but once.
21182118
*
21192119
* If we are grouping, we charge an additional cpu_operator_cost per
21202120
* grouping column per input tuple for grouping comparisons.
@@ -2136,7 +2136,8 @@ cost_agg(Path *path, PlannerInfo *root,
21362136
startup_cost = input_total_cost;
21372137
startup_cost += aggcosts->transCost.startup;
21382138
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
2139-
startup_cost += aggcosts->finalCost;
2139+
startup_cost += aggcosts->finalCost.startup;
2140+
startup_cost += aggcosts->finalCost.per_tuple;
21402141
/* we aren't grouping */
21412142
total_cost = startup_cost + cpu_tuple_cost;
21422143
output_tuples = 1;
@@ -2155,7 +2156,8 @@ cost_agg(Path *path, PlannerInfo *root,
21552156
total_cost += aggcosts->transCost.startup;
21562157
total_cost += aggcosts->transCost.per_tuple * input_tuples;
21572158
total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2158-
total_cost += aggcosts->finalCost * numGroups;
2159+
total_cost += aggcosts->finalCost.startup;
2160+
total_cost += aggcosts->finalCost.per_tuple * numGroups;
21592161
total_cost += cpu_tuple_cost * numGroups;
21602162
output_tuples = numGroups;
21612163
}
@@ -2168,8 +2170,9 @@ cost_agg(Path *path, PlannerInfo *root,
21682170
startup_cost += aggcosts->transCost.startup;
21692171
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
21702172
startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2173+
startup_cost += aggcosts->finalCost.startup;
21712174
total_cost = startup_cost;
2172-
total_cost += aggcosts->finalCost * numGroups;
2175+
total_cost += aggcosts->finalCost.per_tuple * numGroups;
21732176
total_cost += cpu_tuple_cost * numGroups;
21742177
output_tuples = numGroups;
21752178
}
@@ -2234,7 +2237,11 @@ cost_windowagg(Path *path, PlannerInfo *root,
22342237
Cost wfunccost;
22352238
QualCost argcosts;
22362239

2237-
wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
2240+
argcosts.startup = argcosts.per_tuple = 0;
2241+
add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
2242+
&argcosts);
2243+
startup_cost += argcosts.startup;
2244+
wfunccost = argcosts.per_tuple;
22382245

22392246
/* also add the input expressions' cost to per-input-row costs */
22402247
cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
@@ -3864,17 +3871,17 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
38643871
*/
38653872
if (IsA(node, FuncExpr))
38663873
{
3867-
context->total.per_tuple +=
3868-
get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
3874+
add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
3875+
&context->total);
38693876
}
38703877
else if (IsA(node, OpExpr) ||
38713878
IsA(node, DistinctExpr) ||
38723879
IsA(node, NullIfExpr))
38733880
{
38743881
/* rely on struct equivalence to treat these all alike */
38753882
set_opfuncid((OpExpr *) node);
3876-
context->total.per_tuple +=
3877-
get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
3883+
add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
3884+
&context->total);
38783885
}
38793886
else if (IsA(node, ScalarArrayOpExpr))
38803887
{
@@ -3884,10 +3891,15 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
38843891
*/
38853892
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
38863893
Node *arraynode = (Node *) lsecond(saop->args);
3894+
QualCost sacosts;
38873895

38883896
set_sa_opfuncid(saop);
3889-
context->total.per_tuple += get_func_cost(saop->opfuncid) *
3890-
cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
3897+
sacosts.startup = sacosts.per_tuple = 0;
3898+
add_function_cost(context->root, saop->opfuncid, NULL,
3899+
&sacosts);
3900+
context->total.startup += sacosts.startup;
3901+
context->total.per_tuple += sacosts.per_tuple *
3902+
estimate_array_length(arraynode) * 0.5;
38913903
}
38923904
else if (IsA(node, Aggref) ||
38933905
IsA(node, WindowFunc))
@@ -3913,11 +3925,13 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
39133925
/* check the result type's input function */
39143926
getTypeInputInfo(iocoerce->resulttype,
39153927
&iofunc, &typioparam);
3916-
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
3928+
add_function_cost(context->root, iofunc, NULL,
3929+
&context->total);
39173930
/* check the input type's output function */
39183931
getTypeOutputInfo(exprType((Node *) iocoerce->arg),
39193932
&iofunc, &typisvarlena);
3920-
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
3933+
add_function_cost(context->root, iofunc, NULL,
3934+
&context->total);
39213935
}
39223936
else if (IsA(node, ArrayCoerceExpr))
39233937
{
@@ -3941,8 +3955,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
39413955
{
39423956
Oid opid = lfirst_oid(lc);
39433957

3944-
context->total.per_tuple += get_func_cost(get_opcode(opid)) *
3945-
cpu_operator_cost;
3958+
add_function_cost(context->root, get_opcode(opid), NULL,
3959+
&context->total);
39463960
}
39473961
}
39483962
else if (IsA(node, MinMaxExpr) ||
@@ -4941,7 +4955,7 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
49414955
foreach(lc, rte->functions)
49424956
{
49434957
RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
4944-
double ntup = expression_returns_set_rows(rtfunc->funcexpr);
4958+
double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
49454959

49464960
if (ntup > rel->tuples)
49474961
rel->tuples = ntup;

src/backend/optimizer/util/clauses.c

+16-11
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,8 @@
3636
#include "optimizer/clauses.h"
3737
#include "optimizer/cost.h"
3838
#include "optimizer/optimizer.h"
39+
#include "optimizer/plancat.h"
3940
#include "optimizer/planmain.h"
40-
#include "optimizer/prep.h"
4141
#include "parser/analyze.h"
4242
#include "parser/parse_agg.h"
4343
#include "parser/parse_coerce.h"
@@ -343,19 +343,24 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
343343
if (DO_AGGSPLIT_COMBINE(context->aggsplit))
344344
{
345345
/* charge for combining previously aggregated states */
346-
costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
346+
add_function_cost(context->root, aggcombinefn, NULL,
347+
&costs->transCost);
347348
}
348349
else
349-
costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
350+
add_function_cost(context->root, aggtransfn, NULL,
351+
&costs->transCost);
350352
if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) &&
351353
OidIsValid(aggdeserialfn))
352-
costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
354+
add_function_cost(context->root, aggdeserialfn, NULL,
355+
&costs->transCost);
353356
if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) &&
354357
OidIsValid(aggserialfn))
355-
costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
358+
add_function_cost(context->root, aggserialfn, NULL,
359+
&costs->finalCost);
356360
if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) &&
357361
OidIsValid(aggfinalfn))
358-
costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
362+
add_function_cost(context->root, aggfinalfn, NULL,
363+
&costs->finalCost);
359364

360365
/*
361366
* These costs are incurred only by the initial aggregate node, so we
@@ -392,8 +397,8 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
392397
{
393398
cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
394399
context->root);
395-
costs->transCost.startup += argcosts.startup;
396-
costs->finalCost += argcosts.per_tuple;
400+
costs->finalCost.startup += argcosts.startup;
401+
costs->finalCost.per_tuple += argcosts.per_tuple;
397402
}
398403

399404
/*
@@ -561,7 +566,7 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
561566
* Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
562567
*/
563568
double
564-
expression_returns_set_rows(Node *clause)
569+
expression_returns_set_rows(PlannerInfo *root, Node *clause)
565570
{
566571
if (clause == NULL)
567572
return 1.0;
@@ -570,7 +575,7 @@ expression_returns_set_rows(Node *clause)
570575
FuncExpr *expr = (FuncExpr *) clause;
571576

572577
if (expr->funcretset)
573-
return clamp_row_est(get_func_rows(expr->funcid));
578+
return clamp_row_est(get_function_rows(root, expr->funcid, clause));
574579
}
575580
if (IsA(clause, OpExpr))
576581
{
@@ -579,7 +584,7 @@ expression_returns_set_rows(Node *clause)
579584
if (expr->opretset)
580585
{
581586
set_opfuncid(expr);
582-
return clamp_row_est(get_func_rows(expr->opfuncid));
587+
return clamp_row_est(get_function_rows(root, expr->opfuncid, clause));
583588
}
584589
}
585590
return 1.0;

src/backend/optimizer/util/pathnode.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -2711,7 +2711,7 @@ create_set_projection_path(PlannerInfo *root,
27112711
Node *node = (Node *) lfirst(lc);
27122712
double itemrows;
27132713

2714-
itemrows = expression_returns_set_rows(node);
2714+
itemrows = expression_returns_set_rows(root, node);
27152715
if (tlist_rows < itemrows)
27162716
tlist_rows = itemrows;
27172717
}

0 commit comments

Comments
 (0)