Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0f61d4d

Browse files
committed
Improve relation width estimation for subqueries.
As per the ancient comment for set_rel_width, it really wasn't much good for relations that aren't plain tables: it would never find any stats and would always fall back on datatype-based estimates, which are often pretty silly. Fix that by copying up width estimates from the subquery planning process. At some point we might want to do this for CTEs too, but that would be a significantly more invasive patch because the sub-PlannerInfo is no longer accessible by the time it's needed. I refrained from doing anything about that, partly for fear of breaking the unmerged CTE-related patches. In passing, also generate less bogus width estimates for whole-row Vars. Per a gripe from Jon Nelson.
1 parent fe24d78 commit 0f61d4d

File tree

6 files changed

+150
-20
lines changed

6 files changed

+150
-20
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -758,11 +758,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
758758
rel->subrtable = subroot->parse->rtable;
759759
rel->subrowmark = subroot->rowMarks;
760760

761-
/* Copy number of output rows from subplan */
762-
rel->tuples = rel->subplan->plan_rows;
763-
764761
/* Mark rel with estimated output rows, width, etc */
765-
set_baserel_size_estimates(root, rel);
762+
set_subquery_size_estimates(root, rel, subroot);
766763

767764
/* Convert subquery pathkeys to outer representation */
768765
pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);

src/backend/optimizer/path/costsize.c

Lines changed: 129 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#include "optimizer/cost.h"
7777
#include "optimizer/pathnode.h"
7878
#include "optimizer/placeholder.h"
79+
#include "optimizer/plancat.h"
7980
#include "optimizer/planmain.h"
8081
#include "optimizer/restrictinfo.h"
8182
#include "parser/parsetree.h"
@@ -2986,7 +2987,7 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
29862987
* Set the size estimates for the given base relation.
29872988
*
29882989
* The rel's targetlist and restrictinfo list must have been constructed
2989-
* already.
2990+
* already, and rel->tuples must be set.
29902991
*
29912992
* We set the following fields of the rel node:
29922993
* rows: the estimated number of output tuples (after applying
@@ -3151,6 +3152,76 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
31513152
rel->rows = clamp_row_est(nrows);
31523153
}
31533154

3155+
/*
3156+
* set_subquery_size_estimates
3157+
* Set the size estimates for a base relation that is a subquery.
3158+
*
3159+
* The rel's targetlist and restrictinfo list must have been constructed
3160+
* already, and the plan for the subquery must have been completed.
3161+
* We look at the subquery's plan and PlannerInfo to extract data.
3162+
*
3163+
* We set the same fields as set_baserel_size_estimates.
3164+
*/
3165+
void
3166+
set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
3167+
PlannerInfo *subroot)
3168+
{
3169+
RangeTblEntry *rte;
3170+
ListCell *lc;
3171+
3172+
/* Should only be applied to base relations that are subqueries */
3173+
Assert(rel->relid > 0);
3174+
rte = planner_rt_fetch(rel->relid, root);
3175+
Assert(rte->rtekind == RTE_SUBQUERY);
3176+
3177+
/* Copy raw number of output rows from subplan */
3178+
rel->tuples = rel->subplan->plan_rows;
3179+
3180+
/*
3181+
* Compute per-output-column width estimates by examining the subquery's
3182+
* targetlist. For any output that is a plain Var, get the width estimate
3183+
* that was made while planning the subquery. Otherwise, fall back on a
3184+
* datatype-based estimate.
3185+
*/
3186+
foreach(lc, subroot->parse->targetList)
3187+
{
3188+
TargetEntry *te = (TargetEntry *) lfirst(lc);
3189+
Node *texpr = (Node *) te->expr;
3190+
int32 item_width;
3191+
3192+
Assert(IsA(te, TargetEntry));
3193+
/* junk columns aren't visible to upper query */
3194+
if (te->resjunk)
3195+
continue;
3196+
3197+
/*
3198+
* XXX This currently doesn't work for subqueries containing set
3199+
* operations, because the Vars in their tlists are bogus references
3200+
* to the first leaf subquery, which wouldn't give the right answer
3201+
* even if we could still get to its PlannerInfo. So fall back on
3202+
* datatype in that case.
3203+
*/
3204+
if (IsA(texpr, Var) &&
3205+
subroot->parse->setOperations == NULL)
3206+
{
3207+
Var *var = (Var *) texpr;
3208+
RelOptInfo *subrel = find_base_rel(subroot, var->varno);
3209+
3210+
item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
3211+
}
3212+
else
3213+
{
3214+
item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr));
3215+
}
3216+
Assert(item_width > 0);
3217+
Assert(te->resno >= rel->min_attr && te->resno <= rel->max_attr);
3218+
rel->attr_widths[te->resno - rel->min_attr] = item_width;
3219+
}
3220+
3221+
/* Now estimate number of output rows, etc */
3222+
set_baserel_size_estimates(root, rel);
3223+
}
3224+
31543225
/*
31553226
* set_function_size_estimates
31563227
* Set the size estimates for a base relation that is a function call.
@@ -3251,11 +3322,17 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, Plan *cteplan)
32513322
* set_rel_width
32523323
* Set the estimated output width of a base relation.
32533324
*
3325+
* The estimated output width is the sum of the per-attribute width estimates
3326+
* for the actually-referenced columns, plus any PHVs or other expressions
3327+
* that have to be calculated at this relation. This is the amount of data
3328+
* we'd need to pass upwards in case of a sort, hash, etc.
3329+
*
32543330
* NB: this works best on plain relations because it prefers to look at
3255-
* real Vars. It will fail to make use of pg_statistic info when applied
3256-
* to a subquery relation, even if the subquery outputs are simple vars
3257-
* that we could have gotten info for. Is it worth trying to be smarter
3258-
* about subqueries?
3331+
* real Vars. For subqueries, set_subquery_size_estimates will already have
3332+
* copied up whatever per-column estimates were made within the subquery,
3333+
* and for other types of rels there isn't much we can do anyway. We fall
3334+
* back on (fairly stupid) datatype-based width estimates if we can't get
3335+
* any better number.
32593336
*
32603337
* The per-attribute width estimates are cached for possible re-use while
32613338
* building join relations.
@@ -3265,6 +3342,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
32653342
{
32663343
Oid reloid = planner_rt_fetch(rel->relid, root)->relid;
32673344
int32 tuple_width = 0;
3345+
bool have_wholerow_var = false;
32683346
ListCell *lc;
32693347

32703348
foreach(lc, rel->reltargetlist)
@@ -3284,8 +3362,18 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
32843362
ndx = var->varattno - rel->min_attr;
32853363

32863364
/*
3287-
* The width probably hasn't been cached yet, but may as well
3288-
* check
3365+
* If it's a whole-row Var, we'll deal with it below after we
3366+
* have already cached as many attr widths as possible.
3367+
*/
3368+
if (var->varattno == 0)
3369+
{
3370+
have_wholerow_var = true;
3371+
continue;
3372+
}
3373+
3374+
/*
3375+
* The width may have been cached already (especially if it's
3376+
* a subquery), so don't duplicate effort.
32893377
*/
32903378
if (rel->attr_widths[ndx] > 0)
32913379
{
@@ -3294,7 +3382,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
32943382
}
32953383

32963384
/* Try to get column width from statistics */
3297-
if (reloid != InvalidOid)
3385+
if (reloid != InvalidOid && var->varattno > 0)
32983386
{
32993387
item_width = get_attavgwidth(reloid, var->varattno);
33003388
if (item_width > 0)
@@ -3335,6 +3423,39 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
33353423
tuple_width += item_width;
33363424
}
33373425
}
3426+
3427+
/*
3428+
* If we have a whole-row reference, estimate its width as the sum of
3429+
* per-column widths plus sizeof(HeapTupleHeaderData).
3430+
*/
3431+
if (have_wholerow_var)
3432+
{
3433+
int32 wholerow_width = sizeof(HeapTupleHeaderData);
3434+
3435+
if (reloid != InvalidOid)
3436+
{
3437+
/* Real relation, so estimate true tuple width */
3438+
wholerow_width += get_relation_data_width(reloid,
3439+
rel->attr_widths - rel->min_attr);
3440+
}
3441+
else
3442+
{
3443+
/* Do what we can with info for a phony rel */
3444+
AttrNumber i;
3445+
3446+
for (i = 1; i <= rel->max_attr; i++)
3447+
wholerow_width += rel->attr_widths[i - rel->min_attr];
3448+
}
3449+
3450+
rel->attr_widths[0 - rel->min_attr] = wholerow_width;
3451+
3452+
/*
3453+
* Include the whole-row Var as part of the output tuple. Yes,
3454+
* that really is what happens at runtime.
3455+
*/
3456+
tuple_width += wholerow_width;
3457+
}
3458+
33383459
Assert(tuple_width >= 0);
33393460
rel->width = tuple_width;
33403461
}

src/backend/optimizer/plan/planner.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3102,7 +3102,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
31023102
* set_baserel_size_estimates, just do a quick hack for rows and width.
31033103
*/
31043104
rel->rows = rel->tuples;
3105-
rel->width = get_relation_data_width(tableOid);
3105+
rel->width = get_relation_data_width(tableOid, NULL);
31063106

31073107
root->total_table_pages = rel->pages;
31083108

src/backend/optimizer/util/plancat.c

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
322322
* estimate_rel_size - estimate # pages and # tuples in a table or index
323323
*
324324
* If attr_widths isn't NULL, it points to the zero-index entry of the
325-
* relation's attr_width[] cache; we fill this in if we have need to compute
325+
* relation's attr_widths[] cache; we fill this in if we have need to compute
326326
* the attribute widths for estimation purposes.
327327
*/
328328
void
@@ -435,8 +435,9 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
435435
* get_rel_data_width
436436
*
437437
* Estimate the average width of (the data part of) the relation's tuples.
438-
* If attr_widths isn't NULL, also store per-column width estimates into
439-
* that array.
438+
*
439+
* If attr_widths isn't NULL, it points to the zero-index entry of the
440+
* relation's attr_widths[] cache; use and update that cache as appropriate.
440441
*
441442
* Currently we ignore dropped columns. Ideally those should be included
442443
* in the result, but we haven't got any way to get info about them; and
@@ -456,6 +457,14 @@ get_rel_data_width(Relation rel, int32 *attr_widths)
456457

457458
if (att->attisdropped)
458459
continue;
460+
461+
/* use previously cached data, if any */
462+
if (attr_widths != NULL && attr_widths[i] > 0)
463+
{
464+
tuple_width += attr_widths[i];
465+
continue;
466+
}
467+
459468
/* This should match set_rel_width() in costsize.c */
460469
item_width = get_attavgwidth(RelationGetRelid(rel), i);
461470
if (item_width <= 0)
@@ -474,18 +483,19 @@ get_rel_data_width(Relation rel, int32 *attr_widths)
474483
/*
475484
* get_relation_data_width
476485
*
477-
* External API for get_rel_data_width
486+
* External API for get_rel_data_width: same behavior except we have to
487+
* open the relcache entry.
478488
*/
479489
int32
480-
get_relation_data_width(Oid relid)
490+
get_relation_data_width(Oid relid, int32 *attr_widths)
481491
{
482492
int32 result;
483493
Relation relation;
484494

485495
/* As above, assume relation is already locked */
486496
relation = heap_open(relid, NoLock);
487497

488-
result = get_rel_data_width(relation, NULL);
498+
result = get_rel_data_width(relation, attr_widths);
489499

490500
heap_close(relation, NoLock);
491501

src/include/optimizer/cost.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
121121
RelOptInfo *inner_rel,
122122
SpecialJoinInfo *sjinfo,
123123
List *restrictlist);
124+
extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
125+
PlannerInfo *subroot);
124126
extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel);
125127
extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel);
126128
extern void set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel,

src/include/optimizer/plancat.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
3131
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
3232
BlockNumber *pages, double *tuples);
3333

34-
extern int32 get_relation_data_width(Oid relid);
34+
extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
3535

3636
extern bool relation_excluded_by_constraints(PlannerInfo *root,
3737
RelOptInfo *rel, RangeTblEntry *rte);

0 commit comments

Comments
 (0)