Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 696d784

Browse files
committed
tableam: Move heap specific logic from estimate_rel_size below tableam.
This just moves the table/matview[/toast] determination of relation size to a callback, and uses a copy of the existing logic to implement that callback for heap.

It probably would make sense to also move the index specific logic into a callback, so the metapage handling (and probably more) can be index specific. But that's a separate task.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
1 parent 737a292 commit 696d784

File tree

4 files changed

+174
-44
lines changed

4 files changed

+174
-44
lines changed

src/backend/access/heap/heapam_handler.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
*/
2020
#include "postgres.h"
2121

22+
#include <math.h>
23+
2224
#include "miscadmin.h"
2325

2426
#include "access/genam.h"
@@ -33,6 +35,7 @@
3335
#include "catalog/storage_xlog.h"
3436
#include "commands/progress.h"
3537
#include "executor/executor.h"
38+
#include "optimizer/plancat.h"
3639
#include "pgstat.h"
3740
#include "storage/bufmgr.h"
3841
#include "storage/bufpage.h"
@@ -1870,6 +1873,114 @@ reform_and_rewrite_tuple(HeapTuple tuple,
18701873
}
18711874

18721875

1876+
/* ------------------------------------------------------------------------
1877+
* Planner related callbacks for the heap AM
1878+
* ------------------------------------------------------------------------
1879+
*/
1880+
1881+
static void
1882+
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
1883+
BlockNumber *pages, double *tuples,
1884+
double *allvisfrac)
1885+
{
1886+
BlockNumber curpages;
1887+
BlockNumber relpages;
1888+
double reltuples;
1889+
BlockNumber relallvisible;
1890+
double density;
1891+
1892+
/* it has storage, ok to call the smgr */
1893+
curpages = RelationGetNumberOfBlocks(rel);
1894+
1895+
/* coerce values in pg_class to more desirable types */
1896+
relpages = (BlockNumber) rel->rd_rel->relpages;
1897+
reltuples = (double) rel->rd_rel->reltuples;
1898+
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1899+
1900+
/*
1901+
* HACK: if the relation has never yet been vacuumed, use a minimum size
1902+
* estimate of 10 pages. The idea here is to avoid assuming a
1903+
* newly-created table is really small, even if it currently is, because
1904+
* that may not be true once some data gets loaded into it. Once a vacuum
1905+
* or analyze cycle has been done on it, it's more reasonable to believe
1906+
* the size is somewhat stable.
1907+
*
1908+
* (Note that this is only an issue if the plan gets cached and used again
1909+
* after the table has been filled. What we're trying to avoid is using a
1910+
* nestloop-type plan on a table that has grown substantially since the
1911+
* plan was made. Normally, autovacuum/autoanalyze will occur once enough
1912+
* inserts have happened and cause cached-plan invalidation; but that
1913+
* doesn't happen instantaneously, and it won't happen at all for cases
1914+
* such as temporary tables.)
1915+
*
1916+
* We approximate "never vacuumed" by "has relpages = 0", which means this
1917+
* will also fire on genuinely empty relations. Not great, but
1918+
* fortunately that's a seldom-seen case in the real world, and it
1919+
* shouldn't degrade the quality of the plan too much anyway to err in
1920+
* this direction.
1921+
*
1922+
* If the table has inheritance children, we don't apply this heuristic.
1923+
* Totally empty parent tables are quite common, so we should be willing
1924+
* to believe that they are empty.
1925+
*/
1926+
if (curpages < 10 &&
1927+
relpages == 0 &&
1928+
!rel->rd_rel->relhassubclass)
1929+
curpages = 10;
1930+
1931+
/* report estimated # pages */
1932+
*pages = curpages;
1933+
/* quick exit if rel is clearly empty */
1934+
if (curpages == 0)
1935+
{
1936+
*tuples = 0;
1937+
*allvisfrac = 0;
1938+
return;
1939+
}
1940+
1941+
/* estimate number of tuples from previous tuple density */
1942+
if (relpages > 0)
1943+
density = reltuples / (double) relpages;
1944+
else
1945+
{
1946+
/*
1947+
* When we have no data because the relation was truncated, estimate
1948+
* tuple width from attribute datatypes. We assume here that the
1949+
* pages are completely full, which is OK for tables (since they've
1950+
* presumably not been VACUUMed yet) but is probably an overestimate
1951+
* for indexes. Fortunately get_relation_info() can clamp the
1952+
* overestimate to the parent table's size.
1953+
*
1954+
* Note: this code intentionally disregards alignment considerations,
1955+
* because (a) that would be gilding the lily considering how crude
1956+
* the estimate is, and (b) it creates platform dependencies in the
1957+
* default plans which are kind of a headache for regression testing.
1958+
*/
1959+
int32 tuple_width;
1960+
1961+
tuple_width = get_rel_data_width(rel, attr_widths);
1962+
tuple_width += MAXALIGN(SizeofHeapTupleHeader);
1963+
tuple_width += sizeof(ItemIdData);
1964+
/* note: integer division is intentional here */
1965+
density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
1966+
}
1967+
*tuples = rint(density * (double) curpages);
1968+
1969+
/*
1970+
* We use relallvisible as-is, rather than scaling it up like we do for
1971+
* the pages and tuples counts, on the theory that any pages added since
1972+
* the last VACUUM are most likely not marked all-visible. But costsize.c
1973+
* wants it converted to a fraction.
1974+
*/
1975+
if (relallvisible == 0 || curpages <= 0)
1976+
*allvisfrac = 0;
1977+
else if ((double) relallvisible >= curpages)
1978+
*allvisfrac = 1;
1979+
else
1980+
*allvisfrac = (double) relallvisible / curpages;
1981+
}
1982+
1983+
18731984
/* ------------------------------------------------------------------------
18741985
* Definition of the heap table access method.
18751986
* ------------------------------------------------------------------------
@@ -1915,6 +2026,8 @@ static const TableAmRoutine heapam_methods = {
19152026
.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
19162027
.index_build_range_scan = heapam_index_build_range_scan,
19172028
.index_validate_scan = heapam_index_validate_scan,
2029+
2030+
.relation_estimate_size = heapam_estimate_rel_size,
19182031
};
19192032

19202033

src/backend/optimizer/util/plancat.c

Lines changed: 25 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "access/genam.h"
2121
#include "access/htup_details.h"
2222
#include "access/nbtree.h"
23+
#include "access/tableam.h"
2324
#include "access/sysattr.h"
2425
#include "access/table.h"
2526
#include "access/transam.h"
@@ -64,7 +65,6 @@ static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
6465
Relation relation, bool inhparent);
6566
static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
6667
List *idxExprs);
67-
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
6868
static List *get_relation_constraints(PlannerInfo *root,
6969
Oid relationObjectId, RelOptInfo *rel,
7070
bool include_notnull);
@@ -948,47 +948,26 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
948948
switch (rel->rd_rel->relkind)
949949
{
950950
case RELKIND_RELATION:
951-
case RELKIND_INDEX:
952951
case RELKIND_MATVIEW:
953952
case RELKIND_TOASTVALUE:
954-
/* it has storage, ok to call the smgr */
955-
curpages = RelationGetNumberOfBlocks(rel);
953+
table_relation_estimate_size(rel, attr_widths, pages, tuples,
954+
allvisfrac);
955+
break;
956+
957+
case RELKIND_INDEX:
956958

957959
/*
958-
* HACK: if the relation has never yet been vacuumed, use a
959-
* minimum size estimate of 10 pages. The idea here is to avoid
960-
* assuming a newly-created table is really small, even if it
961-
* currently is, because that may not be true once some data gets
962-
* loaded into it. Once a vacuum or analyze cycle has been done
963-
* on it, it's more reasonable to believe the size is somewhat
964-
* stable.
965-
*
966-
* (Note that this is only an issue if the plan gets cached and
967-
* used again after the table has been filled. What we're trying
968-
* to avoid is using a nestloop-type plan on a table that has
969-
* grown substantially since the plan was made. Normally,
970-
* autovacuum/autoanalyze will occur once enough inserts have
971-
* happened and cause cached-plan invalidation; but that doesn't
972-
* happen instantaneously, and it won't happen at all for cases
973-
* such as temporary tables.)
974-
*
975-
* We approximate "never vacuumed" by "has relpages = 0", which
976-
* means this will also fire on genuinely empty relations. Not
977-
* great, but fortunately that's a seldom-seen case in the real
978-
* world, and it shouldn't degrade the quality of the plan too
979-
* much anyway to err in this direction.
980-
*
981-
* There are two exceptions wherein we don't apply this heuristic.
982-
* One is if the table has inheritance children. Totally empty
983-
* parent tables are quite common, so we should be willing to
984-
* believe that they are empty. Also, we don't apply the 10-page
985-
* minimum to indexes.
960+
* XXX: It'd probably be good to move this into a callback, since
961+
* individual index types know, e.g., whether they have a metapage.
986962
*/
987-
if (curpages < 10 &&
988-
rel->rd_rel->relpages == 0 &&
989-
!rel->rd_rel->relhassubclass &&
990-
rel->rd_rel->relkind != RELKIND_INDEX)
991-
curpages = 10;
963+
964+
/* it has storage, ok to call the smgr */
965+
curpages = RelationGetNumberOfBlocks(rel);
966+
967+
/* coerce values in pg_class to more desirable types */
968+
relpages = (BlockNumber) rel->rd_rel->relpages;
969+
reltuples = (double) rel->rd_rel->reltuples;
970+
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
992971

993972
/* report estimated # pages */
994973
*pages = curpages;
@@ -1005,13 +984,12 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
1005984
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1006985

1007986
/*
1008-
* If it's an index, discount the metapage while estimating the
1009-
* number of tuples. This is a kluge because it assumes more than
1010-
* it ought to about index structure. Currently it's OK for
1011-
* btree, hash, and GIN indexes but suspect for GiST indexes.
987+
* Discount the metapage while estimating the number of tuples.
988+
* This is a kluge because it assumes more than it ought to about
989+
* index structure. Currently it's OK for btree, hash, and GIN
990+
* indexes but suspect for GiST indexes.
1012991
*/
1013-
if (rel->rd_rel->relkind == RELKIND_INDEX &&
1014-
relpages > 0)
992+
if (relpages > 0)
1015993
{
1016994
curpages--;
1017995
relpages--;
@@ -1036,6 +1014,8 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10361014
* considering how crude the estimate is, and (b) it creates
10371015
* platform dependencies in the default plans which are kind
10381016
* of a headache for regression testing.
1017+
*
1018+
* XXX: Should this logic be more index specific?
10391019
*/
10401020
int32 tuple_width;
10411021

@@ -1060,6 +1040,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10601040
else
10611041
*allvisfrac = (double) relallvisible / curpages;
10621042
break;
1043+
10631044
case RELKIND_SEQUENCE:
10641045
/* Sequences always have a known size */
10651046
*pages = 1;
@@ -1095,7 +1076,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
10951076
* since they might be mostly NULLs, treating them as zero-width is not
10961077
* necessarily the wrong thing anyway.
10971078
*/
1098-
static int32
1079+
int32
10991080
get_rel_data_width(Relation rel, int32 *attr_widths)
11001081
{
11011082
int32 tuple_width = 0;

src/include/access/tableam.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,22 @@ typedef struct TableAmRoutine
491491
Snapshot snapshot,
492492
struct ValidateIndexState *state);
493493

494+
495+
/* ------------------------------------------------------------------------
496+
* Planner related functions.
497+
* ------------------------------------------------------------------------
498+
*/
499+
500+
/*
501+
* See table_relation_estimate_size().
502+
*
503+
* While block oriented, it shouldn't be too hard for an AM that
504+
* doesn't internally use blocks to convert into a usable representation.
505+
*/
506+
void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
507+
BlockNumber *pages, double *tuples,
508+
double *allvisfrac);
509+
494510
} TableAmRoutine;
495511

496512

@@ -1286,6 +1302,25 @@ table_index_validate_scan(Relation heap_rel,
12861302
}
12871303

12881304

1305+
/* ----------------------------------------------------------------------------
1306+
* Planner related functionality
1307+
* ----------------------------------------------------------------------------
1308+
*/
1309+
1310+
/*
1311+
* Estimate the current size of the relation, as an AM specific workhorse for
1312+
* estimate_rel_size(). Look there for an explanation of the parameters.
1313+
*/
1314+
static inline void
1315+
table_relation_estimate_size(Relation rel, int32 *attr_widths,
1316+
BlockNumber *pages, double *tuples,
1317+
double *allvisfrac)
1318+
{
1319+
rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
1320+
allvisfrac);
1321+
}
1322+
1323+
12891324
/* ----------------------------------------------------------------------------
12901325
* Functions to make modifications a bit simpler.
12911326
* ----------------------------------------------------------------------------

src/include/optimizer/plancat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ extern List *infer_arbiter_indexes(PlannerInfo *root);
3333
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
3434
BlockNumber *pages, double *tuples, double *allvisfrac);
3535

36+
extern int32 get_rel_data_width(Relation rel, int32 *attr_widths);
3637
extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
3738

3839
extern bool relation_excluded_by_constraints(PlannerInfo *root,

0 commit comments

Comments
 (0)