Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e6858e6

Browse files
committed
Measure the number of all-visible pages for use in index-only scan costing.
Add a column pg_class.relallvisible to remember the number of pages that were all-visible according to the visibility map as of the last VACUUM (or ANALYZE, or some other operations that update pg_class.relpages). Use relallvisible/relpages, instead of an arbitrary constant, to estimate how many heap page fetches can be avoided during an index-only scan. This is pretty primitive and will no doubt see refinements once we've acquired more field experience with the index-only scan mechanism, but it's way better than using a constant. Note: I had to adjust an underspecified query in the window.sql regression test, because it was changing answers when the plan changed to use an index-only scan. Some of the adjacent tests perhaps should be adjusted as well, but I didn't do that here.
1 parent dea95c7 commit e6858e6

File tree

22 files changed

+246
-72
lines changed

22 files changed

+246
-72
lines changed

doc/src/sgml/catalogs.sgml

+13
Original file line numberDiff line numberDiff line change
@@ -1654,6 +1654,19 @@
16541654
</entry>
16551655
</row>
16561656

1657+
<row>
1658+
<entry><structfield>relallvisible</structfield></entry>
1659+
<entry><type>int4</type></entry>
1660+
<entry></entry>
1661+
<entry>
1662+
Number of pages that are marked all-visible in the table's
1663+
visibility map. This is only an estimate used by the
1664+
planner. It is updated by <command>VACUUM</command>,
1665+
<command>ANALYZE</command>, and a few DDL commands such as
1666+
<command>CREATE INDEX</command>.
1667+
</entry>
1668+
</row>
1669+
16571670
<row>
16581671
<entry><structfield>reltoastrelid</structfield></entry>
16591672
<entry><type>oid</type></entry>

src/backend/access/hash/hash.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS)
5555
IndexBuildResult *result;
5656
BlockNumber relpages;
5757
double reltuples;
58+
double allvisfrac;
5859
uint32 num_buckets;
5960
HashBuildState buildstate;
6061

@@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS)
6768
RelationGetRelationName(index));
6869

6970
/* Estimate the number of rows currently present in the table */
70-
estimate_rel_size(heap, NULL, &relpages, &reltuples);
71+
estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
7172

7273
/* Initialize the hash index metadata page and initial buckets */
7374
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);

src/backend/access/heap/visibilitymap.c

+68
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
* visibilitymap_pin_ok - check whether correct map page is already pinned
1717
* visibilitymap_set - set a bit in a previously pinned page
1818
* visibilitymap_test - test if a bit is set
19+
* visibilitymap_count - count number of bits set in visibility map
20+
* visibilitymap_truncate - truncate the visibility map
1921
*
2022
* NOTES
2123
*
@@ -110,6 +112,26 @@
110112
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
111113
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
112114

115+
/* table for fast counting of set bits */
116+
static const uint8 number_of_ones[256] = {
117+
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
118+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
119+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
120+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
121+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
122+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
123+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
124+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
125+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
126+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
127+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
128+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
129+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
130+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
131+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
132+
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
133+
};
134+
113135
/* prototypes for internal routines */
114136
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
115137
static void vm_extend(Relation rel, BlockNumber nvmblocks);
@@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
307329
return result;
308330
}
309331

332+
/*
333+
* visibilitymap_count - count number of bits set in visibility map
334+
*
335+
* Note: we ignore the possibility of race conditions when the table is being
336+
* extended concurrently with the call. New pages added to the table aren't
337+
* going to be marked all-visible, so they won't affect the result.
338+
*/
339+
BlockNumber
340+
visibilitymap_count(Relation rel)
341+
{
342+
BlockNumber result = 0;
343+
BlockNumber mapBlock;
344+
345+
for (mapBlock = 0; ; mapBlock++)
346+
{
347+
Buffer mapBuffer;
348+
unsigned char *map;
349+
int i;
350+
351+
/*
352+
* Read till we fall off the end of the map. We assume that any
353+
* extra bytes in the last page are zeroed, so we don't bother
354+
* excluding them from the count.
355+
*/
356+
mapBuffer = vm_readbuf(rel, mapBlock, false);
357+
if (!BufferIsValid(mapBuffer))
358+
break;
359+
360+
/*
361+
* We choose not to lock the page, since the result is going to be
362+
* immediately stale anyway if anyone is concurrently setting or
363+
* clearing bits, and we only really need an approximate value.
364+
*/
365+
map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
366+
367+
for (i = 0; i < MAPSIZE; i++)
368+
{
369+
result += number_of_ones[map[i]];
370+
}
371+
372+
ReleaseBuffer(mapBuffer);
373+
}
374+
375+
return result;
376+
}
377+
310378
/*
311379
* visibilitymap_truncate - truncate the visibility map
312380
*

src/backend/catalog/heap.c

+4
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc,
772772
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
773773
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
774774
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
775+
values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
775776
values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
776777
values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid);
777778
values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
@@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc,
845846
/* The relation is real, but as yet empty */
846847
new_rel_reltup->relpages = 0;
847848
new_rel_reltup->reltuples = 0;
849+
new_rel_reltup->relallvisible = 0;
848850
break;
849851
case RELKIND_SEQUENCE:
850852
/* Sequences always have a known size */
851853
new_rel_reltup->relpages = 1;
852854
new_rel_reltup->reltuples = 1;
855+
new_rel_reltup->relallvisible = 0;
853856
break;
854857
default:
855858
/* Views, etc, have no disk storage */
856859
new_rel_reltup->relpages = 0;
857860
new_rel_reltup->reltuples = 0;
861+
new_rel_reltup->relallvisible = 0;
858862
break;
859863
}
860864

src/backend/catalog/index.c

+36-16
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "access/relscan.h"
2727
#include "access/sysattr.h"
2828
#include "access/transam.h"
29+
#include "access/visibilitymap.h"
2930
#include "access/xact.h"
3031
#include "bootstrap/bootstrap.h"
3132
#include "catalog/catalog.h"
@@ -1059,7 +1060,7 @@ index_create(Relation heapRelation,
10591060
true,
10601061
isprimary,
10611062
InvalidOid,
1062-
heapRelation->rd_rel->reltuples);
1063+
-1.0);
10631064
/* Make the above update visible */
10641065
CommandCounterIncrement();
10651066
}
@@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation,
12251226
true,
12261227
true,
12271228
InvalidOid,
1228-
heapRelation->rd_rel->reltuples);
1229+
-1.0);
12291230

12301231
/*
12311232
* If needed, mark the index as primary and/or deferred in pg_index.
@@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo,
15331534
* isprimary: if true, set relhaspkey true; else no change
15341535
* reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
15351536
* else no change
1536-
* reltuples: set reltuples to this value
1537+
* reltuples: if >= 0, set reltuples to this value; else no change
15371538
*
1538-
* relpages is also updated (using RelationGetNumberOfBlocks()).
1539+
* If reltuples >= 0, relpages and relallvisible are also updated (using
1540+
* RelationGetNumberOfBlocks() and visibilitymap_count()).
15391541
*
15401542
* NOTE: an important side-effect of this operation is that an SI invalidation
15411543
* message is sent out to all backends --- including me --- causing relcache
@@ -1550,7 +1552,6 @@ index_update_stats(Relation rel,
15501552
bool hasindex, bool isprimary,
15511553
Oid reltoastidxid, double reltuples)
15521554
{
1553-
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
15541555
Oid relid = RelationGetRelid(rel);
15551556
Relation pg_class;
15561557
HeapTuple tuple;
@@ -1586,9 +1587,11 @@ index_update_stats(Relation rel,
15861587
* It is safe to use a non-transactional update even though our
15871588
* transaction could still fail before committing. Setting relhasindex
15881589
* true is safe even if there are no indexes (VACUUM will eventually fix
1589-
* it), likewise for relhaspkey. And of course the relpages and reltuples
1590-
* counts are correct (or at least more so than the old values)
1591-
* regardless.
1590+
* it), likewise for relhaspkey. And of course the new relpages and
1591+
* reltuples counts are correct regardless. However, we don't want to
1592+
* change relpages (or relallvisible) if the caller isn't providing an
1593+
* updated reltuples count, because that would bollix the
1594+
* reltuples/relpages ratio which is what's really important.
15921595
*/
15931596

15941597
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
@@ -1650,15 +1653,32 @@ index_update_stats(Relation rel,
16501653
dirty = true;
16511654
}
16521655
}
1653-
if (rd_rel->reltuples != (float4) reltuples)
1654-
{
1655-
rd_rel->reltuples = (float4) reltuples;
1656-
dirty = true;
1657-
}
1658-
if (rd_rel->relpages != (int32) relpages)
1656+
1657+
if (reltuples >= 0)
16591658
{
1660-
rd_rel->relpages = (int32) relpages;
1661-
dirty = true;
1659+
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
1660+
BlockNumber relallvisible;
1661+
1662+
if (rd_rel->relkind != RELKIND_INDEX)
1663+
relallvisible = visibilitymap_count(rel);
1664+
else /* don't bother for indexes */
1665+
relallvisible = 0;
1666+
1667+
if (rd_rel->relpages != (int32) relpages)
1668+
{
1669+
rd_rel->relpages = (int32) relpages;
1670+
dirty = true;
1671+
}
1672+
if (rd_rel->reltuples != (float4) reltuples)
1673+
{
1674+
rd_rel->reltuples = (float4) reltuples;
1675+
dirty = true;
1676+
}
1677+
if (rd_rel->relallvisible != (int32) relallvisible)
1678+
{
1679+
rd_rel->relallvisible = (int32) relallvisible;
1680+
dirty = true;
1681+
}
16621682
}
16631683

16641684
/*

src/backend/commands/analyze.c

+9-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "access/transam.h"
2020
#include "access/tupconvert.h"
2121
#include "access/tuptoaster.h"
22+
#include "access/visibilitymap.h"
2223
#include "access/xact.h"
2324
#include "catalog/index.h"
2425
#include "catalog/indexing.h"
@@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
534535
if (!inh)
535536
vac_update_relstats(onerel,
536537
RelationGetNumberOfBlocks(onerel),
537-
totalrows, hasindex, InvalidTransactionId);
538+
totalrows,
539+
visibilitymap_count(onerel),
540+
hasindex,
541+
InvalidTransactionId);
538542

539543
/*
540544
* Same for indexes. Vacuum always scans all indexes, so if we're part of
@@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
551555
totalindexrows = ceil(thisdata->tupleFract * totalrows);
552556
vac_update_relstats(Irel[ind],
553557
RelationGetNumberOfBlocks(Irel[ind]),
554-
totalindexrows, false, InvalidTransactionId);
558+
totalindexrows,
559+
0,
560+
false,
561+
InvalidTransactionId);
555562
}
556563
}
557564

src/backend/commands/cluster.c

+5
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
12051205
{
12061206
int4 swap_pages;
12071207
float4 swap_tuples;
1208+
int4 swap_allvisible;
12081209

12091210
swap_pages = relform1->relpages;
12101211
relform1->relpages = relform2->relpages;
@@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
12131214
swap_tuples = relform1->reltuples;
12141215
relform1->reltuples = relform2->reltuples;
12151216
relform2->reltuples = swap_tuples;
1217+
1218+
swap_allvisible = relform1->relallvisible;
1219+
relform1->relallvisible = relform2->relallvisible;
1220+
relform2->relallvisible = swap_allvisible;
12161221
}
12171222

12181223
/*

src/backend/commands/vacuum.c

+6
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
569569
void
570570
vac_update_relstats(Relation relation,
571571
BlockNumber num_pages, double num_tuples,
572+
BlockNumber num_all_visible_pages,
572573
bool hasindex, TransactionId frozenxid)
573574
{
574575
Oid relid = RelationGetRelid(relation);
@@ -599,6 +600,11 @@ vac_update_relstats(Relation relation,
599600
pgcform->reltuples = (float4) num_tuples;
600601
dirty = true;
601602
}
603+
if (pgcform->relallvisible != (int32) num_all_visible_pages)
604+
{
605+
pgcform->relallvisible = (int32) num_all_visible_pages;
606+
dirty = true;
607+
}
602608
if (pgcform->relhasindex != hasindex)
603609
{
604610
pgcform->relhasindex = hasindex;

src/backend/commands/vacuumlazy.c

+17-3
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
158158
TransactionId freezeTableLimit;
159159
BlockNumber new_rel_pages;
160160
double new_rel_tuples;
161+
BlockNumber new_rel_allvisible;
161162
TransactionId new_frozen_xid;
162163

163164
/* measure elapsed time iff autovacuum logging requires it */
@@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
222223
* density") with nonzero relpages and reltuples=0 (which means "zero
223224
* tuple density") unless there's some actual evidence for the latter.
224225
*
226+
* We do update relallvisible even in the corner case, since if the
227+
* table is all-visible we'd definitely like to know that. But clamp
228+
* the value to be not more than what we're setting relpages to.
229+
*
225230
* Also, don't change relfrozenxid if we skipped any pages, since then
226231
* we don't know for certain that all tuples have a newer xmin.
227232
*/
@@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
233238
new_rel_tuples = vacrelstats->old_rel_tuples;
234239
}
235240

241+
new_rel_allvisible = visibilitymap_count(onerel);
242+
if (new_rel_allvisible > new_rel_pages)
243+
new_rel_allvisible = new_rel_pages;
244+
236245
new_frozen_xid = FreezeLimit;
237246
if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
238247
new_frozen_xid = InvalidTransactionId;
239248

240249
vac_update_relstats(onerel,
241-
new_rel_pages, new_rel_tuples,
250+
new_rel_pages,
251+
new_rel_tuples,
252+
new_rel_allvisible,
242253
vacrelstats->hasindex,
243254
new_frozen_xid);
244255

@@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel,
10631074
*/
10641075
if (!stats->estimated_count)
10651076
vac_update_relstats(indrel,
1066-
stats->num_pages, stats->num_index_tuples,
1067-
false, InvalidTransactionId);
1077+
stats->num_pages,
1078+
stats->num_index_tuples,
1079+
0,
1080+
false,
1081+
InvalidTransactionId);
10681082

10691083
ereport(elevel,
10701084
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",

src/backend/nodes/outfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
17431743
WRITE_NODE_FIELD(indexlist);
17441744
WRITE_UINT_FIELD(pages);
17451745
WRITE_FLOAT_FIELD(tuples, "%.0f");
1746+
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
17461747
WRITE_NODE_FIELD(subplan);
17471748
WRITE_NODE_FIELD(subroot);
17481749
WRITE_NODE_FIELD(baserestrictinfo);

0 commit comments

Comments
 (0)