Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 32ea236

Browse files
committed
Improve the IndexVacuumInfo/IndexBulkDeleteResult API to allow somewhat sane behavior in cases where we don't know the heap tuple count accurately (in particular, partial vacuum); this also makes the API a bit more useful for ANALYZE. This patch adds "estimated_count" flags to both structs so that an approximate count can be flagged as such, and adjusts the logic so that approximate counts are not used for updating pg_class.reltuples. This fixes my previous complaint that VACUUM was putting ridiculous values into pg_class.reltuples for indexes. The actual impact of that bug is limited, because the planner only pays attention to reltuples for an index if the index is partial, which probably explains why beta testers hadn't noticed a degradation in plan quality from it. But it needs to be fixed. The whole thing is a bit messy and should be redesigned in future, because reltuples now has the potential to drift quite far away from reality when a long period elapses with no non-partial vacuums. But this is as good as it's going to get for 8.4.
1 parent 4334695 commit 32ea236

File tree

10 files changed

+81
-39
lines changed

10 files changed

+81
-39
lines changed

src/backend/access/gin/ginvacuum.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.29 2009/06/06 22:13:50 tgl Exp $
1212
*-------------------------------------------------------------------------
1313
*/
1414

@@ -741,6 +741,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
741741
* tell how many distinct heap entries are referenced by a GIN index.
742742
*/
743743
stats->num_index_tuples = info->num_heap_tuples;
744+
stats->estimated_count = info->estimated_count;
744745

745746
/*
746747
* If vacuum full, we already have exclusive lock on the index. Otherwise,

src/backend/access/gist/gistvacuum.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.44 2009/06/06 22:13:50 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -524,8 +524,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
524524
{
525525
stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
526526
/* use heap's tuple count */
527-
Assert(info->num_heap_tuples >= 0);
528527
stats->std.num_index_tuples = info->num_heap_tuples;
528+
stats->std.estimated_count = info->estimated_count;
529529

530530
/*
531531
* XXX the above is wrong if index is partial. Would it be OK to just
@@ -679,6 +679,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
679679
if (stats == NULL)
680680
stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
681681
/* we'll re-count the tuples each time */
682+
stats->std.estimated_count = false;
682683
stats->std.num_index_tuples = 0;
683684

684685
stack = (GistBDItem *) palloc0(sizeof(GistBDItem));

src/backend/access/hash/hash.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.110 2009/05/05 19:36:32 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.111 2009/06/06 22:13:50 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -610,6 +610,8 @@ hashbulkdelete(PG_FUNCTION_ARGS)
610610
/*
611611
* Otherwise, our count is untrustworthy since we may have
612612
* double-scanned tuples in split buckets. Proceed by dead-reckoning.
613+
* (Note: we still return estimated_count = false, because using this
614+
* count is better than not updating reltuples at all.)
613615
*/
614616
if (metap->hashm_ntuples > tuples_removed)
615617
metap->hashm_ntuples -= tuples_removed;
@@ -623,6 +625,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
623625
/* return statistics */
624626
if (stats == NULL)
625627
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
628+
stats->estimated_count = false;
626629
stats->num_index_tuples = num_index_tuples;
627630
stats->tuples_removed += tuples_removed;
628631
/* hashvacuumcleanup will fill in num_pages */

src/backend/access/nbtree/nbtree.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.169 2009/05/05 19:36:32 tgl Exp $
15+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.170 2009/06/06 22:13:51 tgl Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -579,10 +579,11 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
579579
/*
580580
* During a non-FULL vacuum it's quite possible for us to be fooled by
581581
* concurrent page splits into double-counting some index tuples, so
582-
* disbelieve any total that exceeds the underlying heap's count. (We
583-
* can't check this during btbulkdelete.)
582+
* disbelieve any total that exceeds the underlying heap's count ...
583+
* if we know that accurately. Otherwise this might just make matters
584+
* worse.
584585
*/
585-
if (!info->vacuum_full)
586+
if (!info->vacuum_full && !info->estimated_count)
586587
{
587588
if (stats->num_index_tuples > info->num_heap_tuples)
588589
stats->num_index_tuples = info->num_heap_tuples;
@@ -618,6 +619,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
618619
* Reset counts that will be incremented during the scan; needed in case
619620
* of multiple scans during a single VACUUM command
620621
*/
622+
stats->estimated_count = false;
621623
stats->num_index_tuples = 0;
622624
stats->pages_deleted = 0;
623625

src/backend/catalog/index.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.316 2009/05/31 20:55:37 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.317 2009/06/06 22:13:51 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1938,8 +1938,9 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
19381938
ivinfo.index = indexRelation;
19391939
ivinfo.vacuum_full = false;
19401940
ivinfo.analyze_only = false;
1941+
ivinfo.estimated_count = true;
19411942
ivinfo.message_level = DEBUG2;
1942-
ivinfo.num_heap_tuples = -1;
1943+
ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
19431944
ivinfo.strategy = NULL;
19441945

19451946
state.tuplesort = tuplesort_begin_datum(TIDOID,

src/backend/commands/analyze.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.137 2009/05/19 08:30:00 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.138 2009/06/06 22:13:51 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -498,8 +498,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
498498
ivinfo.index = Irel[ind];
499499
ivinfo.vacuum_full = false;
500500
ivinfo.analyze_only = true;
501+
ivinfo.estimated_count = true;
501502
ivinfo.message_level = elevel;
502-
ivinfo.num_heap_tuples = -1; /* not known for sure */
503+
ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
503504
ivinfo.strategy = vac_strategy;
504505

505506
stats = index_vacuum_cleanup(&ivinfo, NULL);

src/backend/commands/vacuum.c

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
*
1515
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.387 2009/03/31 22:12:48 tgl Exp $
16+
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.388 2009/06/06 22:13:51 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -3389,6 +3389,7 @@ scan_index(Relation indrel, double num_tuples)
33893389
ivinfo.index = indrel;
33903390
ivinfo.vacuum_full = true;
33913391
ivinfo.analyze_only = false;
3392+
ivinfo.estimated_count = false;
33923393
ivinfo.message_level = elevel;
33933394
ivinfo.num_heap_tuples = num_tuples;
33943395
ivinfo.strategy = vac_strategy;
@@ -3398,10 +3399,14 @@ scan_index(Relation indrel, double num_tuples)
33983399
if (!stats)
33993400
return;
34003401

3401-
/* now update statistics in pg_class */
3402-
vac_update_relstats(indrel,
3403-
stats->num_pages, stats->num_index_tuples,
3404-
false, InvalidTransactionId);
3402+
/*
3403+
* Now update statistics in pg_class, but only if the index says the
3404+
* count is accurate.
3405+
*/
3406+
if (!stats->estimated_count)
3407+
vac_update_relstats(indrel,
3408+
stats->num_pages, stats->num_index_tuples,
3409+
false, InvalidTransactionId);
34053410

34063411
ereport(elevel,
34073412
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
@@ -3417,7 +3422,8 @@ scan_index(Relation indrel, double num_tuples)
34173422
* Check for tuple count mismatch. If the index is partial, then it's OK
34183423
* for it to have fewer tuples than the heap; else we got trouble.
34193424
*/
3420-
if (stats->num_index_tuples != num_tuples)
3425+
if (!stats->estimated_count &&
3426+
stats->num_index_tuples != num_tuples)
34213427
{
34223428
if (stats->num_index_tuples > num_tuples ||
34233429
!vac_is_partial_index(indrel))
@@ -3456,6 +3462,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34563462
ivinfo.index = indrel;
34573463
ivinfo.vacuum_full = true;
34583464
ivinfo.analyze_only = false;
3465+
ivinfo.estimated_count = false;
34593466
ivinfo.message_level = elevel;
34603467
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
34613468
ivinfo.strategy = vac_strategy;
@@ -3469,10 +3476,14 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34693476
if (!stats)
34703477
return;
34713478

3472-
/* now update statistics in pg_class */
3473-
vac_update_relstats(indrel,
3474-
stats->num_pages, stats->num_index_tuples,
3475-
false, InvalidTransactionId);
3479+
/*
3480+
* Now update statistics in pg_class, but only if the index says the
3481+
* count is accurate.
3482+
*/
3483+
if (!stats->estimated_count)
3484+
vac_update_relstats(indrel,
3485+
stats->num_pages, stats->num_index_tuples,
3486+
false, InvalidTransactionId);
34763487

34773488
ereport(elevel,
34783489
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
@@ -3490,7 +3501,8 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
34903501
* Check for tuple count mismatch. If the index is partial, then it's OK
34913502
* for it to have fewer tuples than the heap; else we got trouble.
34923503
*/
3493-
if (stats->num_index_tuples != num_tuples + keep_tuples)
3504+
if (!stats->estimated_count &&
3505+
stats->num_index_tuples != num_tuples + keep_tuples)
34943506
{
34953507
if (stats->num_index_tuples > num_tuples + keep_tuples ||
34963508
!vac_is_partial_index(indrel))

src/backend/commands/vacuumlazy.c

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
*
3131
* IDENTIFICATION
32-
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.119 2009/03/24 20:17:14 tgl Exp $
32+
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.120 2009/06/06 22:13:51 tgl Exp $
3333
*
3434
*-------------------------------------------------------------------------
3535
*/
@@ -84,9 +84,11 @@ typedef struct LVRelStats
8484
{
8585
/* hasindex = true means two-pass strategy; false means one-pass */
8686
bool hasindex;
87+
bool scanned_all; /* have we scanned all pages (this far)? */
8788
/* Overall statistics about rel */
8889
BlockNumber rel_pages;
89-
double rel_tuples;
90+
double old_rel_tuples; /* previous value of pg_class.reltuples */
91+
double rel_tuples; /* counts only tuples on scanned pages */
9092
BlockNumber pages_removed;
9193
double tuples_deleted;
9294
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
@@ -96,7 +98,6 @@ typedef struct LVRelStats
9698
int max_dead_tuples; /* # slots allocated in array */
9799
ItemPointer dead_tuples; /* array of ItemPointerData */
98100
int num_index_scans;
99-
bool scanned_all; /* have we scanned all pages (this far)? */
100101
} LVRelStats;
101102

102103

@@ -174,8 +175,9 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
174175

175176
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
176177

177-
vacrelstats->num_index_scans = 0;
178178
vacrelstats->scanned_all = true; /* will be cleared if we skip a page */
179+
vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
180+
vacrelstats->num_index_scans = 0;
179181

180182
/* Open all indexes of the relation */
181183
vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
@@ -876,9 +878,9 @@ lazy_vacuum_index(Relation indrel,
876878
ivinfo.index = indrel;
877879
ivinfo.vacuum_full = false;
878880
ivinfo.analyze_only = false;
881+
ivinfo.estimated_count = true;
879882
ivinfo.message_level = elevel;
880-
/* We don't yet know rel_tuples, so pass -1 */
881-
ivinfo.num_heap_tuples = -1;
883+
ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
882884
ivinfo.strategy = vac_strategy;
883885

884886
/* Do bulk deletion */
@@ -908,19 +910,25 @@ lazy_cleanup_index(Relation indrel,
908910
ivinfo.index = indrel;
909911
ivinfo.vacuum_full = false;
910912
ivinfo.analyze_only = false;
913+
ivinfo.estimated_count = !vacrelstats->scanned_all;
911914
ivinfo.message_level = elevel;
912-
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
915+
/* use rel_tuples only if we scanned all pages, else fall back */
916+
ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
913917
ivinfo.strategy = vac_strategy;
914918

915919
stats = index_vacuum_cleanup(&ivinfo, stats);
916920

917921
if (!stats)
918922
return;
919923

920-
/* now update statistics in pg_class */
921-
vac_update_relstats(indrel,
922-
stats->num_pages, stats->num_index_tuples,
923-
false, InvalidTransactionId);
924+
/*
925+
* Now update statistics in pg_class, but only if the index says the
926+
* count is accurate.
927+
*/
928+
if (!stats->estimated_count)
929+
vac_update_relstats(indrel,
930+
stats->num_pages, stats->num_index_tuples,
931+
false, InvalidTransactionId);
924932

925933
ereport(elevel,
926934
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",

src/backend/postmaster/pgstat.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2001-2009, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.187 2009/01/01 17:23:46 momjian Exp $
16+
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.188 2009/06/06 22:13:51 tgl Exp $
1717
* ----------
1818
*/
1919
#include "postgres.h"
@@ -3774,6 +3774,13 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
37743774
{
37753775
if (msg->m_scanned_all)
37763776
tabentry->last_anl_tuples = msg->m_tuples;
3777+
else
3778+
{
3779+
/* last_anl_tuples must never exceed n_live_tuples+n_dead_tuples */
3780+
tabentry->last_anl_tuples = Min(tabentry->last_anl_tuples,
3781+
tabentry->n_live_tuples);
3782+
}
3783+
37773784
if (msg->m_autovacuum)
37783785
tabentry->autovac_analyze_timestamp = msg->m_vacuumtime;
37793786
else

src/include/access/genam.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.76 2009/03/24 20:17:14 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/access/genam.h,v 1.77 2009/06/06 22:13:52 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -34,14 +34,17 @@ typedef struct IndexBuildResult
3434
/*
3535
* Struct for input arguments passed to ambulkdelete and amvacuumcleanup
3636
*
37-
* Note that num_heap_tuples will not be valid during ambulkdelete,
38-
* only amvacuumcleanup.
37+
* num_heap_tuples is accurate only when estimated_count is false;
38+
* otherwise it's just an estimate (currently, the estimate is the
39+
* prior value of the relation's pg_class.reltuples field). It will
40+
* always just be an estimate during ambulkdelete.
3941
*/
4042
typedef struct IndexVacuumInfo
4143
{
4244
Relation index; /* the index being vacuumed */
4345
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
4446
bool analyze_only; /* ANALYZE (without any actual vacuum) */
47+
bool estimated_count; /* num_heap_tuples is an estimate */
4548
int message_level; /* ereport level for progress messages */
4649
double num_heap_tuples; /* tuples remaining in heap */
4750
BufferAccessStrategy strategy; /* access strategy for reads */
@@ -60,12 +63,15 @@ typedef struct IndexVacuumInfo
6063
*
6164
* Note: pages_removed is the amount by which the index physically shrank,
6265
* if any (ie the change in its total size on disk). pages_deleted and
63-
* pages_free refer to free space within the index file.
66+
* pages_free refer to free space within the index file. Some index AMs
67+
* may compute num_index_tuples by reference to num_heap_tuples, in which
68+
* case they should copy the estimated_count field from IndexVacuumInfo.
6469
*/
6570
typedef struct IndexBulkDeleteResult
6671
{
6772
BlockNumber num_pages; /* pages remaining in index */
6873
BlockNumber pages_removed; /* # removed during vacuum operation */
74+
bool estimated_count; /* num_index_tuples is an estimate */
6975
double num_index_tuples; /* tuples remaining */
7076
double tuples_removed; /* # removed during vacuum operation */
7177
BlockNumber pages_deleted; /* # unused pages in index */

0 commit comments

Comments
 (0)