Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 51e1445

Browse files
committed
Teach ANALYZE to distinguish dead tuples from in-doubt tuples, all of which it
formerly classed as "dead"; also make it count DEAD item pointers as dead rows, instead of ignoring them as before. Also improve matters so that tuples previously inserted or deleted by our own transaction are handled correctly: the stats collector's live-tuple and dead-tuple counts will end up correct after our transaction ends, regardless of whether it ends in commit or abort. While more work could be done to improve the counting of in-doubt tuples in both VACUUM and ANALYZE, this commit is enough to alleviate some known bad behaviors in 8.3, and the other ideas that have been discussed look like research projects anyway. Authors: Pavan Deolasee and Tom Lane.
1 parent f1e0d5a commit 51e1445

File tree

3 files changed

+143
-37
lines changed

3 files changed

+143
-37
lines changed

src/backend/commands/analyze.c

+110-30
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.116 2008/03/26 21:10:37 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.117 2008/04/03 16:27:25 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -19,6 +19,7 @@
1919
#include "access/heapam.h"
2020
#include "access/transam.h"
2121
#include "access/tuptoaster.h"
22+
#include "access/xact.h"
2223
#include "catalog/index.h"
2324
#include "catalog/indexing.h"
2425
#include "catalog/namespace.h"
@@ -33,6 +34,7 @@
3334
#include "pgstat.h"
3435
#include "postmaster/autovacuum.h"
3536
#include "storage/proc.h"
37+
#include "storage/procarray.h"
3638
#include "utils/acl.h"
3739
#include "utils/datum.h"
3840
#include "utils/lsyscache.h"
@@ -362,10 +364,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
362364
* zero-column table.
363365
*/
364366
if (!vacstmt->vacuum)
365-
pgstat_report_analyze(RelationGetRelid(onerel),
366-
onerel->rd_rel->relisshared,
367-
0, 0);
368-
367+
pgstat_report_analyze(onerel, 0, 0);
369368
goto cleanup;
370369
}
371370

@@ -481,9 +480,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
481480
}
482481

483482
/* report results to the stats collector, too */
484-
pgstat_report_analyze(RelationGetRelid(onerel),
485-
onerel->rd_rel->relisshared,
486-
totalrows, totaldeadrows);
483+
pgstat_report_analyze(onerel, totalrows, totaldeadrows);
487484
}
488485

489486
/* We skip to here if there were no analyzable columns */
@@ -856,18 +853,23 @@ static int
856853
acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
857854
double *totalrows, double *totaldeadrows)
858855
{
859-
int numrows = 0; /* # rows collected */
860-
double liverows = 0; /* # rows seen */
856+
int numrows = 0; /* # rows now in reservoir */
857+
double samplerows = 0; /* total # rows collected */
858+
double liverows = 0; /* # live rows seen */
861859
double deadrows = 0; /* # dead rows seen */
862860
double rowstoskip = -1; /* -1 means not set yet */
863861
BlockNumber totalblocks;
862+
TransactionId OldestXmin;
864863
BlockSamplerData bs;
865864
double rstate;
866865

867866
Assert(targrows > 1);
868867

869868
totalblocks = RelationGetNumberOfBlocks(onerel);
870869

870+
/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
871+
OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
872+
871873
/* Prepare for sampling block numbers */
872874
BlockSampler_Init(&bs, totalblocks, targrows);
873875
/* Prepare for sampling rows */
@@ -888,28 +890,112 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
888890
* We must maintain a pin on the target page's buffer to ensure that
889891
* the maxoffset value stays good (else concurrent VACUUM might delete
890892
* tuples out from under us). Hence, pin the page until we are done
891-
* looking at it. We don't maintain a lock on the page, so tuples
892-
* could get added to it, but we ignore such tuples.
893+
* looking at it. We also choose to hold sharelock on the buffer
894+
* throughout --- we could release and re-acquire sharelock for
895+
* each tuple, but since we aren't doing much work per tuple, the
896+
* extra lock traffic is probably better avoided.
893897
*/
894898
targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
895899
LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
896900
targpage = BufferGetPage(targbuffer);
897901
maxoffset = PageGetMaxOffsetNumber(targpage);
898-
LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);
899902

900903
/* Inner loop over all tuples on the selected page */
901904
for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
902905
{
906+
ItemId itemid;
903907
HeapTupleData targtuple;
908+
bool sample_it = false;
909+
910+
itemid = PageGetItemId(targpage, targoffset);
911+
912+
/*
913+
* We ignore unused and redirect line pointers. DEAD line
914+
* pointers should be counted as dead, because we need vacuum
915+
* to run to get rid of them. Note that this rule agrees with
916+
* the way that heap_page_prune() counts things.
917+
*/
918+
if (!ItemIdIsNormal(itemid))
919+
{
920+
if (ItemIdIsDead(itemid))
921+
deadrows += 1;
922+
continue;
923+
}
904924

905925
ItemPointerSet(&targtuple.t_self, targblock, targoffset);
906-
/* We use heap_release_fetch to avoid useless bufmgr traffic */
907-
if (heap_release_fetch(onerel, SnapshotNow,
908-
&targtuple, &targbuffer,
909-
true, NULL))
926+
927+
targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
928+
targtuple.t_len = ItemIdGetLength(itemid);
929+
930+
switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
931+
OldestXmin,
932+
targbuffer))
933+
{
934+
case HEAPTUPLE_LIVE:
935+
sample_it = true;
936+
liverows += 1;
937+
break;
938+
939+
case HEAPTUPLE_DEAD:
940+
case HEAPTUPLE_RECENTLY_DEAD:
941+
/* Count dead and recently-dead rows */
942+
deadrows += 1;
943+
break;
944+
945+
case HEAPTUPLE_INSERT_IN_PROGRESS:
946+
/*
947+
* Insert-in-progress rows are not counted. We assume
948+
* that when the inserting transaction commits or aborts,
949+
* it will send a stats message to increment the proper
950+
* count. This works right only if that transaction ends
951+
* after we finish analyzing the table; if things happen
952+
* in the other order, its stats update will be
953+
* overwritten by ours. However, the error will be
954+
* large only if the other transaction runs long enough
955+
* to insert many tuples, so assuming it will finish
956+
* after us is the safer option.
957+
*
958+
* A special case is that the inserting transaction might
959+
* be our own. In this case we should count and sample
960+
* the row, to accommodate users who load a table and
961+
* analyze it in one transaction. (pgstat_report_analyze
962+
* has to adjust the numbers we send to the stats collector
963+
* to make this come out right.)
964+
*/
965+
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
966+
{
967+
sample_it = true;
968+
liverows += 1;
969+
}
970+
break;
971+
972+
case HEAPTUPLE_DELETE_IN_PROGRESS:
973+
/*
974+
* We count delete-in-progress rows as still live, using
975+
* the same reasoning given above; but we don't bother to
976+
* include them in the sample.
977+
*
978+
* If the delete was done by our own transaction, however,
979+
* we must count the row as dead to make
980+
* pgstat_report_analyze's stats adjustments come out
981+
* right. (Note: this works out properly when the row
982+
* was both inserted and deleted in our xact.)
983+
*/
984+
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(targtuple.t_data)))
985+
deadrows += 1;
986+
else
987+
liverows += 1;
988+
break;
989+
990+
default:
991+
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
992+
break;
993+
}
994+
995+
if (sample_it)
910996
{
911997
/*
912-
* The first targrows live rows are simply copied into the
998+
* The first targrows sample rows are simply copied into the
913999
* reservoir. Then we start replacing tuples in the sample
9141000
* until we reach the end of the relation. This algorithm is
9151001
* from Jeff Vitter's paper (see full citation below). It
@@ -927,11 +1013,11 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
9271013
/*
9281014
* t in Vitter's paper is the number of records already
9291015
* processed. If we need to compute a new S value, we
930-
* must use the not-yet-incremented value of liverows as
931-
* t.
1016+
* must use the not-yet-incremented value of samplerows
1017+
* as t.
9321018
*/
9331019
if (rowstoskip < 0)
934-
rowstoskip = get_next_S(liverows, targrows, &rstate);
1020+
rowstoskip = get_next_S(samplerows, targrows, &rstate);
9351021

9361022
if (rowstoskip <= 0)
9371023
{
@@ -949,18 +1035,12 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
9491035
rowstoskip -= 1;
9501036
}
9511037

952-
liverows += 1;
953-
}
954-
else
955-
{
956-
/* Count dead rows, but not empty slots */
957-
if (targtuple.t_data != NULL)
958-
deadrows += 1;
1038+
samplerows += 1;
9591039
}
9601040
}
9611041

962-
/* Now release the pin on the page */
963-
ReleaseBuffer(targbuffer);
1042+
/* Now release the lock and pin on the page */
1043+
UnlockReleaseBuffer(targbuffer);
9641044
}
9651045

9661046
/*

src/backend/postmaster/pgstat.c

+31-5
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2001-2008, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.172 2008/03/26 21:10:38 alvherre Exp $
16+
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.173 2008/04/03 16:27:25 tgl Exp $
1717
* ----------
1818
*/
1919
#include "postgres.h"
@@ -1037,18 +1037,44 @@ pgstat_report_vacuum(Oid tableoid, bool shared,
10371037
* --------
10381038
*/
10391039
void
1040-
pgstat_report_analyze(Oid tableoid, bool shared, PgStat_Counter livetuples,
1040+
pgstat_report_analyze(Relation rel, PgStat_Counter livetuples,
10411041
PgStat_Counter deadtuples)
10421042
{
10431043
PgStat_MsgAnalyze msg;
10441044

10451045
if (pgStatSock < 0 || !pgstat_track_counts)
10461046
return;
10471047

1048+
/*
1049+
* Unlike VACUUM, ANALYZE might be running inside a transaction that
1050+
* has already inserted and/or deleted rows in the target table.
1051+
* ANALYZE will have counted such rows as live or dead respectively.
1052+
* Because we will report our counts of such rows at transaction end,
1053+
* we should subtract off these counts from what we send to the collector
1054+
* now, else they'll be double-counted after commit. (This approach also
1055+
* ensures that the collector ends up with the right numbers if we abort
1056+
* instead of committing.)
1057+
*/
1058+
if (rel->pgstat_info != NULL)
1059+
{
1060+
PgStat_TableXactStatus *trans;
1061+
1062+
for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
1063+
{
1064+
livetuples -= trans->tuples_inserted - trans->tuples_deleted;
1065+
deadtuples -= trans->tuples_deleted;
1066+
}
1067+
/* count stuff inserted by already-aborted subxacts, too */
1068+
deadtuples -= rel->pgstat_info->t_counts.t_new_dead_tuples;
1069+
/* Since ANALYZE's counts are estimates, we could have underflowed */
1070+
livetuples = Max(livetuples, 0);
1071+
deadtuples = Max(deadtuples, 0);
1072+
}
1073+
10481074
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
1049-
msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
1050-
msg.m_tableoid = tableoid;
1051-
msg.m_autovacuum = IsAutoVacuumWorkerProcess(); /* is this autovacuum? */
1075+
msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
1076+
msg.m_tableoid = RelationGetRelid(rel);
1077+
msg.m_autovacuum = IsAutoVacuumWorkerProcess(); /* is this autovacuum? */
10521078
msg.m_analyzetime = GetCurrentTimestamp();
10531079
msg.m_live_tuples = livetuples;
10541080
msg.m_dead_tuples = deadtuples;

src/include/pgstat.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Copyright (c) 2001-2008, PostgreSQL Global Development Group
77
*
8-
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.73 2008/03/24 18:22:36 tgl Exp $
8+
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.74 2008/04/03 16:27:25 tgl Exp $
99
* ----------
1010
*/
1111
#ifndef PGSTAT_H
@@ -497,7 +497,7 @@ extern void pgstat_reset_counters(void);
497497
extern void pgstat_report_autovac(Oid dboid);
498498
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
499499
bool analyze, PgStat_Counter tuples);
500-
extern void pgstat_report_analyze(Oid tableoid, bool shared,
500+
extern void pgstat_report_analyze(Relation rel,
501501
PgStat_Counter livetuples,
502502
PgStat_Counter deadtuples);
503503

0 commit comments

Comments
 (0)