Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 94bc27b

Browse files
committed
Centralize horizon determination for temp tables, fixing bug due to skew.
This fixes a bug in the edge case where, for a temp table, heap_page_prune() can end up with a different horizon than heap_vacuum_rel(). Which can trigger errors like "ERROR: cannot freeze committed xmax ...". The bug was introduced due to interaction of a7212be "Set cutoff xmin more aggressively when vacuuming a temporary table." with dc7420c "snapshot scalability: Don't compute global horizons while building snapshots.". The problem is caused by lazy_scan_heap() assuming that the only reason its HeapTupleSatisfiesVacuum() call would return HEAPTUPLE_DEAD is if the tuple is a HOT tuple, or if the tuple's inserting transaction has aborted since the heap_page_prune() call. But after a7212be that was also possible in other cases for temp tables, because heap_page_prune() uses a different visibility test after dc7420c. The fix is fairly simple: Move the special case logic for temp tables from vacuum_set_xid_limits() to the infrastructure introduced in dc7420c. That ensures that the horizon used for pruning is at least as aggressive as the one used by lazy_scan_heap(). The concrete horizon used for temp tables is slightly different than the logic in dc7420c, but should always be as aggressive as before (see comments). A significant benefit to centralizing the logic procarray.c is that now the more aggressive horizons for temp tables does not just apply to VACUUM but also to e.g. HOT pruning and the nbtree killtuples logic. Because isTopLevel is not needed by vacuum_set_xid_limits() anymore, I undid the the related changes from a7212be. This commit also adds an isolation test ensuring that the more aggressive vacuuming and pruning of temp tables keeps working. Debugged-By: Amit Kapila <amit.kapila16@gmail.com> Debugged-By: Tom Lane <tgl@sss.pgh.pa.us> Debugged-By: Ashutosh Sharma <ashu.coek88@gmail.com> Author: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20201014203103.72oke6hqywcyhx7s@alap3.anarazel.de Discussion: https://postgr.es/m/20201015083735.derdzysdtqdvxshp@alap3.anarazel.de
1 parent 60a51c6 commit 94bc27b

File tree

9 files changed

+544
-73
lines changed

9 files changed

+544
-73
lines changed

src/backend/access/heap/vacuumlazy.c

-1
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
471471
params->freeze_table_age,
472472
params->multixact_freeze_min_age,
473473
params->multixact_freeze_table_age,
474-
true, /* we must be a top-level command */
475474
&OldestXmin, &FreezeLimit, &xidFullScanLimit,
476475
&MultiXactCutoff, &mxactFullScanLimit);
477476

src/backend/commands/cluster.c

+11-17
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,10 @@ typedef struct
6767
} RelToCluster;
6868

6969

70-
static void rebuild_relation(Relation OldHeap, Oid indexOid,
71-
bool isTopLevel, bool verbose);
70+
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
7271
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
73-
bool isTopLevel, bool verbose,
74-
bool *pSwapToastByContent,
75-
TransactionId *pFreezeXid,
76-
MultiXactId *pCutoffMulti);
72+
bool verbose, bool *pSwapToastByContent,
73+
TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
7774
static List *get_tables_to_cluster(MemoryContext cluster_context);
7875

7976

@@ -173,7 +170,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
173170
table_close(rel, NoLock);
174171

175172
/* Do the job. */
176-
cluster_rel(tableOid, indexOid, stmt->options, isTopLevel);
173+
cluster_rel(tableOid, indexOid, stmt->options);
177174
}
178175
else
179176
{
@@ -222,8 +219,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
222219
PushActiveSnapshot(GetTransactionSnapshot());
223220
/* Do the job. */
224221
cluster_rel(rvtc->tableOid, rvtc->indexOid,
225-
stmt->options | CLUOPT_RECHECK,
226-
isTopLevel);
222+
stmt->options | CLUOPT_RECHECK);
227223
PopActiveSnapshot();
228224
CommitTransactionCommand();
229225
}
@@ -254,7 +250,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
254250
* and error messages should refer to the operation as VACUUM not CLUSTER.
255251
*/
256252
void
257-
cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
253+
cluster_rel(Oid tableOid, Oid indexOid, int options)
258254
{
259255
Relation OldHeap;
260256
bool verbose = ((options & CLUOPT_VERBOSE) != 0);
@@ -404,7 +400,7 @@ cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
404400
TransferPredicateLocksToHeapRelation(OldHeap);
405401

406402
/* rebuild_relation does all the dirty work */
407-
rebuild_relation(OldHeap, indexOid, isTopLevel, verbose);
403+
rebuild_relation(OldHeap, indexOid, verbose);
408404

409405
/* NB: rebuild_relation does table_close() on OldHeap */
410406

@@ -549,12 +545,11 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
549545
*
550546
* OldHeap: table to rebuild --- must be opened and exclusive-locked!
551547
* indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
552-
* isTopLevel: should be passed down from ProcessUtility.
553548
*
554549
* NB: this routine closes OldHeap at the right time; caller should not.
555550
*/
556551
static void
557-
rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
552+
rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
558553
{
559554
Oid tableOid = RelationGetRelid(OldHeap);
560555
Oid tableSpace = OldHeap->rd_rel->reltablespace;
@@ -582,7 +577,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
582577
AccessExclusiveLock);
583578

584579
/* Copy the heap data into the new table in the desired order */
585-
copy_table_data(OIDNewHeap, tableOid, indexOid, isTopLevel, verbose,
580+
copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
586581
&swap_toast_by_content, &frozenXid, &cutoffMulti);
587582

588583
/*
@@ -733,8 +728,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
733728
* *pCutoffMulti receives the MultiXactId used as a cutoff point.
734729
*/
735730
static void
736-
copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
737-
bool isTopLevel, bool verbose,
731+
copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
738732
bool *pSwapToastByContent, TransactionId *pFreezeXid,
739733
MultiXactId *pCutoffMulti)
740734
{
@@ -832,7 +826,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
832826
* Since we're going to rewrite the whole table anyway, there's no reason
833827
* not to be aggressive about this.
834828
*/
835-
vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, isTopLevel,
829+
vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
836830
&OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
837831
NULL);
838832

src/backend/commands/vacuum.c

+26-48
Original file line numberDiff line numberDiff line change
@@ -907,8 +907,7 @@ get_all_vacuum_rels(int options)
907907
/*
908908
* vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
909909
*
910-
* Input parameters are the target relation, applicable freeze age settings,
911-
* and isTopLevel which should be passed down from ProcessUtility.
910+
* Input parameters are the target relation, applicable freeze age settings.
912911
*
913912
* The output parameters are:
914913
* - oldestXmin is the cutoff value used to distinguish whether tuples are
@@ -934,7 +933,6 @@ vacuum_set_xid_limits(Relation rel,
934933
int freeze_table_age,
935934
int multixact_freeze_min_age,
936935
int multixact_freeze_table_age,
937-
bool isTopLevel,
938936
TransactionId *oldestXmin,
939937
TransactionId *freezeLimit,
940938
TransactionId *xidFullScanLimit,
@@ -950,53 +948,33 @@ vacuum_set_xid_limits(Relation rel,
950948
MultiXactId mxactLimit;
951949
MultiXactId safeMxactLimit;
952950

953-
if (RELATION_IS_LOCAL(rel) && !IsInTransactionBlock(isTopLevel))
954-
{
955-
/*
956-
* If we are processing a temp relation (which by prior checks must be
957-
* one belonging to our session), and we are not inside any
958-
* transaction block, then there can be no tuples in the rel that are
959-
* still in-doubt, nor can there be any that are dead but possibly
960-
* still interesting to some snapshot our session holds. We don't
961-
* need to care whether other sessions could see such tuples, either.
962-
* So we can aggressively set the cutoff xmin to be the nextXid.
963-
*/
964-
*oldestXmin = ReadNewTransactionId();
965-
}
966-
else
951+
/*
952+
* We can always ignore processes running lazy vacuum. This is because we
953+
* use these values only for deciding which tuples we must keep in the
954+
* tables. Since lazy vacuum doesn't write its XID anywhere (usually no
955+
* XID assigned), it's safe to ignore it. In theory it could be
956+
* problematic to ignore lazy vacuums in a full vacuum, but keep in mind
957+
* that only one vacuum process can be working on a particular table at
958+
* any time, and that each vacuum is always an independent transaction.
959+
*/
960+
*oldestXmin = GetOldestNonRemovableTransactionId(rel);
961+
962+
if (OldSnapshotThresholdActive())
967963
{
968-
/*
969-
* Otherwise, calculate the cutoff xmin normally.
970-
*
971-
* We can always ignore processes running lazy vacuum. This is
972-
* because we use these values only for deciding which tuples we must
973-
* keep in the tables. Since lazy vacuum doesn't write its XID
974-
* anywhere (usually no XID assigned), it's safe to ignore it. In
975-
* theory it could be problematic to ignore lazy vacuums in a full
976-
* vacuum, but keep in mind that only one vacuum process can be
977-
* working on a particular table at any time, and that each vacuum is
978-
* always an independent transaction.
979-
*/
980-
*oldestXmin = GetOldestNonRemovableTransactionId(rel);
964+
TransactionId limit_xmin;
965+
TimestampTz limit_ts;
981966

982-
if (OldSnapshotThresholdActive())
967+
if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
968+
&limit_xmin, &limit_ts))
983969
{
984-
TransactionId limit_xmin;
985-
TimestampTz limit_ts;
986-
987-
if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
988-
&limit_xmin, &limit_ts))
989-
{
990-
/*
991-
* TODO: We should only set the threshold if we are pruning on
992-
* the basis of the increased limits. Not as crucial here as
993-
* it is for opportunistic pruning (which often happens at a
994-
* much higher frequency), but would still be a significant
995-
* improvement.
996-
*/
997-
SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
998-
*oldestXmin = limit_xmin;
999-
}
970+
/*
971+
* TODO: We should only set the threshold if we are pruning on the
972+
* basis of the increased limits. Not as crucial here as it is
973+
* for opportunistic pruning (which often happens at a much higher
974+
* frequency), but would still be a significant improvement.
975+
*/
976+
SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
977+
*oldestXmin = limit_xmin;
1000978
}
1001979
}
1002980

@@ -1930,7 +1908,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
19301908
cluster_options |= CLUOPT_VERBOSE;
19311909

19321910
/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1933-
cluster_rel(relid, InvalidOid, cluster_options, true);
1911+
cluster_rel(relid, InvalidOid, cluster_options);
19341912
}
19351913
else
19361914
table_relation_vacuum(onerel, params, vac_strategy);

src/backend/storage/ipc/procarray.c

+55-4
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ typedef struct ProcArrayStruct
131131
* different types of relations. As e.g. a normal user defined table in one
132132
* database is inaccessible to backends connected to another database, a test
133133
* specific to a relation can be more aggressive than a test for a shared
134-
* relation. Currently we track three different states:
134+
* relation. Currently we track four different states:
135135
*
136136
* 1) GlobalVisSharedRels, which only considers an XID's
137137
* effects visible-to-everyone if neither snapshots in any database, nor a
@@ -153,6 +153,9 @@ typedef struct ProcArrayStruct
153153
* I.e. the difference to GlobalVisCatalogRels is that
154154
* replication slot's catalog_xmin is not taken into account.
155155
*
156+
* 4) GlobalVisTempRels, which only considers the current session, as temp
157+
* tables are not visible to other sessions.
158+
*
156159
* GlobalVisTestFor(relation) returns the appropriate state
157160
* for the relation.
158161
*
@@ -234,6 +237,13 @@ typedef struct ComputeXidHorizonsResult
234237
* defined tables.
235238
*/
236239
TransactionId data_oldest_nonremovable;
240+
241+
/*
242+
* Oldest xid for which deleted tuples need to be retained in this
243+
* session's temporary tables.
244+
*/
245+
TransactionId temp_oldest_nonremovable;
246+
237247
} ComputeXidHorizonsResult;
238248

239249

@@ -257,12 +267,13 @@ static TransactionId standbySnapshotPendingXmin;
257267

258268
/*
259269
* State for visibility checks on different types of relations. See struct
260-
* GlobalVisState for details. As shared, catalog, and user defined
270+
* GlobalVisState for details. As shared, catalog, normal and temporary
261271
* relations can have different horizons, one such state exists for each.
262272
*/
263273
static GlobalVisState GlobalVisSharedRels;
264274
static GlobalVisState GlobalVisCatalogRels;
265275
static GlobalVisState GlobalVisDataRels;
276+
static GlobalVisState GlobalVisTempRels;
266277

267278
/*
268279
* This backend's RecentXmin at the last time the accurate xmin horizon was
@@ -1668,6 +1679,23 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
16681679
h->oldest_considered_running = initial;
16691680
h->shared_oldest_nonremovable = initial;
16701681
h->data_oldest_nonremovable = initial;
1682+
1683+
/*
1684+
* Only modifications made by this backend affect the horizon for
1685+
* temporary relations. Instead of a check in each iteration of the
1686+
* loop over all PGPROCs it is cheaper to just initialize to the
1687+
* current top-level xid any.
1688+
*
1689+
* Without an assigned xid we could use a horizon as agressive as
1690+
* ReadNewTransactionid(), but we can get away with the much cheaper
1691+
* latestCompletedXid + 1: If this backend has no xid there, by
1692+
* definition, can't be any newer changes in the temp table than
1693+
* latestCompletedXid.
1694+
*/
1695+
if (TransactionIdIsValid(MyProc->xid))
1696+
h->temp_oldest_nonremovable = MyProc->xid;
1697+
else
1698+
h->temp_oldest_nonremovable = initial;
16711699
}
16721700

16731701
/*
@@ -1760,6 +1788,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
17601788
TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin);
17611789
h->data_oldest_nonremovable =
17621790
TransactionIdOlder(h->data_oldest_nonremovable, kaxmin);
1791+
/* temp relations cannot be accessed in recovery */
17631792
}
17641793
else
17651794
{
@@ -1785,6 +1814,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
17851814
h->data_oldest_nonremovable =
17861815
TransactionIdRetreatedBy(h->data_oldest_nonremovable,
17871816
vacuum_defer_cleanup_age);
1817+
/* defer doesn't apply to temp relations */
17881818
}
17891819

17901820
/*
@@ -1844,6 +1874,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
18441874
h->catalog_oldest_nonremovable));
18451875
Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
18461876
h->data_oldest_nonremovable));
1877+
Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
1878+
h->temp_oldest_nonremovable));
18471879
Assert(!TransactionIdIsValid(h->slot_xmin) ||
18481880
TransactionIdPrecedesOrEquals(h->oldest_considered_running,
18491881
h->slot_xmin));
@@ -1878,6 +1910,8 @@ GetOldestNonRemovableTransactionId(Relation rel)
18781910
return horizons.shared_oldest_nonremovable;
18791911
else if (RelationIsAccessibleInLogicalDecoding(rel))
18801912
return horizons.catalog_oldest_nonremovable;
1913+
else if (RELATION_IS_LOCAL(rel))
1914+
return horizons.temp_oldest_nonremovable;
18811915
else
18821916
return horizons.data_oldest_nonremovable;
18831917
}
@@ -2054,8 +2088,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
20542088
* RecentXmin: the xmin computed for the most recent snapshot. XIDs
20552089
* older than this are known not running any more.
20562090
*
2057-
* And try to advance the bounds of GlobalVisSharedRels, GlobalVisCatalogRels,
2058-
* GlobalVisDataRels for the benefit of theGlobalVisTest* family of functions.
2091+
* And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
2092+
* for the benefit of theGlobalVisTest* family of functions.
20592093
*
20602094
* Note: this function should probably not be called with an argument that's
20612095
* not statically allocated (see xip allocation below).
@@ -2357,6 +2391,15 @@ GetSnapshotData(Snapshot snapshot)
23572391
GlobalVisDataRels.definitely_needed =
23582392
FullTransactionIdNewer(def_vis_fxid_data,
23592393
GlobalVisDataRels.definitely_needed);
2394+
/* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
2395+
if (TransactionIdIsNormal(myxid))
2396+
GlobalVisTempRels.definitely_needed =
2397+
FullXidRelativeTo(latest_completed, myxid);
2398+
else
2399+
{
2400+
GlobalVisTempRels.definitely_needed = latest_completed;
2401+
FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed);
2402+
}
23602403

23612404
/*
23622405
* Check if we know that we can initialize or increase the lower
@@ -2375,6 +2418,8 @@ GetSnapshotData(Snapshot snapshot)
23752418
GlobalVisDataRels.maybe_needed =
23762419
FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
23772420
oldestfxid);
2421+
/* accurate value known */
2422+
GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed;
23782423
}
23792424

23802425
RecentXmin = xmin;
@@ -3892,6 +3937,8 @@ GlobalVisTestFor(Relation rel)
38923937
state = &GlobalVisSharedRels;
38933938
else if (need_catalog)
38943939
state = &GlobalVisCatalogRels;
3940+
else if (RELATION_IS_LOCAL(rel))
3941+
state = &GlobalVisTempRels;
38953942
else
38963943
state = &GlobalVisDataRels;
38973944

@@ -3942,6 +3989,9 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
39423989
GlobalVisDataRels.maybe_needed =
39433990
FullXidRelativeTo(horizons->latest_completed,
39443991
horizons->data_oldest_nonremovable);
3992+
GlobalVisTempRels.maybe_needed =
3993+
FullXidRelativeTo(horizons->latest_completed,
3994+
horizons->temp_oldest_nonremovable);
39453995

39463996
/*
39473997
* In longer running transactions it's possible that transactions we
@@ -3957,6 +4007,7 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
39574007
GlobalVisDataRels.definitely_needed =
39584008
FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
39594009
GlobalVisDataRels.definitely_needed);
4010+
GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed;
39604011

39614012
ComputeXidHorizonsResultLastXmin = RecentXmin;
39624013
}

src/include/commands/cluster.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919

2020

2121
extern void cluster(ClusterStmt *stmt, bool isTopLevel);
22-
extern void cluster_rel(Oid tableOid, Oid indexOid, int options,
23-
bool isTopLevel);
22+
extern void cluster_rel(Oid tableOid, Oid indexOid, int options);
2423
extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
2524
bool recheck, LOCKMODE lockmode);
2625
extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal);

src/include/commands/vacuum.h

-1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,6 @@ extern void vacuum_set_xid_limits(Relation rel,
267267
int freeze_min_age, int freeze_table_age,
268268
int multixact_freeze_min_age,
269269
int multixact_freeze_table_age,
270-
bool isTopLevel,
271270
TransactionId *oldestXmin,
272271
TransactionId *freezeLimit,
273272
TransactionId *xidFullScanLimit,

0 commit comments

Comments
 (0)