45
45
* anything we saw during replay.
46
46
*
47
47
* We are able to remove segments no longer necessary by carefully tracking
48
- * each table's used values: during vacuum, any multixact older than a
49
- * certain value is removed; the cutoff value is stored in pg_class.
50
- * The minimum value in each database is stored in pg_database, and the
51
- * global minimum is part of pg_control. Any vacuum that is able to
52
- * advance its database's minimum value also computes a new global minimum,
53
- * and uses this value to truncate older segments. When new multixactid
54
- * values are to be created, care is taken that the counter does not
55
- * fall within the wraparound horizon considering the global minimum value.
48
+ * each table's used values: during vacuum, any multixact older than a certain
49
+ * value is removed; the cutoff value is stored in pg_class. The minimum value
50
+ * across all tables in each database is stored in pg_database, and the global
51
+ * minimum across all databases is part of pg_control and is kept in shared
52
+ * memory. At checkpoint time, after the value is known to be flushed in WAL, any
53
+ * files that correspond to multixacts older than that value are removed.
54
+ * (These files are also removed when a restartpoint is executed.)
55
+ *
56
+ * When new multixactid values are to be created, care is taken that the
57
+ * counter does not fall within the wraparound horizon considering the global
58
+ * minimum value.
56
59
*
57
60
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
58
61
* Portions Copyright (c) 1994, Regents of the University of California
91
94
* Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
92
95
* MultiXact page numbering also wraps around at
93
96
* 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
94
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
97
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
95
98
* take no explicit notice of that fact in this module, except when comparing
96
99
* segment and page numbers in TruncateMultiXact (see
97
100
* MultiXactOffsetPagePrecedes).
@@ -188,16 +191,20 @@ typedef struct MultiXactStateData
188
191
/* next-to-be-assigned offset */
189
192
MultiXactOffset nextOffset ;
190
193
191
- /* the Offset SLRU area was last truncated at this MultiXactId */
192
- MultiXactId lastTruncationPoint ;
193
-
194
194
/*
195
- * oldest multixact that is still on disk. Anything older than this
196
- * should not be consulted.
195
+ * Oldest multixact that is still on disk. Anything older than this
196
+ * should not be consulted. These values are updated by vacuum.
197
197
*/
198
198
MultiXactId oldestMultiXactId ;
199
199
Oid oldestMultiXactDB ;
200
200
201
+ /*
202
+ * This is what the previous checkpoint stored as the truncate position.
203
+ * This value is the oldestMultiXactId that was valid when a checkpoint
204
+ * was last executed.
205
+ */
206
+ MultiXactId lastCheckpointedOldest ;
207
+
201
208
/* support for anti-wraparound measures */
202
209
MultiXactId multiVacLimit ;
203
210
MultiXactId multiWarnLimit ;
@@ -234,12 +241,20 @@ typedef struct MultiXactStateData
234
241
* than its own OldestVisibleMXactId[] setting; this is necessary because
235
242
* the checkpointer could truncate away such data at any instant.
236
243
*
237
- * The checkpointer can compute the safe truncation point as the oldest
238
- * valid value among all the OldestMemberMXactId[] and
239
- * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
240
- * Clearly, it is not possible for any later-computed OldestVisibleMXactId
241
- * value to be older than this, and so there is no risk of truncating data
242
- * that is still needed.
244
+ * The oldest valid value among all of the OldestMemberMXactId[] and
245
+ * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
246
+ * possible value still having any live member transaction. Subtracting
247
+ * vacuum_multixact_freeze_min_age from that value we obtain the freezing
248
+ * point for multixacts for that table. Any value older than that is
249
+ * removed from tuple headers (or "frozen"; see FreezeMultiXactId). Note
250
+ * that multis that have member xids that are older than the cutoff point
251
+ * for xids must also be frozen, even if the multis themselves are newer
252
+ * than the multixid cutoff point. Whenever a full table vacuum happens,
253
+ * the freezing point so computed is used as the new pg_class.relminmxid
254
+ * value. The minimum of all those values in a database is stored as
255
+ * pg_database.datminmxid. In turn, the minimum of all of those values is
256
+ * stored in pg_control and used as truncation point for pg_multixact. At
257
+ * checkpoint or restartpoint, unneeded segments are removed.
243
258
*/
244
259
MultiXactId perBackendXactIds [1 ]; /* VARIABLE LENGTH ARRAY */
245
260
} MultiXactStateData ;
@@ -1121,8 +1136,8 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
1121
1136
* We check known limits on MultiXact before resorting to the SLRU area.
1122
1137
*
1123
1138
* An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1124
- * useful; it should have already been removed by vacuum. We've truncated
1125
- * the on-disk structures anyway . Returning the wrong values could lead
1139
+ * useful; it has already been removed, or will be removed shortly, by
1140
+ * truncation. Returning the wrong values could lead
1126
1141
* to an incorrect visibility result. However, to support pg_upgrade we
1127
1142
* need to allow an empty set to be returned regardless, if the caller is
1128
1143
* willing to accept it; the caller is expected to check that it's an
@@ -1932,14 +1947,14 @@ TrimMultiXact(void)
1932
1947
LWLockAcquire (MultiXactOffsetControlLock , LW_EXCLUSIVE );
1933
1948
1934
1949
/*
1935
- * (Re-)Initialize our idea of the latest page number.
1950
+ * (Re-)Initialize our idea of the latest page number for offsets.
1936
1951
*/
1937
1952
pageno = MultiXactIdToOffsetPage (multi );
1938
1953
MultiXactOffsetCtl -> shared -> latest_page_number = pageno ;
1939
1954
1940
1955
/*
1941
1956
* Zero out the remainder of the current offsets page. See notes in
1942
- * StartupCLOG () for motivation.
1957
+ * TrimCLOG() for motivation.
1943
1958
*/
1944
1959
entryno = MultiXactIdToOffsetEntry (multi );
1945
1960
if (entryno != 0 )
@@ -1962,7 +1977,7 @@ TrimMultiXact(void)
1962
1977
LWLockAcquire (MultiXactMemberControlLock , LW_EXCLUSIVE );
1963
1978
1964
1979
/*
1965
- * (Re-)Initialize our idea of the latest page number.
1980
+ * (Re-)Initialize our idea of the latest page number for members.
1966
1981
*/
1967
1982
pageno = MXOffsetToMemberPage (offset );
1968
1983
MultiXactMemberCtl -> shared -> latest_page_number = pageno ;
@@ -2240,6 +2255,18 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2240
2255
SetMultiXactIdLimit (oldestMulti , oldestMultiDB );
2241
2256
}
2242
2257
2258
+ /*
2259
+ * Update the "safe truncation point". This is the newest value of oldestMulti
2260
+ * that is known to be flushed as part of a checkpoint record.
2261
+ */
2262
+ void
2263
+ MultiXactSetSafeTruncate (MultiXactId safeTruncateMulti )
2264
+ {
2265
+ LWLockAcquire (MultiXactGenLock , LW_EXCLUSIVE );
2266
+ MultiXactState -> lastCheckpointedOldest = safeTruncateMulti ;
2267
+ LWLockRelease (MultiXactGenLock );
2268
+ }
2269
+
2243
2270
/*
2244
2271
* Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2245
2272
*
@@ -2478,25 +2505,31 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
2478
2505
* Remove all MultiXactOffset and MultiXactMember segments before the oldest
2479
2506
* ones still of interest.
2480
2507
*
2481
- * On a primary, this is called by vacuum after it has successfully advanced a
2482
- * database's datminmxid value; the cutoff value we're passed is the minimum of
2483
- * all databases' datminmxid values.
2484
- *
2485
- * During crash recovery, it's called from CreateRestartPoint() instead. We
2486
- * rely on the fact that xlog_redo() will already have called
2487
- * MultiXactAdvanceOldest(). Our latest_page_number will already have been
2488
- * initialized by StartupMultiXact() and kept up to date as new pages are
2489
- * zeroed.
2508
+ * On a primary, this is called by the checkpointer process after a checkpoint
2509
+ * has been flushed; during crash recovery, it's called from
2510
+ * CreateRestartPoint(). In the latter case, we rely on the fact that
2511
+ * xlog_redo() will already have called MultiXactAdvanceOldest(). Our
2512
+ * latest_page_number will already have been initialized by StartupMultiXact()
2513
+ * and kept up to date as new pages are zeroed.
2490
2514
*/
2491
2515
void
2492
- TruncateMultiXact (MultiXactId oldestMXact )
2516
+ TruncateMultiXact (void )
2493
2517
{
2518
+ MultiXactId oldestMXact ;
2494
2519
MultiXactOffset oldestOffset ;
2495
2520
MultiXactOffset nextOffset ;
2496
2521
mxtruncinfo trunc ;
2497
2522
MultiXactId earliest ;
2498
2523
MembersLiveRange range ;
2499
2524
2525
+ Assert (AmCheckpointerProcess () || AmStartupProcess () ||
2526
+ !IsPostmasterEnvironment );
2527
+
2528
+ LWLockAcquire (MultiXactGenLock , LW_SHARED );
2529
+ oldestMXact = MultiXactState -> lastCheckpointedOldest ;
2530
+ LWLockRelease (MultiXactGenLock );
2531
+ Assert (MultiXactIdIsValid (oldestMXact ));
2532
+
2500
2533
/*
2501
2534
* Note we can't just plow ahead with the truncation; it's possible that
2502
2535
* there are no segments to truncate, which is a problem because we are
@@ -2507,15 +2540,16 @@ TruncateMultiXact(MultiXactId oldestMXact)
2507
2540
trunc .earliestExistingPage = -1 ;
2508
2541
SlruScanDirectory (MultiXactOffsetCtl , SlruScanDirCbFindEarliest , & trunc );
2509
2542
earliest = trunc .earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE ;
2543
+ if (earliest < FirstMultiXactId )
2544
+ earliest = FirstMultiXactId ;
2510
2545
2511
2546
/* nothing to do */
2512
2547
if (MultiXactIdPrecedes (oldestMXact , earliest ))
2513
2548
return ;
2514
2549
2515
2550
/*
2516
2551
* First, compute the safe truncation point for MultiXactMember. This is
2517
- * the starting offset of the multixact we were passed as MultiXactOffset
2518
- * cutoff.
2552
+ * the starting offset of the oldest multixact.
2519
2553
*/
2520
2554
{
2521
2555
int pageno ;
@@ -2538,10 +2572,6 @@ TruncateMultiXact(MultiXactId oldestMXact)
2538
2572
LWLockRelease (MultiXactOffsetControlLock );
2539
2573
}
2540
2574
2541
- /* truncate MultiXactOffset */
2542
- SimpleLruTruncate (MultiXactOffsetCtl ,
2543
- MultiXactIdToOffsetPage (oldestMXact ));
2544
-
2545
2575
/*
2546
2576
* To truncate MultiXactMembers, we need to figure out the active page
2547
2577
* range and delete all files outside that range. The start point is the
@@ -2559,6 +2589,11 @@ TruncateMultiXact(MultiXactId oldestMXact)
2559
2589
range .rangeEnd = MXOffsetToMemberPage (nextOffset );
2560
2590
2561
2591
SlruScanDirectory (MultiXactMemberCtl , SlruScanDirCbRemoveMembers , & range );
2592
+
2593
+ /* Now we can truncate MultiXactOffset */
2594
+ SimpleLruTruncate (MultiXactOffsetCtl ,
2595
+ MultiXactIdToOffsetPage (oldestMXact ));
2596
+
2562
2597
}
2563
2598
2564
2599
/*
0 commit comments