Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 942542c

Browse files
committed
Protect against multixact members wraparound
Multixact member files are subject to early wraparound overflow and removal: if the average multixact size is above a certain threshold (see note below) the protections against offset overflow are not enough: during multixact truncation at checkpoint time, some pg_multixact/members files would be removed because the server considers them to be old and not needed anymore. This leads to loss of files that are critical for interpreting existing tuples' Xmax values. To protect against this, since we don't have enough info in pg_control and we can't modify it in old branches, we maintain shared memory state about the oldest value that we need to keep; we use this during new multixact creation to abort if an old still-needed file would get overwritten. This value is kept up to date by checkpoints, which makes it not completely accurate but should be good enough. We start emitting warnings sometime earlier, so that the eventual multixact-shutdown doesn't take DBAs completely by surprise (more precisely: once only 20 members SLRU segments remain before shutdown). On troublesome average multixact size: The threshold size depends on the multixact freeze parameters. The oldest age is related to the greater of multixact_freeze_table_age and multixact_freeze_min_age: anything older than that should be removed promptly by autovacuum. If autovacuum is keeping up with multixact freezing, the troublesome multixact average size is (2^32-1) / Max(freeze table age, freeze min age) or around 28 members per multixact. Having an average multixact size larger than that will eventually cause new multixact data to overwrite the data area for older multixacts. (If autovacuum is not able to keep up, or there are errors in vacuuming, the actual maximum is multixact_freeze_max_age instead, at which point multixact generation is stopped completely. The default value for this limit is 400 million, which means that the multixact size that would cause trouble is about 10 members.)
Initial bug report by Timothy Garnett, bug #12990. Backpatch to 9.3, where the problem was introduced. Authors: Álvaro Herrera, Thomas Munro. Reviews: Thomas Munro, Amit Kapila, Robert Haas, Kevin Grittner.
1 parent fd3dfc2 commit 942542c

File tree

2 files changed

+187
-25
lines changed

2 files changed

+187
-25
lines changed

src/backend/access/transam/multixact.c

+180-23
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,9 @@ typedef struct MultiXactStateData
211211
MultiXactId multiStopLimit;
212212
MultiXactId multiWrapLimit;
213213

214+
/* support for members anti-wraparound measures */
215+
MultiXactOffset offsetStopLimit;
216+
214217
/*
215218
* Per-backend data starts here. We have two arrays stored in the area
216219
* immediately following the MultiXactStateData struct. Each is indexed by
@@ -339,6 +342,10 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
339342
MultiXactOffset offset2);
340343
static void ExtendMultiXactOffset(MultiXactId multi);
341344
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
345+
static void DetermineSafeOldestOffset(MultiXactId oldestMXact);
346+
static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
347+
MultiXactOffset start, uint32 distance);
348+
static MultiXactOffset read_offset_for_multi(MultiXactId multi);
342349
static void WriteMZeroPageXlogRec(int pageno, uint8 info);
343350

344351

@@ -972,7 +979,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
972979

973980
/*
974981
* To avoid swamping the postmaster with signals, we issue the autovac
975-
* request only once per 64K transaction starts. This still gives
982+
* request only once per 64K multis generated. This still gives
976983
* plenty of chances before we get into real trouble.
977984
*/
978985
if (IsUnderPostmaster && (result % 65536) == 0)
@@ -1048,6 +1055,47 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
10481055
else
10491056
*offset = nextOffset;
10501057

1058+
/*----------
1059+
* Protect against overrun of the members space as well, with the
1060+
* following rules:
1061+
*
1062+
* If we're past offsetStopLimit, refuse to generate more multis.
1063+
* If we're close to offsetStopLimit, emit a warning.
1064+
*
1065+
* Arbitrarily, we start emitting warnings when we're 20 segments or less
1066+
* from offsetStopLimit.
1067+
*
1068+
* Note we haven't updated the shared state yet, so if we fail at this
1069+
* point, the multixact ID we grabbed can still be used by the next guy.
1070+
*
1071+
* Note that there is no point in forcing autovacuum runs here: the
1072+
* multixact freeze settings would have to be reduced for that to have any
1073+
* effect.
1074+
*----------
1075+
*/
1076+
#define OFFSET_WARN_SEGMENTS 20
1077+
if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
1078+
nmembers))
1079+
ereport(ERROR,
1080+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1081+
errmsg("multixact \"members\" limit exceeded"),
1082+
errdetail_plural("This command would create a multixact with %u members, which exceeds remaining space (%u member.)",
1083+
"This command would create a multixact with %u members, which exceeds remaining space (%u members.)",
1084+
MultiXactState->offsetStopLimit - nextOffset - 1,
1085+
nmembers,
1086+
MultiXactState->offsetStopLimit - nextOffset - 1),
1087+
errhint("Execute a database-wide VACUUM in database with OID %u, with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.",
1088+
MultiXactState->oldestMultiXactDB)));
1089+
else if (MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
1090+
nextOffset,
1091+
nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
1092+
ereport(WARNING,
1093+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1094+
errmsg("database with OID %u must be vacuumed before %d more multixact members are used",
1095+
MultiXactState->oldestMultiXactDB,
1096+
MultiXactState->offsetStopLimit - nextOffset + nmembers),
1097+
errhint("Execute a database-wide VACUUM in that database, with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
1098+
10511099
ExtendMultiXactMember(nextOffset, nmembers);
10521100

10531101
/*
@@ -1918,6 +1966,12 @@ StartupMultiXact(void)
19181966
*/
19191967
pageno = MXOffsetToMemberPage(offset);
19201968
MultiXactMemberCtl->shared->latest_page_number = pageno;
1969+
1970+
/*
1971+
* compute the oldest member we need to keep around to avoid old member
1972+
* data overrun.
1973+
*/
1974+
DetermineSafeOldestOffset(MultiXactState->oldestMultiXactId);
19211975
}
19221976

19231977
/*
@@ -2011,6 +2065,8 @@ TrimMultiXact(void)
20112065
}
20122066

20132067
LWLockRelease(MultiXactMemberControlLock);
2068+
2069+
DetermineSafeOldestOffset(MultiXactState->oldestMultiXactId);
20142070
}
20152071

20162072
/*
@@ -2118,7 +2174,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
21182174
*
21192175
* Note: This differs from the magic number used in
21202176
* SetTransactionIdLimit() since vacuum itself will never generate new
2121-
* multis.
2177+
* multis. XXX actually it does, if it needs to freeze old multis.
21222178
*/
21232179
multiStopLimit = multiWrapLimit - 100;
21242180
if (multiStopLimit < FirstMultiXactId)
@@ -2161,6 +2217,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
21612217
curMulti = MultiXactState->nextMXact;
21622218
LWLockRelease(MultiXactGenLock);
21632219

2220+
DetermineSafeOldestOffset(oldest_datminmxid);
2221+
21642222
/* Log the info */
21652223
ereport(DEBUG1,
21662224
(errmsg("MultiXactId wrap limit is %u, limited by database with OID %u",
@@ -2247,13 +2305,16 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
22472305

22482306
/*
22492307
* Update our oldestMultiXactId value, but only if it's more recent than
2250-
* what we had.
2308+
* what we had. However, even if not, always update the oldest multixact
2309+
* offset limit.
22512310
*/
22522311
void
22532312
MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
22542313
{
22552314
if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
22562315
SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
2316+
else
2317+
DetermineSafeOldestOffset(oldestMulti);
22572318
}
22582319

22592320
/*
@@ -2420,6 +2481,121 @@ GetOldestMultiXactId(void)
24202481
return oldestMXact;
24212482
}
24222483

2484+
/*
2485+
* Based on the given oldest MultiXactId, determine what's the oldest member
2486+
* offset and install the limit info in MultiXactState, where it can be used to
2487+
* prevent overrun of old data in the members SLRU area.
2488+
*/
2489+
static void
2490+
DetermineSafeOldestOffset(MultiXactId oldestMXact)
2491+
{
2492+
MultiXactOffset oldestOffset;
2493+
2494+
/*
2495+
* Can't do this while initdb'ing or in the startup process while
2496+
* replaying WAL: the segment file to read might have not yet been
2497+
* created, or already been removed.
2498+
*/
2499+
if (IsBootstrapProcessingMode() || InRecovery)
2500+
return;
2501+
2502+
/*
2503+
* We determine the safe upper bound for offsets of new xacts by reading
2504+
* the offset of the oldest multixact, and going back one segment. This
2505+
* way, the sequence of multixact member segments will always have a
2506+
* one-segment hole at a minimum. We start spewing warnings a few
2507+
* complete segments before that.
2508+
*/
2509+
oldestOffset = read_offset_for_multi(oldestMXact);
2510+
/* move back to start of the corresponding segment */
2511+
oldestOffset -= oldestOffset / MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT;
2512+
2513+
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2514+
/* always leave one segment before the wraparound point */
2515+
MultiXactState->offsetStopLimit = oldestOffset -
2516+
(MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
2517+
LWLockRelease(MultiXactGenLock);
2518+
}
2519+
2520+
/*
2521+
* Return whether adding "distance" to "start" would move past "boundary".
2522+
*
2523+
* We use this to determine whether the addition is "wrapping around" the
2524+
* boundary point, hence the name. The reason we don't want to use the regular
2525+
* 2^31-modulo arithmetic here is that we want to be able to use the whole of
2526+
* the 2^32-1 space here, allowing for more multixacts that would fit
2527+
* otherwise. See also SlruScanDirCbRemoveMembers.
2528+
*/
2529+
static bool
2530+
MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
2531+
uint32 distance)
2532+
{
2533+
MultiXactOffset finish;
2534+
2535+
Assert(distance >= 0);
2536+
2537+
/*
2538+
* Note that offset number 0 is not used (see GetMultiXactIdMembers), so
2539+
* if the addition wraps around the UINT_MAX boundary, skip that value.
2540+
*/
2541+
finish = start + distance;
2542+
if (finish < start)
2543+
finish++;
2544+
2545+
/*-----------------------------------------------------------------------
2546+
* When the boundary is numerically greater than the starting point, any
2547+
* value numerically between the two is not wrapped:
2548+
*
2549+
* <----S----B---->
2550+
* [---) = F wrapped past B (and UINT_MAX)
2551+
* [---) = F not wrapped
2552+
* [----] = F wrapped past B
2553+
*
2554+
* When the boundary is numerically less than the starting point (i.e. the
2555+
* UINT_MAX wraparound occurs somewhere in between) then all values in
2556+
* between are wrapped:
2557+
*
2558+
* <----B----S---->
2559+
* [---) = F not wrapped past B (but wrapped past UINT_MAX)
2560+
* [---) = F wrapped past B (and UINT_MAX)
2561+
* [----] = F not wrapped
2562+
*-----------------------------------------------------------------------
2563+
*/
2564+
if (start < boundary)
2565+
{
2566+
return finish >= boundary || finish < start;
2567+
}
2568+
else
2569+
{
2570+
return finish >= boundary && finish < start;
2571+
}
2572+
}
2573+
2574+
/*
2575+
* Read the offset of the first member of the given multixact.
2576+
*/
2577+
static MultiXactOffset
2578+
read_offset_for_multi(MultiXactId multi)
2579+
{
2580+
MultiXactOffset offset;
2581+
int pageno;
2582+
int entryno;
2583+
int slotno;
2584+
MultiXactOffset *offptr;
2585+
2586+
pageno = MultiXactIdToOffsetPage(multi);
2587+
entryno = MultiXactIdToOffsetEntry(multi);
2588+
2589+
/* lock is acquired by SimpleLruReadPage_ReadOnly */
2590+
slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
2591+
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2592+
offptr += entryno;
2593+
offset = *offptr;
2594+
LWLockRelease(MultiXactOffsetControlLock);
2595+
2596+
return offset;
2597+
}
2598+
24232599
/*
24242600
* SlruScanDirectory callback.
24252601
* This callback deletes segments that are outside the range determined by
@@ -2552,26 +2728,7 @@ TruncateMultiXact(void)
25522728
* First, compute the safe truncation point for MultiXactMember. This is
25532729
* the starting offset of the oldest multixact.
25542730
*/
2555-
{
2556-
int pageno;
2557-
int slotno;
2558-
int entryno;
2559-
MultiXactOffset *offptr;
2560-
2561-
/* lock is acquired by SimpleLruReadPage_ReadOnly */
2562-
2563-
pageno = MultiXactIdToOffsetPage(oldestMXact);
2564-
entryno = MultiXactIdToOffsetEntry(oldestMXact);
2565-
2566-
slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
2567-
oldestMXact);
2568-
offptr = (MultiXactOffset *)
2569-
MultiXactOffsetCtl->shared->page_buffer[slotno];
2570-
offptr += entryno;
2571-
oldestOffset = *offptr;
2572-
2573-
LWLockRelease(MultiXactOffsetControlLock);
2574-
}
2731+
oldestOffset = read_offset_for_multi(oldestMXact);
25752732

25762733
/*
25772734
* To truncate MultiXactMembers, we need to figure out the active page

src/backend/bootstrap/bootstrap.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,12 @@ AuxiliaryProcessMain(int argc, char *argv[])
406406
proc_exit(1); /* should never return */
407407

408408
case BootstrapProcess:
409+
/*
410+
* There was a brief instant during which mode was Normal; this is
411+
* okay. We need to be in bootstrap mode during BootStrapXLOG for
412+
* the sake of multixact initialization.
413+
*/
414+
SetProcessingMode(BootstrapProcessing);
409415
bootstrap_signals();
410416
BootStrapXLOG();
411417
BootstrapModeMain();
@@ -468,8 +474,7 @@ BootstrapModeMain(void)
468474
int i;
469475

470476
Assert(!IsUnderPostmaster);
471-
472-
SetProcessingMode(BootstrapProcessing);
477+
Assert(IsBootstrapProcessingMode());
473478

474479
/*
475480
* Do backend-like initialization for bootstrap mode

0 commit comments

Comments
 (0)