Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 24c19e6

Browse files
committed
Avoid bogus "out-of-sequence timeline ID" errors in standby-mode.
When the startup process opens a WAL segment after replaying part of it, it validates the first page of the WAL segment, even though the page it's really interested in is later in the file. As part of the validation, it checks that the TLI on the page header is >= the TLI it saw on the last page it read. If the segment contains a timeline switch, and we have already replayed it, and then re-open the WAL segment (because streaming replication got disconnected and reconnected, for example), the TLI check will fail when the first page is validated. Fix that by relaxing the TLI check when re-opening a WAL segment. Backpatch to 9.0. Earlier versions had the same code, but before standby mode was introduced in 9.0, recovery never tried to re-read a segment after partially replaying it. Reported by Amit Kapila, while testing a new feature.
1 parent 27b2c6a commit 24c19e6

File tree

1 file changed

+24
-9
lines changed
  • src/backend/access/transam

1 file changed

+24
-9
lines changed

src/backend/access/transam/xlog.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ static uint32 readRecordBufSize = 0;
570570
static XLogRecPtr ReadRecPtr; /* start of last record read */
571571
static XLogRecPtr EndRecPtr; /* end+1 of last record read */
572572
static TimeLineID lastPageTLI = 0;
573+
static TimeLineID lastSegmentTLI = 0;
573574

574575
static XLogRecPtr minRecoveryPoint; /* local copy of
575576
* ControlFile->minRecoveryPoint */
@@ -644,7 +645,7 @@ static void CleanupBackupHistory(void);
644645
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
645646
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
646647
static void CheckRecoveryConsistency(void);
647-
static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);
648+
static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly);
648649
static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,
649650
int emode, bool randAccess);
650651
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -3339,7 +3340,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
33393340
* to go backwards (but we can't reset that variable right here, since
33403341
* we might not change files at all).
33413342
*/
3342-
lastPageTLI = 0; /* see comment in ValidXLogPageHeader */
3343+
/* see comment in ValidXLogPageHeader */
3344+
lastPageTLI = lastSegmentTLI = 0;
33433345
randAccess = true; /* allow curFileTLI to go backwards too */
33443346
}
33453347

@@ -3579,7 +3581,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
35793581
* ReadRecord. It's not intended for use from anywhere else.
35803582
*/
35813583
static bool
3582-
ValidXLogPageHeader(XLogPageHeader hdr, int emode)
3584+
ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly)
35833585
{
35843586
XLogRecPtr recaddr;
35853587

@@ -3681,19 +3683,32 @@ ValidXLogPageHeader(XLogPageHeader hdr, int emode)
36813683
* successive pages of a consistent WAL sequence.
36823684
*
36833685
* Of course this check should only be applied when advancing sequentially
3684-
* across pages; therefore ReadRecord resets lastPageTLI to zero when
3685-
* going to a random page.
3686+
* across pages; therefore ReadRecord resets lastPageTLI and
3687+
* lastSegmentTLI to zero when going to a random page.
3688+
*
3689+
* Sometimes we re-open a segment that's already been partially replayed.
3690+
* In that case we cannot perform the normal TLI check: if there is a
3691+
* timeline switch within the segment, the first page has a smaller TLI
3692+
* than later pages following the timeline switch, and we might've read
3693+
* them already. As a weaker test, we still check that it's not smaller
3694+
* than the TLI we last saw at the beginning of a segment. Pass
3695+
* segmentonly = true when re-validating the first page like that, and the
3696+
* page you're actually interested in comes later.
36863697
*/
3687-
if (hdr->xlp_tli < lastPageTLI)
3698+
if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI))
36883699
{
36893700
ereport(emode_for_corrupt_record(emode, recaddr),
36903701
(errmsg("out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u",
3691-
hdr->xlp_tli, lastPageTLI,
3702+
hdr->xlp_tli,
3703+
segmentonly ? lastSegmentTLI : lastPageTLI,
36923704
XLogFileNameP(curFileTLI, readSegNo),
36933705
readOff)));
36943706
return false;
36953707
}
36963708
lastPageTLI = hdr->xlp_tli;
3709+
if (readOff == 0)
3710+
lastSegmentTLI = hdr->xlp_tli;
3711+
36973712
return true;
36983713
}
36993714

@@ -9366,7 +9381,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
93669381
fname, readOff)));
93679382
goto next_record_is_invalid;
93689383
}
9369-
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
9384+
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, true))
93709385
goto next_record_is_invalid;
93719386
}
93729387

@@ -9392,7 +9407,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
93929407
fname, readOff)));
93939408
goto next_record_is_invalid;
93949409
}
9395-
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
9410+
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, false))
93969411
goto next_record_is_invalid;
93979412

93989413
readFileHeaderValidated = true;

0 commit comments

Comments
 (0)