Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7b7521d

Browse files
committed
Fix scenario where streaming standby gets stuck at a continuation record.
If a continuation record is split so that its first half has already been removed from the master, and is only present in pg_wal, and there is a recycled WAL segment in the standby server that looks like it would contain the second half, recovery would get stuck. The code in XLogPageRead() incorrectly started streaming at the beginning of the WAL record, even if we had already read the first page. Backpatch to 9.4. In principle, older versions have the same problem, but without replication slots, there was no straightforward mechanism to prevent the master from recycling old WAL that was still needed by the standby. Without such a mechanism, I think it's reasonable to assume that there's enough slack in how many old segments are kept around to not run into this, or that you have a WAL archive. Reported by Jonathon Nelson. Analysis and patch by Kyotaro HORIGUCHI, with some extra comments by me. Discussion: https://www.postgresql.org/message-id/CACJqAM3xVz0JY1XFDKPP%2BJoJAjoGx%3DGNuOAshEDWCext7BFvCQ%40mail.gmail.com
1 parent 3a11485 commit 7b7521d

File tree

3 files changed

+62
-13
lines changed

3 files changed

+62
-13
lines changed

src/backend/access/transam/xlog.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11273,6 +11273,40 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1127311273
Assert(reqLen <= readLen);
1127411274

1127511275
*readTLI = curFileTLI;
11276+
11277+
/*
11278+
* Check the page header immediately, so that we can retry immediately if
11279+
* it's not valid. This may seem unnecessary, because XLogReadRecord()
11280+
* validates the page header anyway, and would propagate the failure up to
11281+
* ReadRecord(), which would retry. However, there's a corner case with
11282+
* continuation records, if a record is split across two pages such that
11283+
* we would need to read the two pages from different sources. For
11284+
* example, imagine a scenario where a streaming replica is started up,
11285+
* and replay reaches a record that's split across two WAL segments. The
11286+
* first page is only available locally, in pg_wal, because it's already
11287+
* been recycled in the master. The second page, however, is not present
11288+
* in pg_wal, and we should stream it from the master. There is a recycled
11289+
* WAL segment present in pg_wal, with garbage contents, however. We would
11290+
* read the first page from the local WAL segment, but when reading the
11291+
* second page, we would read the bogus, recycled, WAL segment. If we
11292+
* didn't catch that case here, we would never recover, because
11293+
* ReadRecord() would retry reading the whole record from the beginning.
11294+
*
11295+
* Of course, this only catches errors in the page header, which is what
11296+
* happens in the case of a recycled WAL segment. Other kinds of errors or
11297+
* corruption still have the same problem. But this at least fixes the
11298+
* common case, which can happen as part of normal operation.
11299+
*
11300+
* Validating the page header is cheap enough that doing it twice
11301+
* shouldn't be a big deal from a performance point of view.
11302+
*/
11303+
if (!XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
11304+
{
11305+
/* reset any error XLogReaderValidatePageHeader() might have set */
11306+
xlogreader->errormsg_buf[0] = '\0';
11307+
goto next_record_is_invalid;
11308+
}
11309+
1127611310
return readLen;
1127711311

1127811312
next_record_is_invalid:
@@ -11406,12 +11440,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1140611440
}
1140711441
else
1140811442
{
11409-
ptr = tliRecPtr;
11443+
ptr = RecPtr;
11444+
11445+
/*
11446+
* Use the record begin position to determine the
11447+
* TLI, rather than the position we're reading.
11448+
*/
1141011449
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
1141111450

1141211451
if (curFileTLI > 0 && tli < curFileTLI)
1141311452
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
11414-
(uint32) (ptr >> 32), (uint32) ptr,
11453+
(uint32) (tliRecPtr >> 32),
11454+
(uint32) tliRecPtr,
1141511455
tli, curFileTLI);
1141611456
}
1141711457
curFileTLI = tli;

src/backend/access/transam/xlogreader.c

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727

2828
static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
2929

30-
static bool ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
31-
XLogPageHeader hdr);
3230
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
3331
XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
3432
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
@@ -530,7 +528,6 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
530528
*/
531529
if (targetSegNo != state->readSegNo && targetPageOff != 0)
532530
{
533-
XLogPageHeader hdr;
534531
XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
535532

536533
readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
@@ -542,9 +539,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
542539
/* we can be sure to have enough WAL available, we scrolled back */
543540
Assert(readLen == XLOG_BLCKSZ);
544541

545-
hdr = (XLogPageHeader) state->readBuf;
546-
547-
if (!ValidXLogPageHeader(state, targetSegmentPtr, hdr))
542+
if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
543+
state->readBuf))
548544
goto err;
549545
}
550546

@@ -581,7 +577,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
581577
/*
582578
* Now that we know we have the full header, validate it.
583579
*/
584-
if (!ValidXLogPageHeader(state, pageptr, hdr))
580+
if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
585581
goto err;
586582

587583
/* update read state information */
@@ -706,15 +702,19 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
706702
}
707703

708704
/*
709-
* Validate a page header
705+
* Validate a page header.
706+
*
707+
* Check if 'phdr' is valid as the header of the XLog page at position
708+
* 'recptr'.
710709
*/
711-
static bool
712-
ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
713-
XLogPageHeader hdr)
710+
bool
711+
XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
712+
char *phdr)
714713
{
715714
XLogRecPtr recaddr;
716715
XLogSegNo segno;
717716
int32 offset;
717+
XLogPageHeader hdr = (XLogPageHeader) phdr;
718718

719719
Assert((recptr % XLOG_BLCKSZ) == 0);
720720

@@ -802,6 +802,11 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
802802
return false;
803803
}
804804

805+
/*
806+
* Check that the address on the page agrees with what we expected.
807+
* This check typically fails when an old WAL segment is recycled,
808+
* and hasn't been overwritten with new data yet.
809+
*/
805810
if (hdr->xlp_pageaddr != recaddr)
806811
{
807812
char fname[MAXFNAMELEN];

src/include/access/xlogreader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ extern void XLogReaderFree(XLogReaderState *state);
180180
extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
181181
XLogRecPtr recptr, char **errormsg);
182182

183+
/* Validate a page header */
184+
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
185+
XLogRecPtr recptr, char *phdr);
186+
183187
/* Invalidate read state */
184188
extern void XLogReaderInvalReadState(XLogReaderState *state);
185189

0 commit comments

Comments
 (0)