Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c4c2274

Browse files
committed
Fix bugs in cascading replication with recovery_target_timeline='latest'
The cascading replication code assumed that the current RecoveryTargetTLI never changes, but that's not true with recovery_target_timeline='latest'. The obvious upshot of that is that RecoveryTargetTLI in shared memory needs to be protected by a lock. A less obvious consequence is that when a cascading standby is connected, and the standby switches to a new target timeline after scanning the archive, it will continue to stream WAL to the cascading standby, but from the wrong file, i.e., the file of the previous timeline. For example, if the standby is currently streaming from the middle of file 000000010000000000000005, and the timeline changes, the standby will continue to stream from that file. However, the WAL on the new timeline is in file 000000020000000000000005, so the standby sends garbage from 000000010000000000000005 to the cascading standby, instead of the correct WAL from file 000000020000000000000005. This also fixes a related bug where a partial WAL segment is restored from the archive and streamed to a cascading standby. The code assumed that when a WAL segment is copied from the archive, it can immediately be fully streamed to a cascading standby. However, if the segment is only partially filled, i.e., it has the right size but only the first N bytes contain valid WAL, that's not safe. That can happen if a partial WAL segment is manually copied to the archive, or if a partial WAL segment is archived because a server is started up on a new timeline within that segment. The cascading standby will get confused if the WAL it receives is not valid, and will get stuck until it's restarted. This patch fixes that problem by not allowing WAL restored from the archive to be streamed to a cascading standby until it's been replayed, and thus validated.
1 parent cdf91ed commit c4c2274

File tree

3 files changed

+59
-35
lines changed

3 files changed

+59
-35
lines changed

src/backend/access/transam/xlog.c

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,6 @@ typedef struct XLogCtlData
407407
XLogRecPtr *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */
408408
int XLogCacheBlck; /* highest allocated xlog buffer index */
409409
TimeLineID ThisTimeLineID;
410-
TimeLineID RecoveryTargetTLI;
411410

412411
/*
413412
* archiveCleanupCommand is read from recovery.conf but needs to be in
@@ -456,14 +455,14 @@ typedef struct XLogCtlData
456455
XLogRecPtr recoveryLastRecPtr;
457456
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
458457
TimestampTz recoveryLastXTime;
458+
/* current effective recovery target timeline */
459+
TimeLineID RecoveryTargetTLI;
459460

460461
/*
461462
* timestamp of when we started replaying the current chunk of WAL data,
462463
* only relevant for replication or archive recovery
463464
*/
464465
TimestampTz currentChunkStartTime;
465-
/* end of the last record restored from the archive */
466-
XLogRecPtr restoreLastRecPtr;
467466
/* Are we requested to pause recovery? */
468467
bool recoveryPause;
469468

@@ -2817,18 +2816,6 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
28172816
if (reload)
28182817
WalSndRqstFileReload();
28192818

2820-
/*
2821-
* Calculate the end location of the restored WAL file and save it in
2822-
* shmem. It's used as current standby flush position, and cascading
2823-
* walsenders try to send WAL records up to this location.
2824-
*/
2825-
XLogSegNoOffsetToRecPtr(segno, 0, endptr);
2826-
XLByteAdvance(endptr, XLogSegSize);
2827-
2828-
SpinLockAcquire(&xlogctl->info_lck);
2829-
xlogctl->restoreLastRecPtr = endptr;
2830-
SpinLockRelease(&xlogctl->info_lck);
2831-
28322819
/* Signal walsender that new WAL has arrived */
28332820
if (AllowCascadeReplication())
28342821
WalSndWakeup();
@@ -4470,12 +4457,17 @@ rescanLatestTimeLine(void)
44704457
ThisTimeLineID)));
44714458
else
44724459
{
4460+
/* use volatile pointer to prevent code rearrangement */
4461+
volatile XLogCtlData *xlogctl = XLogCtl;
4462+
44734463
/* Switch target */
44744464
recoveryTargetTLI = newtarget;
44754465
list_free(expectedTLIs);
44764466
expectedTLIs = newExpectedTLIs;
44774467

4478-
XLogCtl->RecoveryTargetTLI = recoveryTargetTLI;
4468+
SpinLockAcquire(&xlogctl->info_lck);
4469+
xlogctl->RecoveryTargetTLI = recoveryTargetTLI;
4470+
SpinLockRelease(&xlogctl->info_lck);
44794471

44804472
ereport(LOG,
44814473
(errmsg("new target timeline is %u",
@@ -7513,13 +7505,20 @@ GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
75137505
}
75147506

75157507
/*
7516-
* GetRecoveryTargetTLI - get the recovery target timeline ID
7508+
* GetRecoveryTargetTLI - get the current recovery target timeline ID
75177509
*/
75187510
TimeLineID
75197511
GetRecoveryTargetTLI(void)
75207512
{
7521-
/* RecoveryTargetTLI doesn't change so we need no lock to copy it */
7522-
return XLogCtl->RecoveryTargetTLI;
7513+
/* use volatile pointer to prevent code rearrangement */
7514+
volatile XLogCtlData *xlogctl = XLogCtl;
7515+
TimeLineID result;
7516+
7517+
SpinLockAcquire(&xlogctl->info_lck);
7518+
result = xlogctl->RecoveryTargetTLI;
7519+
SpinLockRelease(&xlogctl->info_lck);
7520+
7521+
return result;
75237522
}
75247523

75257524
/*
@@ -8309,7 +8308,7 @@ CreateRestartPoint(int flags)
83098308
XLogRecPtr endptr;
83108309

83118310
/* Get the current (or recent) end of xlog */
8312-
endptr = GetStandbyFlushRecPtr();
8311+
endptr = GetStandbyFlushRecPtr(NULL);
83138312

83148313
KeepLogSeg(endptr, &_logSegNo);
83158314
_logSegNo--;
@@ -9818,23 +9817,22 @@ do_pg_abort_backup(void)
98189817
/*
98199818
* Get latest redo apply position.
98209819
*
9821-
* Optionally, returns the end byte position of the last restored
9822-
* WAL segment. Callers not interested in that value may pass
9823-
* NULL for restoreLastRecPtr.
9820+
* Optionally, returns the current recovery target timeline. Callers not
9821+
* interested in that may pass NULL for targetTLI.
98249822
*
98259823
* Exported to allow WALReceiver to read the pointer directly.
98269824
*/
98279825
XLogRecPtr
9828-
GetXLogReplayRecPtr(XLogRecPtr *restoreLastRecPtr)
9826+
GetXLogReplayRecPtr(TimeLineID *targetTLI)
98299827
{
98309828
/* use volatile pointer to prevent code rearrangement */
98319829
volatile XLogCtlData *xlogctl = XLogCtl;
98329830
XLogRecPtr recptr;
98339831

98349832
SpinLockAcquire(&xlogctl->info_lck);
98359833
recptr = xlogctl->recoveryLastRecPtr;
9836-
if (restoreLastRecPtr)
9837-
*restoreLastRecPtr = xlogctl->restoreLastRecPtr;
9834+
if (targetTLI)
9835+
*targetTLI = xlogctl->RecoveryTargetTLI;
98389836
SpinLockRelease(&xlogctl->info_lck);
98399837

98409838
return recptr;
@@ -9843,21 +9841,23 @@ GetXLogReplayRecPtr(XLogRecPtr *restoreLastRecPtr)
98439841
/*
98449842
* Get current standby flush position, ie, the last WAL position
98459843
* known to be fsync'd to disk in standby.
9844+
*
9845+
* If 'targetTLI' is not NULL, it's set to the current recovery target
9846+
* timeline.
98469847
*/
98479848
XLogRecPtr
9848-
GetStandbyFlushRecPtr(void)
9849+
GetStandbyFlushRecPtr(TimeLineID *targetTLI)
98499850
{
98509851
XLogRecPtr receivePtr;
98519852
XLogRecPtr replayPtr;
9852-
XLogRecPtr restorePtr;
98539853

98549854
receivePtr = GetWalRcvWriteRecPtr(NULL);
9855-
replayPtr = GetXLogReplayRecPtr(&restorePtr);
9855+
replayPtr = GetXLogReplayRecPtr(targetTLI);
98569856

98579857
if (XLByteLT(receivePtr, replayPtr))
9858-
return XLByteLT(replayPtr, restorePtr) ? restorePtr : replayPtr;
9858+
return replayPtr;
98599859
else
9860-
return XLByteLT(receivePtr, restorePtr) ? restorePtr : receivePtr;
9860+
return receivePtr;
98619861
}
98629862

98639863
/*

src/backend/replication/walsender.c

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ IdentifySystem(void)
303303
GetSystemIdentifier());
304304
snprintf(tli, sizeof(tli), "%u", ThisTimeLineID);
305305

306-
logptr = am_cascading_walsender ? GetStandbyFlushRecPtr() : GetInsertRecPtr();
306+
logptr = am_cascading_walsender ? GetStandbyFlushRecPtr(NULL) : GetInsertRecPtr();
307307

308308
snprintf(xpos, sizeof(xpos), "%X/%X", (uint32) (logptr >> 32), (uint32) logptr);
309309

@@ -1137,7 +1137,31 @@ XLogSend(char *msgbuf, bool *caughtup)
11371137
* subsequently crashes and restarts, slaves must not have applied any WAL
11381138
* that gets lost on the master.
11391139
*/
1140-
SendRqstPtr = am_cascading_walsender ? GetStandbyFlushRecPtr() : GetFlushRecPtr();
1140+
if (am_cascading_walsender)
1141+
{
1142+
TimeLineID currentTargetTLI;
1143+
SendRqstPtr = GetStandbyFlushRecPtr(&currentTargetTLI);
1144+
1145+
/*
1146+
* If the recovery target timeline changed, bail out. It's a bit
1147+
* unfortunate that we have to just disconnect, but there is no way
1148+
* to tell the client that the timeline changed. We also don't know
1149+
* exactly where the switch happened, so we cannot safely try to send
1150+
* up to the switchover point before disconnecting.
1151+
*/
1152+
if (currentTargetTLI != ThisTimeLineID)
1153+
{
1154+
if (!walsender_ready_to_stop)
1155+
ereport(LOG,
1156+
(errmsg("terminating walsender process to force cascaded standby "
1157+
"to update timeline and reconnect")));
1158+
walsender_ready_to_stop = true;
1159+
*caughtup = true;
1160+
return;
1161+
}
1162+
}
1163+
else
1164+
SendRqstPtr = GetFlushRecPtr();
11411165

11421166
/* Quick exit if nothing to do */
11431167
if (XLByteLE(SendRqstPtr, sentPtr))

src/include/access/xlog.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,8 @@ extern bool RecoveryInProgress(void);
285285
extern bool HotStandbyActive(void);
286286
extern bool XLogInsertAllowed(void);
287287
extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
288-
extern XLogRecPtr GetXLogReplayRecPtr(XLogRecPtr *restoreLastRecPtr);
289-
extern XLogRecPtr GetStandbyFlushRecPtr(void);
288+
extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *targetTLI);
289+
extern XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *targetTLI);
290290
extern XLogRecPtr GetXLogInsertRecPtr(void);
291291
extern XLogRecPtr GetXLogWriteRecPtr(void);
292292
extern bool RecoveryIsPaused(void);

0 commit comments

Comments
 (0)