Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 990fe3c

Browse files
committed
Fix more issues with cascading replication and timeline switches.
When a standby server follows the master using WAL archive, and it chooses a new timeline (recovery_target_timeline='latest'), it only fetches the timeline history file for the chosen target timeline, not any other history files that might be missing from pg_xlog. For example, if the current timeline is 2, and we choose 4 as the new recovery target timeline, the history file for timeline 3 is not fetched, even if it's part of this server's history. That's enough for the standby itself - the history file for timeline 4 includes timeline 3 as well - but if a cascading standby server wants to recover to timeline 3, it needs the history file. To fix, when a new recovery target timeline is chosen, try to copy any missing history files from the archive to pg_xlog between the old and new target timeline. A second similar issue was with the WAL files. When a standby recovers from archive, and it reaches a segment that contains a switch to a new timeline, recovery fetches only the WAL file labelled with the new timeline's ID. The file from the new timeline contains a copy of the WAL from the old timeline up to the point where the switch happened, and recovery recovers it from the new file. But in streaming replication, walsender only tries to read it from the old timeline's file. To fix, change walsender to read it from the new file, so that it behaves the same as recovery in that sense, and doesn't try to open the possibly nonexistent file with the old timeline's ID.
1 parent 861ad67 commit 990fe3c

File tree

5 files changed

+92
-11
lines changed

5 files changed

+92
-11
lines changed

src/backend/access/transam/timeline.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@
4040
#include "access/xlogdefs.h"
4141
#include "storage/fd.h"
4242

43+
/*
44+
* Copies all timeline history files with id's between 'begin' and 'end'
45+
* from archive to pg_xlog.
46+
*/
47+
void
48+
restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
49+
{
50+
char path[MAXPGPATH];
51+
char histfname[MAXFNAMELEN];
52+
TimeLineID tli;
53+
54+
for (tli = begin; tli < end; tli++)
55+
{
56+
if (tli == 1)
57+
continue;
58+
59+
TLHistoryFileName(histfname, tli);
60+
if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
61+
KeepFileRestoredFromArchive(path, histfname);
62+
}
63+
}
64+
4365
/*
4466
* Try to read a timeline's history file.
4567
*

src/backend/access/transam/xlog.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3276,8 +3276,8 @@ rescanLatestTimeLine(void)
32763276
bool found;
32773277
ListCell *cell;
32783278
TimeLineID newtarget;
3279+
TimeLineID oldtarget = recoveryTargetTLI;
32793280
TimeLineHistoryEntry *currentTle = NULL;
3280-
/* use volatile pointer to prevent code rearrangement */
32813281

32823282
newtarget = findNewestTimeLine(recoveryTargetTLI);
32833283
if (newtarget == recoveryTargetTLI)
@@ -3336,6 +3336,12 @@ rescanLatestTimeLine(void)
33363336
list_free_deep(expectedTLEs);
33373337
expectedTLEs = newExpectedTLEs;
33383338

3339+
/*
3340+
* As in StartupXLOG(), try to ensure we have all the history files
3341+
* between the old target and new target in pg_xlog.
3342+
*/
3343+
restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
3344+
33393345
ereport(LOG,
33403346
(errmsg("new target timeline is %u",
33413347
recoveryTargetTLI)));
@@ -4993,6 +4999,20 @@ StartupXLOG(void)
49934999
*/
49945000
ThisTimeLineID = checkPoint.ThisTimeLineID;
49955001

5002+
/*
5003+
* Copy any missing timeline history files between 'now' and the
5004+
* recovery target timeline from archive to pg_xlog. While we don't need
5005+
* those files ourselves - the history file of the recovery target
5006+
* timeline covers all the previous timelines in the history too - a
5007+
* cascading standby server might be interested in them. Or, if you
5008+
* archive the WAL from this server to a different archive than the
5009+
* master, it'd be good for all the history files to get archived there
5010+
* after failover, so that you can use one of the old timelines as a
5011+
* PITR target. Timeline history files are small, so it's better to copy
5012+
* them unnecessarily than not copy them and regret later.
5013+
*/
5014+
restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI);
5015+
49965016
lastFullPageWrites = checkPoint.fullPageWrites;
49975017

49985018
RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;

src/backend/replication/walsender.c

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ static int sendFile = -1;
110110
static XLogSegNo sendSegNo = 0;
111111
static uint32 sendOff = 0;
112112

113+
/* Timeline ID of the currently open file */
114+
static TimeLineID curFileTimeLine = 0;
115+
113116
/*
114117
* These variables keep track of the state of the timeline we're currently
115118
* sending. sendTimeLine identifies the timeline. If sendTimeLineIsHistoric,
@@ -1201,8 +1204,8 @@ WalSndKill(int code, Datum arg)
12011204
* always be one descriptor left open until the process ends, but never
12021205
* more than one.
12031206
*/
1204-
void
1205-
XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
1207+
static void
1208+
XLogRead(char *buf, XLogRecPtr startptr, Size count)
12061209
{
12071210
char *p;
12081211
XLogRecPtr recptr;
@@ -1222,17 +1225,53 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
12221225

12231226
startoff = recptr % XLogSegSize;
12241227

1225-
if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo) || sendTimeLine != tli)
1228+
if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo))
12261229
{
12271230
char path[MAXPGPATH];
12281231

12291232
/* Switch to another logfile segment */
12301233
if (sendFile >= 0)
12311234
close(sendFile);
12321235

1233-
sendTimeLine = tli;
12341236
XLByteToSeg(recptr, sendSegNo);
1235-
XLogFilePath(path, sendTimeLine, sendSegNo);
1237+
1238+
/*-------
1239+
* When reading from a historic timeline, and there is a timeline
1240+
* switch within this segment, read from the WAL segment belonging
1241+
* to the new timeline.
1242+
*
1243+
* For example, imagine that this server is currently on timeline
1244+
* 5, and we're streaming timeline 4. The switch from timeline 4
1245+
* to 5 happened at 0/13002088. In pg_xlog, we have these files:
1246+
*
1247+
* ...
1248+
* 000000040000000000000012
1249+
* 000000040000000000000013
1250+
* 000000050000000000000013
1251+
* 000000050000000000000014
1252+
* ...
1253+
*
1254+
* In this situation, when requested to send the WAL from
1255+
* segment 0x13, on timeline 4, we read the WAL from file
1256+
* 000000050000000000000013. Archive recovery prefers files from
1257+
* newer timelines, so if the segment was restored from the
1258+
* archive on this server, the file belonging to the old timeline,
1259+
* 000000040000000000000013, might not exist. Their contents are
1260+
* equal up to the switchpoint, because at a timeline switch, the
1261+
* used portion of the old segment is copied to the new file.
1262+
*-------
1263+
*/
1264+
curFileTimeLine = sendTimeLine;
1265+
if (sendTimeLineIsHistoric)
1266+
{
1267+
XLogSegNo endSegNo;
1268+
1269+
XLByteToSeg(sendTimeLineValidUpto, endSegNo);
1270+
if (sendSegNo == endSegNo)
1271+
curFileTimeLine = sendTimeLineNextTLI;
1272+
}
1273+
1274+
XLogFilePath(path, curFileTimeLine, sendSegNo);
12361275

12371276
sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
12381277
if (sendFile < 0)
@@ -1246,7 +1285,7 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
12461285
ereport(ERROR,
12471286
(errcode_for_file_access(),
12481287
errmsg("requested WAL segment %s has already been removed",
1249-
XLogFileNameP(sendTimeLine, sendSegNo))));
1288+
XLogFileNameP(curFileTimeLine, sendSegNo))));
12501289
else
12511290
ereport(ERROR,
12521291
(errcode_for_file_access(),
@@ -1263,7 +1302,7 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
12631302
ereport(ERROR,
12641303
(errcode_for_file_access(),
12651304
errmsg("could not seek in log segment %s to offset %u: %m",
1266-
XLogFileNameP(sendTimeLine, sendSegNo),
1305+
XLogFileNameP(curFileTimeLine, sendSegNo),
12671306
startoff)));
12681307
sendOff = startoff;
12691308
}
@@ -1280,7 +1319,7 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
12801319
ereport(ERROR,
12811320
(errcode_for_file_access(),
12821321
errmsg("could not read from log segment %s, offset %u, length %lu: %m",
1283-
XLogFileNameP(sendTimeLine, sendSegNo),
1322+
XLogFileNameP(curFileTimeLine, sendSegNo),
12841323
sendOff, (unsigned long) segbytes)));
12851324
}
12861325

@@ -1524,7 +1563,7 @@ XLogSend(bool *caughtup)
15241563
* calls.
15251564
*/
15261565
enlargeStringInfo(&output_message, nbytes);
1527-
XLogRead(&output_message.data[output_message.len], sendTimeLine, startptr, nbytes);
1566+
XLogRead(&output_message.data[output_message.len], startptr, nbytes);
15281567
output_message.len += nbytes;
15291568
output_message.data[output_message.len] = '\0';
15301569

src/include/access/timeline.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
3535
extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
3636
XLogRecPtr switchpoint, char *reason);
3737
extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size);
38+
extern void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end);
3839
extern bool tliInHistory(TimeLineID tli, List *expectedTLIs);
3940
extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history);
4041
extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history,

src/include/replication/walsender_private.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ extern WalSndCtlData *WalSndCtl;
9595

9696

9797
extern void WalSndSetState(WalSndState state);
98-
extern void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count);
9998

10099
/*
101100
* Internal functions for parsing the replication grammar, in repl_gram.y and

0 commit comments

Comments
 (0)