Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 41f9ffd

Browse files
committed
If a backup-end record is not seen, and we reach the end of recovery from a
streamed backup, throw an error and refuse to start up. The restore has not finished correctly in that case and the data directory is possibly corrupt. We already errored out in the case of archive recovery, but could not do so during crash recovery because we couldn't distinguish between the case where pg_start_backup() was called and the database then crashed (must not error; the data is OK), and the case where we're restoring from a backup and not all the needed WAL was replayed (the data can be corrupt). To distinguish those cases, add a line to backup_label to indicate whether the backup was taken with pg_start/stop_backup() or by streaming (i.e., pg_basebackup). This requires a re-initdb, because a new field was added to the control file.
1 parent 9f17ffd commit 41f9ffd

File tree

2 files changed

+41
-7
lines changed

2 files changed

+41
-7
lines changed

src/backend/access/transam/xlog.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,8 @@ static bool CheckForStandbyTrigger(void);
662662
static void xlog_outrec(StringInfo buf, XLogRecord *record);
663663
#endif
664664
static void pg_start_backup_callback(int code, Datum arg);
665-
static bool read_backup_label(XLogRecPtr *checkPointLoc);
665+
static bool read_backup_label(XLogRecPtr *checkPointLoc,
666+
bool *backupEndRequired);
666667
static void rm_redo_error_callback(void *arg);
667668
static int get_sync_bit(int method);
668669

@@ -6016,6 +6017,7 @@ StartupXLOG(void)
60166017
XLogRecord *record;
60176018
uint32 freespace;
60186019
TransactionId oldestActiveXID;
6020+
bool backupEndRequired = false;
60196021

60206022
/*
60216023
* Read control file and check XLOG status looks valid.
@@ -6149,7 +6151,7 @@ StartupXLOG(void)
61496151
if (StandbyMode)
61506152
OwnLatch(&XLogCtl->recoveryWakeupLatch);
61516153

6152-
if (read_backup_label(&checkPointLoc))
6154+
if (read_backup_label(&checkPointLoc, &backupEndRequired))
61536155
{
61546156
/*
61556157
* When a backup_label file is present, we want to roll forward from
@@ -6328,7 +6330,10 @@ StartupXLOG(void)
63286330
* set backupStartPoint if we're starting recovery from a base backup
63296331
*/
63306332
if (haveBackupLabel)
6333+
{
63316334
ControlFile->backupStartPoint = checkPoint.redo;
6335+
ControlFile->backupEndRequired = backupEndRequired;
6336+
}
63326337
ControlFile->time = (pg_time_t) time(NULL);
63336338
/* No need to hold ControlFileLock yet, we aren't up far enough */
63346339
UpdateControlFile();
@@ -6698,9 +6703,13 @@ StartupXLOG(void)
66986703
* crashes while an online backup is in progress. We must not treat
66996704
* that as an error, or the database will refuse to start up.
67006705
*/
6701-
if (InArchiveRecovery)
6706+
if (InArchiveRecovery || ControlFile->backupEndRequired)
67026707
{
6703-
if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
6708+
if (ControlFile->backupEndRequired)
6709+
ereport(FATAL,
6710+
(errmsg("WAL ends before end of online backup"),
6711+
errhint("All WAL generated while online backup was taken must be available at recovery.")));
6712+
else if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
67046713
ereport(FATAL,
67056714
(errmsg("WAL ends before end of online backup"),
67066715
errhint("Online backup started with pg_start_backup() must be ended with pg_stop_backup(), and all WAL up to that point must be available at recovery.")));
@@ -8531,6 +8540,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
85318540
if (XLByteLT(ControlFile->minRecoveryPoint, lsn))
85328541
ControlFile->minRecoveryPoint = lsn;
85338542
MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr));
8543+
ControlFile->backupEndRequired = false;
85348544
UpdateControlFile();
85358545

85368546
LWLockRelease(ControlFileLock);
@@ -9013,6 +9023,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
90139023
startpoint.xlogid, startpoint.xrecoff, xlogfilename);
90149024
appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n",
90159025
checkpointloc.xlogid, checkpointloc.xrecoff);
9026+
appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n",
9027+
exclusive ? "pg_start_backup" : "streamed");
90169028
appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
90179029
appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
90189030

@@ -9768,15 +9780,19 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
97689780
*
97699781
* Returns TRUE if a backup_label was found (and fills the checkpoint
97709782
* location and its REDO location into *checkPointLoc and RedoStartLSN,
9771-
* respectively); returns FALSE if not.
9783+
* respectively); returns FALSE if not. If this backup_label came from a
9784+
* streamed backup, *backupEndRequired is set to TRUE.
97729785
*/
97739786
static bool
9774-
read_backup_label(XLogRecPtr *checkPointLoc)
9787+
read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired)
97759788
{
97769789
char startxlogfilename[MAXFNAMELEN];
97779790
TimeLineID tli;
97789791
FILE *lfp;
97799792
char ch;
9793+
char backuptype[20];
9794+
9795+
*backupEndRequired = false;
97809796

97819797
/*
97829798
* See if label file is present
@@ -9809,6 +9825,17 @@ read_backup_label(XLogRecPtr *checkPointLoc)
98099825
ereport(FATAL,
98109826
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
98119827
errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
9828+
/*
9829+
* BACKUP METHOD line is new in 9.0. Don't complain if it doesn't exist,
9830+
* in case you're restoring from a backup taken with an 9.0 beta version
9831+
* that didn't emit it.
9832+
*/
9833+
if (fscanf(lfp, "BACKUP METHOD: %19s", backuptype) == 1)
9834+
{
9835+
if (strcmp(backuptype, "streamed") == 0)
9836+
*backupEndRequired = true;
9837+
}
9838+
98129839
if (ferror(lfp) || FreeFile(lfp))
98139840
ereport(FATAL,
98149841
(errcode_for_file_access(),

src/include/catalog/pg_control.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
/* Version identifier for this pg_control format */
24-
#define PG_CONTROL_VERSION 903
24+
#define PG_CONTROL_VERSION 911
2525

2626
/*
2727
* Body of CheckPoint XLOG records. This is declared here because we keep
@@ -137,9 +137,16 @@ typedef struct ControlFileData
137137
* we use the redo pointer as a cross-check when we see an end-of-backup
138138
* record, to make sure the end-of-backup record corresponds to the base
139139
* backup we're recovering from.
140+
*
141+
* If backupEndRequired is true, we know for sure that we're restoring
142+
* from a backup, and must see a backup-end record before we can safely
143+
* start up. If it's false, but backupStartPoint is set, a backup_label
144+
* file was found at startup but it may have been a leftover from a stray
145+
* pg_start_backup() call, not accompanied by pg_stop_backup().
140146
*/
141147
XLogRecPtr minRecoveryPoint;
142148
XLogRecPtr backupStartPoint;
149+
bool backupEndRequired;
143150

144151
/*
145152
* Parameter settings that determine if the WAL can be used for archival

0 commit comments

Comments
 (0)