@@ -355,10 +355,13 @@ typedef struct XLogCtlInsert
355
355
* exclusiveBackup is true if a backup started with pg_start_backup() is
356
356
* in progress, and nonExclusiveBackups is a counter indicating the number
357
357
* of streaming base backups currently in progress. forcePageWrites is
358
- * set to true when either of these is non-zero.
358
+ * set to true when either of these is non-zero. lastBackupStart is the
359
+ * latest checkpoint redo location used as a starting point for an online
360
+ * backup.
359
361
*/
360
362
bool exclusiveBackup ;
361
363
int nonExclusiveBackups ;
364
+ XLogRecPtr lastBackupStart ;
362
365
} XLogCtlInsert ;
363
366
364
367
/*
@@ -8808,6 +8811,19 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
8808
8811
errmsg ("backup label too long (max %d bytes)" ,
8809
8812
MAXPGPATH )));
8810
8813
8814
+ /*
8815
+ * Force an XLOG file switch before the checkpoint, to ensure that the WAL
8816
+ * segment the checkpoint is written to doesn't contain pages with old
8817
+ * timeline IDs. That would otherwise happen if you called
8818
+ * pg_start_backup() right after restoring from a PITR archive: the first
8819
+ * WAL segment containing the startup checkpoint has pages in the
8820
+ * beginning with the old timeline ID. That can cause trouble at recovery:
8821
+ * we won't have a history file covering the old timeline if pg_xlog
8822
+ * directory was not included in the base backup and the WAL archive was
8823
+ * cleared too before starting the backup.
8824
+ */
8825
+ RequestXLogSwitch ();
8826
+
8811
8827
/*
8812
8828
* Mark backup active in shared memory. We must do full-page WAL writes
8813
8829
* during an on-line backup even if not doing so at other times, because
@@ -8843,43 +8859,54 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
8843
8859
XLogCtl -> Insert .forcePageWrites = true;
8844
8860
LWLockRelease (WALInsertLock );
8845
8861
8846
- /*
8847
- * Force an XLOG file switch before the checkpoint, to ensure that the WAL
8848
- * segment the checkpoint is written to doesn't contain pages with old
8849
- * timeline IDs. That would otherwise happen if you called
8850
- * pg_start_backup() right after restoring from a PITR archive: the first
8851
- * WAL segment containing the startup checkpoint has pages in the
8852
- * beginning with the old timeline ID. That can cause trouble at recovery:
8853
- * we won't have a history file covering the old timeline if pg_xlog
8854
- * directory was not included in the base backup and the WAL archive was
8855
- * cleared too before starting the backup.
8856
- */
8857
- RequestXLogSwitch ();
8858
-
8859
8862
/* Ensure we release forcePageWrites if fail below */
8860
8863
PG_ENSURE_ERROR_CLEANUP (pg_start_backup_callback , (Datum ) BoolGetDatum (exclusive ));
8861
8864
{
8862
- /*
8863
- * Force a CHECKPOINT. Aside from being necessary to prevent torn
8864
- * page problems, this guarantees that two successive backup runs will
8865
- * have different checkpoint positions and hence different history
8866
- * file names, even if nothing happened in between.
8867
- *
8868
- * We use CHECKPOINT_IMMEDIATE only if requested by user (via passing
8869
- * fast = true). Otherwise this can take awhile.
8870
- */
8871
- RequestCheckpoint (CHECKPOINT_FORCE | CHECKPOINT_WAIT |
8872
- (fast ? CHECKPOINT_IMMEDIATE : 0 ));
8865
+ bool gotUniqueStartpoint = false;
8866
+ do
8867
+ {
8868
+ /*
8869
+ * Force a CHECKPOINT. Aside from being necessary to prevent torn
8870
+ * page problems, this guarantees that two successive backup runs will
8871
+ * have different checkpoint positions and hence different history
8872
+ * file names, even if nothing happened in between.
8873
+ *
8874
+ * We use CHECKPOINT_IMMEDIATE only if requested by user (via passing
8875
+ * fast = true). Otherwise this can take awhile.
8876
+ */
8877
+ RequestCheckpoint (CHECKPOINT_FORCE | CHECKPOINT_WAIT |
8878
+ (fast ? CHECKPOINT_IMMEDIATE : 0 ));
8873
8879
8874
- /*
8875
- * Now we need to fetch the checkpoint record location, and also its
8876
- * REDO pointer. The oldest point in WAL that would be needed to
8877
- * restore starting from the checkpoint is precisely the REDO pointer.
8878
- */
8879
- LWLockAcquire (ControlFileLock , LW_SHARED );
8880
- checkpointloc = ControlFile -> checkPoint ;
8881
- startpoint = ControlFile -> checkPointCopy .redo ;
8882
- LWLockRelease (ControlFileLock );
8880
+ /*
8881
+ * Now we need to fetch the checkpoint record location, and also its
8882
+ * REDO pointer. The oldest point in WAL that would be needed to
8883
+ * restore starting from the checkpoint is precisely the REDO pointer.
8884
+ */
8885
+ LWLockAcquire (ControlFileLock , LW_SHARED );
8886
+ checkpointloc = ControlFile -> checkPoint ;
8887
+ startpoint = ControlFile -> checkPointCopy .redo ;
8888
+ LWLockRelease (ControlFileLock );
8889
+
8890
+ /*
8891
+ * If two base backups are started at the same time (in WAL
8892
+ * sender processes), we need to make sure that they use
8893
+ * different checkpoints as starting locations, because we use
8894
+ * the starting WAL location as a unique identifier for the base
8895
+ * backup in the end-of-backup WAL record and when we write the
8896
+ * backup history file. Perhaps it would be better to generate a
8897
+ * separate unique ID for each backup instead of forcing another
8898
+ * checkpoint, but taking a checkpoint right after another is
8899
+ * not that expensive either because only a few buffers have been
8900
+ * dirtied yet.
8901
+ */
8902
+ LWLockAcquire (WALInsertLock , LW_SHARED );
8903
+ if (XLByteLT (XLogCtl -> Insert .lastBackupStart , startpoint ))
8904
+ {
8905
+ XLogCtl -> Insert .lastBackupStart = startpoint ;
8906
+ gotUniqueStartpoint = true;
8907
+ }
8908
+ LWLockRelease (WALInsertLock );
8909
+ } while (!gotUniqueStartpoint );
8883
8910
8884
8911
XLByteToSeg (startpoint , _logId , _logSeg );
8885
8912
XLogFileName (xlogfilename , ThisTimeLineID , _logId , _logSeg );
0 commit comments