Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip WAL recycling and preallocation during archive recovery.
author: Noah Misch <noah@leadboat.com>
Tue, 29 Jun 2021 01:34:56 +0000 (18:34 -0700)
committer: Noah Misch <noah@leadboat.com>
Tue, 29 Jun 2021 01:34:56 +0000 (18:34 -0700)
The previous commit addressed the chief consequences of a race condition
between InstallXLogFileSegment() and KeepFileRestoredFromArchive().  Fix
three lesser consequences.  A spurious durable_rename_excl() LOG message
remained possible.  KeepFileRestoredFromArchive() wasted the proceeds of
WAL recycling and preallocation.  Finally, XLogFileInitInternal() could
return a descriptor for a file that KeepFileRestoredFromArchive() had
already unlinked.  That felt like a recipe for future bugs.

Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com

src/backend/access/transam/xlog.c

index 8cda20d803d5694feda64e69eb56e9e9eeca99c3..2c6e21bea5a811de134b1e48f9db1784ae411704 100644 (file)
@@ -662,6 +662,16 @@ typedef struct XLogCtlData
     */
    bool        SharedHotStandbyActive;
 
+   /*
+    * InstallXLogFileSegmentActive indicates whether the checkpointer should
+    * arrange for future segments by recycling and/or PreallocXlogFiles().
+    * Protected by ControlFileLock.  Only the startup process changes it.  If
+    * true, anyone can use InstallXLogFileSegment().  If false, the startup
+    * process owns the exclusive right to install segments, by reading from
+    * the archive and possibly replacing existing files.
+    */
+   bool        InstallXLogFileSegmentActive;
+
    /*
     * SharedPromoteIsTriggered indicates if a standby promotion has been
     * triggered.  Protected by info_lck.
@@ -921,6 +931,7 @@ static int  XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
                         int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
 static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                        bool fetching_ckpt, XLogRecPtr tliRecPtr);
+static void XLogShutdownWalRcv(void);
 static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
 static void XLogFileClose(void);
 static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -3625,8 +3636,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
  * is false.)
  *
  * Returns true if the file was installed successfully.  false indicates that
- * max_segno limit was exceeded, or an error occurred while renaming the
- * file into place.
+ * max_segno limit was exceeded, the startup process has disabled this
+ * function for now, or an error occurred while renaming the file into place.
  */
 static bool
 InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@@ -3638,6 +3649,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
    XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
 
    LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   if (!XLogCtl->InstallXLogFileSegmentActive)
+   {
+       LWLockRelease(ControlFileLock);
+       return false;
+   }
 
    if (!find_free)
    {
@@ -3745,6 +3761,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
     */
    if (source == XLOG_FROM_ARCHIVE)
    {
+       Assert(!XLogCtl->InstallXLogFileSegmentActive);
        KeepFileRestoredFromArchive(path, xlogfname);
 
        /*
@@ -3946,6 +3963,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
    char        path[MAXPGPATH];
    uint64      offset;
 
+   if (!XLogCtl->InstallXLogFileSegmentActive)
+       return;                 /* unlocked check says no */
+
    XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
    offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
    if (offset >= (uint32) (0.75 * wal_segment_size))
@@ -4227,6 +4247,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
     */
    if (wal_recycle &&
        *endlogSegNo <= recycleSegNo &&
+       XLogCtl->InstallXLogFileSegmentActive &&    /* callee rechecks this */
        lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
        InstallXLogFileSegment(endlogSegNo, path,
                               true, recycleSegNo))
@@ -4240,7 +4261,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
    }
    else
    {
-       /* No need for any more future segments... */
+       /* No need for any more future segments, or recycling failed ... */
        int         rc;
 
        ereport(DEBUG2,
@@ -5226,6 +5247,7 @@ XLOGShmemInit(void)
    XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
    XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
    XLogCtl->SharedHotStandbyActive = false;
+   XLogCtl->InstallXLogFileSegmentActive = false;
    XLogCtl->SharedPromoteIsTriggered = false;
    XLogCtl->WalWriterSleeping = false;
 
@@ -5253,6 +5275,11 @@ BootStrapXLOG(void)
    struct timeval tv;
    pg_crc32c   crc;
 
+   /* allow ordinary WAL segment creation, like StartupXLOG() would */
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   XLogCtl->InstallXLogFileSegmentActive = true;
+   LWLockRelease(ControlFileLock);
+
    /*
     * Select a hopefully-unique system identifier code for this installation.
     * We use the result of gettimeofday(), including the fractional seconds
@@ -7619,7 +7646,7 @@ StartupXLOG(void)
     * the startup checkpoint record. It will trump over the checkpoint and
     * subsequent records if it's still alive when we start writing WAL.
     */
-   ShutdownWalRcv();
+   XLogShutdownWalRcv();
 
    /*
     * Reset unlogged relations to the contents of their INIT fork. This is
@@ -7644,7 +7671,7 @@ StartupXLOG(void)
     * recovery, e.g., timeline history file) from archive or pg_wal.
     *
     * Note that standby mode must be turned off after killing WAL receiver,
-    * i.e., calling ShutdownWalRcv().
+    * i.e., calling XLogShutdownWalRcv().
     */
    Assert(!WalRcvStreaming());
    StandbyMode = false;
@@ -7709,6 +7736,14 @@ StartupXLOG(void)
     */
    oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
 
+   /*
+    * Allow ordinary WAL segment creation before any exitArchiveRecovery(),
+    * which sometimes creates a segment, and after the last ReadRecord().
+    */
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   XLogCtl->InstallXLogFileSegmentActive = true;
+   LWLockRelease(ControlFileLock);
+
    /*
     * Consider whether we need to assign a new timeline ID.
     *
@@ -12378,7 +12413,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                     */
                    if (StandbyMode && CheckForStandbyTrigger())
                    {
-                       ShutdownWalRcv();
+                       XLogShutdownWalRcv();
                        return false;
                    }
 
@@ -12426,7 +12461,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                     * WAL that we restore from archive.
                     */
                    if (WalRcvStreaming())
-                       ShutdownWalRcv();
+                       XLogShutdownWalRcv();
 
                    /*
                     * Before we sleep, re-scan for possible new timelines if
@@ -12553,7 +12588,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                     */
                    if (pendingWalRcvRestart && !startWalReceiver)
                    {
-                       ShutdownWalRcv();
+                       XLogShutdownWalRcv();
 
                        /*
                         * Re-scan for possible new timelines if we were
@@ -12603,6 +12638,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                     tli, curFileTLI);
                        }
                        curFileTLI = tli;
+                       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+                       XLogCtl->InstallXLogFileSegmentActive = true;
+                       LWLockRelease(ControlFileLock);
                        RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
                                             PrimarySlotName,
                                             wal_receiver_create_temp_slot);
@@ -12770,6 +12808,17 @@ StartupRequestWalReceiverRestart(void)
    }
 }
 
+/* Call ShutdownWalRcv(), then clear InstallXLogFileSegmentActive while holding ControlFileLock. */
+static void
+XLogShutdownWalRcv(void)
+{
+   ShutdownWalRcv();
+
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   XLogCtl->InstallXLogFileSegmentActive = false;
+   LWLockRelease(ControlFileLock);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().