Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fe0b29b

Browse files
author
Commitfest Bot
committed
[CF 5276] v3 - Fix race between WAL flush and InstallXLogFileSegment()
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5276 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/CAE-ML+-vkbp_oU8qUdJux01EjQK1sDic48yT_aEPmoA0yC8mxw@mail.gmail.com Author(s): Thomas Munro
2 parents 2fd3e2f + 5f18348 commit fe0b29b

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

src/backend/access/transam/xlog.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,11 @@ typedef struct XLogCtlData
462462

463463
/* Fake LSN counter, for unlogged relations. */
464464
pg_atomic_uint64 unloggedLSN;
465+
/*
466+
* Approximation of the last WAL segment number that is known to have been
467+
* installed by InstallXLogFileSegment().
468+
*/
469+
pg_atomic_uint64 last_known_installed_segno;
465470

466471
/* Time and LSN of last xlog segment switch. Protected by WALWriteLock. */
467472
pg_time_t lastSegSwitchTime;
@@ -3224,7 +3229,28 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
32243229
errmsg("could not open file \"%s\": %m", path)));
32253230
}
32263231
else
3232+
{
3233+
/*
3234+
* The file is there, but it is possible that InstallXLogFileSegment()
3235+
* has recently renamed it and not yet made the new name durable. We
3236+
* don't want to be able to flush data into a file whose name might
3237+
* not survive power loss, since it would become unreachable in
3238+
* recovery. Since InstallXlogFileSegment() holds ControlFileLock,
3239+
* acquiring it here is enough to wait for any durable_rename() call
3240+
* that might have started before we opened the file.
3241+
*
3242+
* We can skip that if we can already see that the WAL space we need
3243+
* is fully synchronized. We may see a slightly out of date value
3244+
* since we haven't acquired the lock yet, but that's OK, it just
3245+
* means we might take the lock when we don't need to.
3246+
*/
3247+
if (pg_atomic_read_u64(&XLogCtl->last_known_installed_segno) < logsegno)
3248+
{
3249+
LWLockAcquire(ControlFileLock, LW_SHARED);
3250+
LWLockRelease(ControlFileLock);
3251+
}
32273252
return fd;
3253+
}
32283254

32293255
/*
32303256
* Initialize an empty (all zeroes) segment. NOTE: it is possible that
@@ -3576,6 +3602,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
35763602

35773603
XLogFilePath(path, tli, *segno, wal_segment_size);
35783604

3605+
/*
3606+
* Acquire and keep the ControlFileLock held *until* we have renamed the
3607+
* target segment durably. See XLogFileInitInternal() for details as to why
3608+
* it is dangerous otherwise.
3609+
*/
35793610
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
35803611
if (!XLogCtl->InstallXLogFileSegmentActive)
35813612
{
@@ -3612,6 +3643,8 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
36123643
return false;
36133644
}
36143645

3646+
pg_atomic_write_u64(&XLogCtl->last_known_installed_segno, *segno);
3647+
36153648
LWLockRelease(ControlFileLock);
36163649

36173650
return true;
@@ -4970,6 +5003,7 @@ XLOGShmemInit(void)
49705003
char *allocptr;
49715004
int i;
49725005
ControlFileData *localControlFile;
5006+
XLogSegNo lastKnownInstalledSegno = 0;
49735007

49745008
#ifdef WAL_DEBUG
49755009

@@ -5017,6 +5051,12 @@ XLOGShmemInit(void)
50175051
{
50185052
memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
50195053
pfree(localControlFile);
5054+
/*
5055+
* A decent approximation for the last known installed WAL segment
5056+
* number can be the segment in which the checkpoint record resides,
5057+
* specially in cases where we have had a clean shutdown.
5058+
*/
5059+
XLByteToSeg(ControlFile->checkPoint, lastKnownInstalledSegno, wal_segment_size);
50205060
}
50215061

50225062
/*
@@ -5071,6 +5111,7 @@ XLOGShmemInit(void)
50715111
pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr);
50725112
pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr);
50735113
pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr);
5114+
pg_atomic_init_u64(&XLogCtl->last_known_installed_segno, lastKnownInstalledSegno);
50745115
}
50755116

50765117
/*

src/backend/storage/file/fd.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,10 @@ fsync_fname(const char *fname, bool isdir)
773773
* might not be on the same filesystem. Therefore this routine does not
774774
* support renaming across directories.
775775
*
776+
* Note that there is a window between the rename and the fsync(s). If "newfile"
777+
* is opened, written to and then fdatasynced, and if there is a crash before
778+
* the fsync(s) hits disk, the written data could be .
779+
*
776780
* Log errors with the caller specified severity.
777781
*
778782
* Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not

0 commit comments

Comments
 (0)