
Commit cbc7a7a

Fix a recently-introduced race condition in LISTEN/NOTIFY handling.
Commit 566372b fixed some race conditions involving concurrent SimpleLruTruncate calls, but it introduced new ones in async.c. A newly-listening backend could attempt to read Notify SLRU pages that were in process of being truncated, possibly causing an error. Also, the QUEUE_TAIL pointer could become set to a value that's not equal to the queue position of any backend. While that's fairly harmless in v13 and up (thanks to commit 51004c7), in older branches it resulted in near-permanent disabling of the queue truncation logic, so that continued use of NOTIFY led to queue-fill warnings and eventual inability to send any more notifies. (A server restart is enough to make that go away, but it's still pretty unpleasant.)

The core of the problem is confusion about whether QUEUE_TAIL represents the "logical" tail of the queue (i.e., the oldest still-interesting data) or the "physical" tail (the oldest data we've not yet truncated away). To fix, split that into two variables. QUEUE_TAIL regains its definition as the logical tail, and we introduce a new variable to track the oldest un-truncated page.

Per report from Mikael Gustavsson. Like the previous patch, back-patch to all supported branches.

Discussion: https://postgr.es/m/1b8561412e8a4f038d7a491c8b922788@smhi.se
1 parent fce17e4 commit cbc7a7a
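For readers unfamiliar with async.c's queue layout, here is a minimal single-threaded C sketch of the two-tail scheme this patch introduces. The names (advance_tail, listen_start_page, PAGES_PER_SEGMENT) and the flat page-number positions are illustrative stand-ins, not the PostgreSQL source; the real code guards these updates with AsyncQueueLock and NotifyQueueTailLock and truncates via SimpleLruTruncate.

/* Minimal model of the patched two-tail scheme.  Hypothetical
 * simplifications: queue positions are plain page numbers, and the
 * locking/truncation machinery of async.c is reduced to comments. */
#include <stdio.h>

#define PAGES_PER_SEGMENT 32    /* stands in for SLRU_PAGES_PER_SEGMENT */

static int queue_tail;  /* logical tail: oldest still-interesting page */
static int stop_page;   /* physical tail: oldest not-yet-truncated page */

/* A newly-listening backend starts scanning at the logical tail; the
 * update ordering in advance_tail() guarantees those pages still exist. */
static int listen_start_page(void)
{
    return queue_tail;
}

static void advance_tail(int slowest_backend_page)
{
    /* 1. Advance the logical tail first (done under the queue lock in
     *    the real code), so an incoming listener cannot begin reading
     *    pages that are about to disappear. */
    queue_tail = slowest_backend_page;

    /* 2. Truncate whole segments up to the logical tail, without
     *    holding the queue lock (SimpleLruTruncate in async.c). */
    int newtailpage = queue_tail - queue_tail % PAGES_PER_SEGMENT;

    /* 3. Only now advertise the new physical tail; until this point,
     *    free-space checks still treat the old segment as occupied. */
    stop_page = newtailpage;
}

int main(void)
{
    advance_tail(70);
    printf("logical tail = %d, physical tail = %d, listeners start at %d\n",
           queue_tail, stop_page, listen_start_page());
    return 0;
}

The pre-patch bug falls out of this ordering: with only one tail variable, either listeners could start at a page already being truncated (tail advanced too late), or the variable could end up matching no backend's position (tail advanced too early), which in pre-v13 branches wedged the truncation logic.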

File tree

1 file changed (+39, -13 lines)

src/backend/commands/async.c

Lines changed: 39 additions & 13 deletions
@@ -234,7 +234,7 @@ typedef struct QueueBackendStatus
  * When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the
  * entries of other backends and also change the head pointer. When holding
  * both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can
- * change the tail pointer.
+ * change the tail pointers.
  *
  * AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
  * In order to avoid deadlocks, whenever we need multiple locks, we first get
@@ -249,6 +249,8 @@ typedef struct AsyncQueueControl
 	QueuePosition head;		/* head points to the next free location */
 	QueuePosition tail;		/* the global tail is equivalent to the pos of
 							 * the "slowest" backend */
+	int			stopPage;	/* oldest unrecycled page; must be <=
+							 * tail.page */
 	TimestampTz lastQueueFillWarn;	/* time of last queue-full msg */
 	QueueBackendStatus backend[FLEXIBLE_ARRAY_MEMBER];
 	/* backend[0] is not used; used entries are from [1] to [MaxBackends] */
@@ -258,6 +260,7 @@ static AsyncQueueControl *asyncQueueControl;
 
 #define QUEUE_HEAD					(asyncQueueControl->head)
 #define QUEUE_TAIL					(asyncQueueControl->tail)
+#define QUEUE_STOP_PAGE				(asyncQueueControl->stopPage)
 #define QUEUE_BACKEND_PID(i)		(asyncQueueControl->backend[i].pid)
 #define QUEUE_BACKEND_DBOID(i)		(asyncQueueControl->backend[i].dboid)
 #define QUEUE_BACKEND_POS(i)		(asyncQueueControl->backend[i].pos)
@@ -467,6 +470,7 @@ AsyncShmemInit(void)
 
 		SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
 		SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
+		QUEUE_STOP_PAGE = 0;
 		asyncQueueControl->lastQueueFillWarn = 0;
 		/* zero'th entry won't be used, but let's initialize it anyway */
 		for (i = 0; i <= MaxBackends; i++)
@@ -1239,7 +1243,7 @@ asyncQueueIsFull(void)
 	nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
 	if (nexthead > QUEUE_MAX_PAGE)
 		nexthead = 0;			/* wrap around */
-	boundary = QUEUE_POS_PAGE(QUEUE_TAIL);
+	boundary = QUEUE_STOP_PAGE;
 	boundary -= boundary % SLRU_PAGES_PER_SEGMENT;
 	return asyncQueuePagePrecedes(nexthead, boundary);
 }
@@ -1430,6 +1434,11 @@ pg_notification_queue_usage(PG_FUNCTION_ARGS)
  * Return the fraction of the queue that is currently occupied.
  *
  * The caller must hold AsyncQueueLock in (at least) shared mode.
+ *
+ * Note: we measure the distance to the logical tail page, not the physical
+ * tail page.  In some sense that's wrong, but the relative position of the
+ * physical tail is affected by details such as SLRU segment boundaries,
+ * so that a result based on that is unpleasantly unstable.
  */
 static double
 asyncQueueUsage(void)
@@ -2018,15 +2027,32 @@ asyncQueueAdvanceTail(void)
 	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
 	LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
 
-	/* Compute the new tail. */
+	/*
+	 * Compute the new tail.  Pre-v13, it's essential that QUEUE_TAIL be exact
+	 * (ie, exactly match at least one backend's queue position), so it must
+	 * be updated atomically with the actual computation.  Since v13, we could
+	 * get away with not doing it like that, but it seems prudent to keep it
+	 * so.
+	 *
+	 * Also, because incoming backends will scan forward from QUEUE_TAIL, that
+	 * must be advanced before we can truncate any data.  Thus, QUEUE_TAIL is
+	 * the logical tail, while QUEUE_STOP_PAGE is the physical tail, or oldest
+	 * un-truncated page.  When QUEUE_STOP_PAGE != QUEUE_POS_PAGE(QUEUE_TAIL),
+	 * there are pages we can truncate but haven't yet finished doing so.
+	 *
+	 * For concurrency's sake, we don't want to hold AsyncQueueLock while
+	 * performing SimpleLruTruncate.  This is OK because no backend will try
+	 * to access the pages we are in the midst of truncating.
+	 */
 	LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
 	min = QUEUE_HEAD;
 	for (i = 1; i <= MaxBackends; i++)
 	{
 		if (QUEUE_BACKEND_PID(i) != InvalidPid)
 			min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
 	}
-	oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
+	QUEUE_TAIL = min;
+	oldtailpage = QUEUE_STOP_PAGE;
 	LWLockRelease(AsyncQueueLock);
 
 	/*
@@ -2045,16 +2071,16 @@
 		 * the lock again.
 		 */
 		SimpleLruTruncate(AsyncCtl, newtailpage);
-	}
 
-	/*
-	 * Advertise the new tail.  This changes asyncQueueIsFull()'s verdict for
-	 * the segment immediately prior to the new tail, allowing fresh data into
-	 * that segment.
-	 */
-	LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
-	QUEUE_TAIL = min;
-	LWLockRelease(AsyncQueueLock);
+		/*
+		 * Update QUEUE_STOP_PAGE.  This changes asyncQueueIsFull()'s verdict
+		 * for the segment immediately prior to the old tail, allowing fresh
+		 * data into that segment.
+		 */
+		LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
+		QUEUE_STOP_PAGE = newtailpage;
+		LWLockRelease(AsyncQueueLock);
+	}
 
 	LWLockRelease(NotifyQueueTailLock);
 }
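The asyncQueueIsFull() hunk above is easy to read past: the free-space boundary must now be derived from the physical stop page, because pages between it and the logical tail may still exist on disk mid-truncation. The standalone C illustration below sketches that boundary arithmetic under stated assumptions: the QUEUE_MAX_PAGE value and the page_precedes() circular-ordering test are simplified stand-ins, not copies of the async.c definitions.

/* Standalone illustration of the patched asyncQueueIsFull() boundary
 * logic.  QUEUE_MAX_PAGE's value and page_precedes() are simplified
 * assumptions for a circular page space of 1024 pages. */
#include <stdbool.h>
#include <stdio.h>

#define SLRU_PAGES_PER_SEGMENT 32
#define QUEUE_MAX_PAGE 1023		/* illustrative; async.c derives its own */

/* True if page p1 precedes p2 on the circular page range, judged by the
 * signed distance modulo the page-space size. */
static bool page_precedes(int p1, int p2)
{
	int diff = p1 - p2;

	if (diff >= (QUEUE_MAX_PAGE + 1) / 2)
		diff -= QUEUE_MAX_PAGE + 1;
	else if (diff < -((QUEUE_MAX_PAGE + 1) / 2))
		diff += QUEUE_MAX_PAGE + 1;
	return diff < 0;
}

static bool queue_is_full(int headpage, int stoppage)
{
	int nexthead = headpage + 1;
	int boundary;

	if (nexthead > QUEUE_MAX_PAGE)
		nexthead = 0;			/* wrap around */

	/* Round the *physical* tail down to a segment boundary: pages in
	 * [boundary, stoppage) may still exist mid-truncation, so the head
	 * must not be allowed to advance into that segment. */
	boundary = stoppage - stoppage % SLRU_PAGES_PER_SEGMENT;
	return page_precedes(nexthead, boundary);
}

int main(void)
{
	/* Head approaching a partially-truncated tail segment from behind. */
	printf("%d\n", queue_is_full(30, 40));	/* 1: would enter tail's segment */
	printf("%d\n", queue_is_full(100, 40));	/* 0: plenty of room ahead */
	return 0;
}

Had the boundary stayed tied to the logical tail while truncation lagged, the head could have been waved into a segment whose pages were still being removed, which is exactly the window the split closes.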
