Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f40022f

Browse files
committed
Make WaitLatch's WL_POSTMASTER_DEATH result trustworthy; simplify callers.
Per a suggestion from Peter Geoghegan, make WaitLatch responsible for verifying that the WL_POSTMASTER_DEATH bit it returns is truthful (by testing PostmasterIsAlive). Then simplify its callers, who no longer need to do that for themselves. Remove weasel wording about falsely-set result bits from WaitLatch's API contract.
1 parent 586d356 commit f40022f

File tree

7 files changed

+47
-37
lines changed

7 files changed

+47
-37
lines changed

src/backend/port/unix_latch.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "miscadmin.h"
5151
#include "postmaster/postmaster.h"
5252
#include "storage/latch.h"
53+
#include "storage/pmsignal.h"
5354
#include "storage/shmem.h"
5455

5556
/* Are we currently in WaitLatch? The signal handler would like to know. */
@@ -160,15 +161,7 @@ DisownLatch(volatile Latch *latch)
160161
*
161162
* Returns bit mask indicating which condition(s) caused the wake-up. Note
162163
* that if multiple wake-up conditions are true, there is no guarantee that
163-
* we return all of them in one call, but we will return at least one. Also,
164-
* according to the select(2) man page on Linux, select(2) may spuriously
165-
* return and report a file descriptor as readable, when it's not. We use
166-
* select(2), so WaitLatch can also spuriously claim that a socket is
167-
* readable, or postmaster has died, even when none of the wake conditions
168-
* have been satisfied. That should be rare in practice, but the caller
169-
* should not use the return value for anything critical, re-checking the
170-
* situation with PostmasterIsAlive() or read() on a socket as necessary.
171-
* The latch and timeout flag bits can be trusted, however.
164+
* we return all of them in one call, but we will return at least one.
172165
*/
173166
int
174167
WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
@@ -318,7 +311,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
318311
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
319312
(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
320313
{
321-
result |= WL_POSTMASTER_DEATH;
314+
/*
315+
* According to the select(2) man page on Linux, select(2) may
316+
* spuriously return and report a file descriptor as readable,
317+
* when it's not; and presumably so can poll(2). It's not clear
318+
* that the relevant cases would ever apply to the postmaster
319+
* pipe, but since the consequences of falsely returning
320+
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
321+
* trouble to positively verify EOF with PostmasterIsAlive().
322+
*/
323+
if (!PostmasterIsAlive())
324+
result |= WL_POSTMASTER_DEATH;
322325
}
323326

324327
#else /* !HAVE_POLL */
@@ -380,7 +383,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
380383
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
381384
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
382385
{
383-
result |= WL_POSTMASTER_DEATH;
386+
/*
387+
* According to the select(2) man page on Linux, select(2) may
388+
* spuriously return and report a file descriptor as readable,
389+
* when it's not; and presumably so can poll(2). It's not clear
390+
* that the relevant cases would ever apply to the postmaster
391+
* pipe, but since the consequences of falsely returning
392+
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
393+
* trouble to positively verify EOF with PostmasterIsAlive().
394+
*/
395+
if (!PostmasterIsAlive())
396+
result |= WL_POSTMASTER_DEATH;
384397
}
385398
#endif /* HAVE_POLL */
386399
} while (result == 0);

src/backend/port/win32_latch.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "miscadmin.h"
2727
#include "postmaster/postmaster.h"
2828
#include "storage/latch.h"
29+
#include "storage/pmsignal.h"
2930
#include "storage/shmem.h"
3031

3132

@@ -217,8 +218,15 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
217218
else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
218219
rc == WAIT_OBJECT_0 + pmdeath_eventno)
219220
{
220-
/* Postmaster died */
221-
result |= WL_POSTMASTER_DEATH;
221+
/*
222+
* Postmaster apparently died. Since the consequences of falsely
223+
* returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
224+
* take the trouble to positively verify this with
225+
* PostmasterIsAlive(), even though there is no known reason to
226+
* think that the event could be falsely set on Windows.
227+
*/
228+
if (!PostmasterIsAlive())
229+
result |= WL_POSTMASTER_DEATH;
222230
}
223231
else
224232
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);

src/backend/postmaster/autovacuum.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ AutoVacLauncherMain(int argc, char *argv[])
574574
TimestampTz current_time = 0;
575575
bool can_launch;
576576
Dlelem *elem;
577+
int rc;
577578

578579
/*
579580
* This loop is a bit different from the normal use of WaitLatch,
@@ -592,9 +593,9 @@ AutoVacLauncherMain(int argc, char *argv[])
592593
* Wait until naptime expires or we get some type of signal (all the
593594
* signal handlers will wake us by calling SetLatch).
594595
*/
595-
WaitLatch(&MyProc->procLatch,
596-
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
597-
(nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
596+
rc = WaitLatch(&MyProc->procLatch,
597+
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
598+
(nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
598599

599600
ResetLatch(&MyProc->procLatch);
600601

@@ -604,7 +605,7 @@ AutoVacLauncherMain(int argc, char *argv[])
604605
* Emergency bailout if postmaster has died. This is to avoid the
605606
* necessity for manual cleanup of all postmaster children.
606607
*/
607-
if (!PostmasterIsAlive())
608+
if (rc & WL_POSTMASTER_DEATH)
608609
proc_exit(1);
609610

610611
/* the normal shutdown case */

src/backend/postmaster/bgwriter.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
#include "storage/buf_internals.h"
4949
#include "storage/ipc.h"
5050
#include "storage/lwlock.h"
51-
#include "storage/pmsignal.h"
5251
#include "storage/proc.h"
5352
#include "storage/shmem.h"
5453
#include "storage/smgr.h"
@@ -323,11 +322,9 @@ BackgroundWriterMain(void)
323322

324323
/*
325324
* Emergency bailout if postmaster has died. This is to avoid the
326-
* necessity for manual cleanup of all postmaster children. Note
327-
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
328-
* if it is set, recheck with PostmasterIsAlive before believing it.
325+
* necessity for manual cleanup of all postmaster children.
329326
*/
330-
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
327+
if (rc & WL_POSTMASTER_DEATH)
331328
exit(1);
332329

333330
prev_hibernate = can_hibernate;

src/backend/postmaster/checkpointer.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
#include "storage/bufmgr.h"
5151
#include "storage/ipc.h"
5252
#include "storage/lwlock.h"
53-
#include "storage/pmsignal.h"
5453
#include "storage/proc.h"
5554
#include "storage/shmem.h"
5655
#include "storage/smgr.h"
@@ -581,11 +580,9 @@ CheckpointerMain(void)
581580

582581
/*
583582
* Emergency bailout if postmaster has died. This is to avoid the
584-
* necessity for manual cleanup of all postmaster children. Note
585-
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
586-
* if it is set, recheck with PostmasterIsAlive before believing it.
583+
* necessity for manual cleanup of all postmaster children.
587584
*/
588-
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
585+
if (rc & WL_POSTMASTER_DEATH)
589586
exit(1);
590587
}
591588
}

src/backend/postmaster/pgstat.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
#include "storage/ipc.h"
5252
#include "storage/latch.h"
5353
#include "storage/pg_shmem.h"
54-
#include "storage/pmsignal.h"
5554
#include "storage/procsignal.h"
5655
#include "utils/ascii.h"
5756
#include "utils/guc.h"
@@ -3227,11 +3226,9 @@ PgstatCollectorMain(int argc, char *argv[])
32273226

32283227
/*
32293228
* Emergency bailout if postmaster has died. This is to avoid the
3230-
* necessity for manual cleanup of all postmaster children. Note
3231-
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
3232-
* if it is set, recheck with PostmasterIsAlive before believing it.
3229+
* necessity for manual cleanup of all postmaster children.
32333230
*/
3234-
if ((wr & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
3231+
if (wr & WL_POSTMASTER_DEATH)
32353232
break;
32363233
} /* end of outer loop */
32373234

src/backend/postmaster/walwriter.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
#include "storage/bufmgr.h"
5454
#include "storage/ipc.h"
5555
#include "storage/lwlock.h"
56-
#include "storage/pmsignal.h"
5756
#include "storage/proc.h"
5857
#include "storage/smgr.h"
5958
#include "utils/guc.h"
@@ -305,11 +304,9 @@ WalWriterMain(void)
305304

306305
/*
307306
* Emergency bailout if postmaster has died. This is to avoid the
308-
* necessity for manual cleanup of all postmaster children. Note
309-
* that we mustn't trust the WL_POSTMASTER_DEATH result flag entirely;
310-
* if it is set, recheck with PostmasterIsAlive before believing it.
307+
* necessity for manual cleanup of all postmaster children.
311308
*/
312-
if ((rc & WL_POSTMASTER_DEATH) && !PostmasterIsAlive())
309+
if (rc & WL_POSTMASTER_DEATH)
313310
exit(1);
314311
}
315312
}

0 commit comments

Comments
 (0)