Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3ab4fc5

Browse files
committed
Don't trust signalfd() on illumos.
Since commit 6a2a70a, we've used signalfd() to receive latch wakeups when building with WAIT_USE_EPOLL (default for Linux and illumos), and our traditional self-pipe when falling back to WAIT_USE_POLL (default for other Unixes with neither epoll() nor kqueue()).

Unexplained hangs and kernel panics have been reported on illumos systems, apparently linked to this use of signalfd(), leading illumos users and build farm members to have to define WAIT_USE_POLL explicitly as a work-around. A bug report exists at https://www.illumos.org/issues/13700 but no fix is available yet.

Let's provide a way for illumos users to go back to self-pipes with epoll(), like releases before 14, and choose that by default. No change for Linux users. To help with development/debugging, macros WAIT_USE_{EPOLL,POLL} and WAIT_USE_{SIGNALFD,SELF_PIPE} can be defined explicitly to override the defaults.

Back-patch to 14, where we started using signalfd().

Reported-by: Japin Li <japinli@hotmail.com>
Reported-by: Olaf Bohlen <olbohlen@eenfach.de> (off-list)
Reviewed-by: Japin Li <japinli@hotmail.com>
Discussion: https://postgr.es/m/MEYP282MB1669C8D88F0997354C2313C1B6CA9%40MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM
1 parent 00377b9 commit 3ab4fc5

File tree

1 file changed

+40
-18
lines changed

1 file changed

+40
-18
lines changed

src/backend/storage/ipc/latch.c

+40-18
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
7373
defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_WIN32)
7474
/* don't overwrite manual choice */
75-
#elif defined(HAVE_SYS_EPOLL_H) && defined(HAVE_SYS_SIGNALFD_H)
75+
#elif defined(HAVE_SYS_EPOLL_H)
7676
#define WAIT_USE_EPOLL
7777
#elif defined(HAVE_KQUEUE)
7878
#define WAIT_USE_KQUEUE
@@ -84,6 +84,22 @@
8484
#error "no wait set implementation available"
8585
#endif
8686

87+
/*
88+
* By default, we use a self-pipe with poll() and a signalfd with epoll(), if
89+
* available. We avoid signalfd on illumos for now based on problem reports.
90+
* For testing the choice can also be manually specified.
91+
*/
92+
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
93+
#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
94+
/* don't overwrite manual choice */
95+
#elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H) && \
96+
!defined(__illumos__)
97+
#define WAIT_USE_SIGNALFD
98+
#else
99+
#define WAIT_USE_SELF_PIPE
100+
#endif
101+
#endif
102+
87103
/* typedef in latch.h */
88104
struct WaitEventSet
89105
{
@@ -146,12 +162,12 @@ static WaitEventSet *LatchWaitSet;
146162
static volatile sig_atomic_t waiting = false;
147163
#endif
148164

149-
#ifdef WAIT_USE_EPOLL
165+
#ifdef WAIT_USE_SIGNALFD
150166
/* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
151167
static int signal_fd = -1;
152168
#endif
153169

154-
#if defined(WAIT_USE_POLL)
170+
#ifdef WAIT_USE_SELF_PIPE
155171
/* Read and write ends of the self-pipe */
156172
static int selfpipe_readfd = -1;
157173
static int selfpipe_writefd = -1;
@@ -164,7 +180,7 @@ static void latch_sigurg_handler(SIGNAL_ARGS);
164180
static void sendSelfPipeByte(void);
165181
#endif
166182

167-
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
183+
#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
168184
static void drain(void);
169185
#endif
170186

@@ -190,7 +206,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
190206
void
191207
InitializeLatchSupport(void)
192208
{
193-
#if defined(WAIT_USE_POLL)
209+
#if defined(WAIT_USE_SELF_PIPE)
194210
int pipefd[2];
195211

196212
if (IsUnderPostmaster)
@@ -264,7 +280,7 @@ InitializeLatchSupport(void)
264280
pqsignal(SIGURG, latch_sigurg_handler);
265281
#endif
266282

267-
#ifdef WAIT_USE_EPOLL
283+
#ifdef WAIT_USE_SIGNALFD
268284
sigset_t signalfd_mask;
269285

270286
/* Block SIGURG, because we'll receive it through a signalfd. */
@@ -316,15 +332,15 @@ ShutdownLatchSupport(void)
316332
LatchWaitSet = NULL;
317333
}
318334

319-
#if defined(WAIT_USE_POLL)
335+
#if defined(WAIT_USE_SELF_PIPE)
320336
close(selfpipe_readfd);
321337
close(selfpipe_writefd);
322338
selfpipe_readfd = -1;
323339
selfpipe_writefd = -1;
324340
selfpipe_owner_pid = InvalidPid;
325341
#endif
326342

327-
#if defined(WAIT_USE_EPOLL)
343+
#if defined(WAIT_USE_SIGNALFD)
328344
close(signal_fd);
329345
signal_fd = -1;
330346
#endif
@@ -341,9 +357,12 @@ InitLatch(Latch *latch)
341357
latch->owner_pid = MyProcPid;
342358
latch->is_shared = false;
343359

344-
#if defined(WAIT_USE_POLL)
360+
#if defined(WAIT_USE_SELF_PIPE)
345361
/* Assert InitializeLatchSupport has been called in this process */
346362
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
363+
#elif defined(WAIT_USE_SIGNALFD)
364+
/* Assert InitializeLatchSupport has been called in this process */
365+
Assert(signal_fd >= 0);
347366
#elif defined(WAIT_USE_WIN32)
348367
latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
349368
if (latch->event == NULL)
@@ -407,9 +426,12 @@ OwnLatch(Latch *latch)
407426
/* Sanity checks */
408427
Assert(latch->is_shared);
409428

410-
#if defined(WAIT_USE_POLL)
429+
#if defined(WAIT_USE_SELF_PIPE)
411430
/* Assert InitializeLatchSupport has been called in this process */
412431
Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
432+
#elif defined(WAIT_USE_SIGNALFD)
433+
/* Assert InitializeLatchSupport has been called in this process */
434+
Assert(signal_fd >= 0);
413435
#endif
414436

415437
owner_pid = latch->owner_pid;
@@ -620,7 +642,7 @@ SetLatch(Latch *latch)
620642
return;
621643
else if (owner_pid == MyProcPid)
622644
{
623-
#if defined(WAIT_USE_POLL)
645+
#if defined(WAIT_USE_SELF_PIPE)
624646
if (waiting)
625647
sendSelfPipeByte();
626648
#else
@@ -907,9 +929,9 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
907929
{
908930
set->latch = latch;
909931
set->latch_pos = event->pos;
910-
#if defined(WAIT_USE_POLL)
932+
#if defined(WAIT_USE_SELF_PIPE)
911933
event->fd = selfpipe_readfd;
912-
#elif defined(WAIT_USE_EPOLL)
934+
#elif defined(WAIT_USE_SIGNALFD)
913935
event->fd = signal_fd;
914936
#else
915937
event->fd = PGINVALID_SOCKET;
@@ -2086,7 +2108,7 @@ GetNumRegisteredWaitEvents(WaitEventSet *set)
20862108
return set->nevents;
20872109
}
20882110

2089-
#if defined(WAIT_USE_POLL)
2111+
#if defined(WAIT_USE_SELF_PIPE)
20902112

20912113
/*
20922114
* SetLatch uses SIGURG to wake up the process waiting on the latch.
@@ -2137,7 +2159,7 @@ sendSelfPipeByte(void)
21372159

21382160
#endif
21392161

2140-
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
2162+
#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
21412163

21422164
/*
21432165
* Read all available data from self-pipe or signalfd.
@@ -2153,7 +2175,7 @@ drain(void)
21532175
int rc;
21542176
int fd;
21552177

2156-
#ifdef WAIT_USE_POLL
2178+
#ifdef WAIT_USE_SELF_PIPE
21572179
fd = selfpipe_readfd;
21582180
#else
21592181
fd = signal_fd;
@@ -2171,7 +2193,7 @@ drain(void)
21712193
else
21722194
{
21732195
waiting = false;
2174-
#ifdef WAIT_USE_POLL
2196+
#ifdef WAIT_USE_SELF_PIPE
21752197
elog(ERROR, "read() on self-pipe failed: %m");
21762198
#else
21772199
elog(ERROR, "read() on signalfd failed: %m");
@@ -2181,7 +2203,7 @@ drain(void)
21812203
else if (rc == 0)
21822204
{
21832205
waiting = false;
2184-
#ifdef WAIT_USE_POLL
2206+
#ifdef WAIT_USE_SELF_PIPE
21852207
elog(ERROR, "unexpected EOF on self-pipe");
21862208
#else
21872209
elog(ERROR, "unexpected EOF on signalfd");

0 commit comments

Comments
 (0)