3 |    | * latch.c
4 |    | * Routines for inter-process latches
5 |    | *
6 |    | - * The Unix implementation uses the so-called self-pipe trick to overcome
7 |    | - * the race condition involved with select() and setting a global flag
8 |    | - * in the signal handler. When a latch is set and the current process
9 |    | - * is waiting for it, the signal handler wakes up the select() in
10 |   | - * WaitLatch by writing a byte to a pipe. A signal by itself doesn't
11 |   | - * interrupt select() on all platforms, and even on platforms where it
12 |   | - * does, a signal that arrives just before the select() call does not
13 |   | - * prevent the select() from entering sleep. An incoming byte on a pipe
14 |   | - * however reliably interrupts the sleep, and causes select() to return
15 |   | - * immediately even if the signal arrives before select() begins.
16 |   | - *
17 |   | - * (Actually, we prefer epoll_wait() over poll() over select() where
18 |   | - * available, but the same comments apply.)
   | 6  | + * The Unix implementation uses the so-called self-pipe trick to overcome the
   | 7  | + * race condition involved with poll() (or epoll_wait() on linux) and setting
   | 8  | + * a global flag in the signal handler. When a latch is set and the current
   | 9  | + * process is waiting for it, the signal handler wakes up the poll() in
   | 10 | + * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
   | 11 | + * poll() on all platforms, and even on platforms where it does, a signal that
   | 12 | + * arrives just before the poll() call does not prevent poll() from entering
   | 13 | + * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
   | 14 | + * and causes poll() to return immediately even if the signal arrives before
   | 15 | + * poll() begins.
19 | 16 | *
20 | 17 | * When SetLatch is called from the same process that owns the latch,
21 | 18 | * SetLatch writes the byte directly to the pipe. If it's owned by another
22 | 19 | * process, SIGUSR1 is sent and the signal handler in the waiting process
23 | 20 | * writes the byte to the pipe on behalf of the signaling process.
24 | 21 | *
25 |   | - * The Windows implementation uses Windows events that are inherited by
26 |   | - * all postmaster child processes.
   | 22 | + * The Windows implementation uses Windows events that are inherited by all
   | 23 | + * postmaster child processes. There's no need for the self-pipe trick there.
27 | 24 | *
28 | 25 | * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
29 | 26 | * Portions Copyright (c) 1994, Regents of the University of California
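To make the self-pipe trick concrete, here is a minimal standalone sketch of the waiting side. This is an illustration only, not code from this patch: the names are invented, the pipe is assumed to already exist with both ends O_NONBLOCK, and error handling is trimmed.

    #include <errno.h>
    #include <poll.h>
    #include <signal.h>
    #include <unistd.h>

    static int self_pipe[2];    /* [0] = read end, [1] = write end */

    /* Signal handler: make the pipe readable so any sleeping poll() returns. */
    static void
    wakeup_handler(int signo)
    {
        int save_errno = errno;

        (void) write(self_pipe[1], "", 1);  /* EAGAIN: a byte is already pending */
        errno = save_errno;
    }

    /*
     * Wait for a wakeup. There is no lost-wakeup window: a byte written by the
     * handler even just before poll() is entered still leaves the pipe
     * readable, so poll() returns immediately instead of sleeping.
     */
    static void
    wait_for_wakeup(void)
    {
        struct pollfd pfd = {0};
        char buf[16];

        pfd.fd = self_pipe[0];
        pfd.events = POLLIN;
        while (poll(&pfd, 1, -1 /* no timeout */) < 0 && errno == EINTR)
            ;                   /* interrupted by some other signal: retry */
        while (read(self_pipe[0], buf, sizeof(buf)) > 0)
            ;                   /* drain the pipe until EAGAIN */
    }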
39 | 36 | #include <limits.h>
40 | 37 | #include <signal.h>
41 | 38 | #include <unistd.h>
42 |    | -#include <sys/time.h>
43 | 39 | #ifdef HAVE_SYS_EPOLL_H
44 | 40 | #include <sys/epoll.h>
45 | 41 | #endif

49 | 45 | #ifdef HAVE_SYS_POLL_H
50 | 46 | #include <sys/poll.h>
51 | 47 | #endif
52 |    | -#ifdef HAVE_SYS_SELECT_H
53 |    | -#include <sys/select.h>
54 |    | -#endif
55 | 48 |
56 | 49 | #include "miscadmin.h"
57 | 50 | #include "pgstat.h"

69 | 62 | * define somewhere before this block.
70 | 63 | */
71 | 64 | #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
72 |    | -	defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32)
   | 65 | +	defined(WAIT_USE_WIN32)
73 | 66 | /* don't overwrite manual choice */
74 | 67 | #elif defined(HAVE_SYS_EPOLL_H)
75 | 68 | #define WAIT_USE_EPOLL
76 | 69 | #elif defined(HAVE_POLL)
77 | 70 | #define WAIT_USE_POLL
78 |    | -#elif HAVE_SYS_SELECT_H
79 |    | -#define WAIT_USE_SELECT
80 | 71 | #elif WIN32
81 | 72 | #define WAIT_USE_WIN32
82 | 73 | #else
@@ -162,8 +153,8 @@ InitializeLatchSupport(void)
162 | 153 |
163 | 154 | 	/*
164 | 155 | 	 * Set up the self-pipe that allows a signal handler to wake up the
165 |     | -	 * select() in WaitLatch. Make the write-end non-blocking, so that
166 |     | -	 * SetLatch won't block if the event has already been set many times
    | 156 | +	 * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
    | 157 | +	 * that SetLatch won't block if the event has already been set many times
167 | 158 | 	 * filling the kernel buffer. Make the read-end non-blocking too, so that
168 | 159 | 	 * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
169 | 160 | 	 */
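A sketch of the setup the comment describes, with hypothetical names; the real routine's error reporting and platform details are elided here.

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    static int self_pipe[2];

    static void
    init_self_pipe(void)
    {
        int flags;

        if (pipe(self_pipe) < 0)
        {
            perror("pipe");     /* real code reports through ereport() */
            exit(1);
        }

        /* Non-blocking write end: a writer must never sleep just because
         * many unread wakeup bytes already fill the pipe's kernel buffer. */
        flags = fcntl(self_pipe[1], F_GETFL);
        (void) fcntl(self_pipe[1], F_SETFL, flags | O_NONBLOCK);

        /* Non-blocking read end: the pipe can be cleared by reading until
         * the call fails with EAGAIN/EWOULDBLOCK, without ever blocking. */
        flags = fcntl(self_pipe[0], F_GETFL);
        (void) fcntl(self_pipe[0], F_SETFL, flags | O_NONBLOCK);
    }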
@@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
401 | 392 |
402 | 393 | 	/*
403 | 394 | 	 * See if anyone's waiting for the latch. It can be the current process if
404 |     | -	 * we're in a signal handler. We use the self-pipe to wake up the select()
405 |     | -	 * in that case. If it's another process, send a signal.
    | 395 | +	 * we're in a signal handler. We use the self-pipe to wake up the
    | 396 | +	 * poll()/epoll_wait() in that case. If it's another process, send a
    | 397 | +	 * signal.
406 | 398 | 	 *
407 | 399 | 	 * Fetch owner_pid only once, in case the latch is concurrently getting
408 | 400 | 	 * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
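Writing the wakeup byte has two failure modes worth noting: EINTR means retry, while EAGAIN means the pipe is already full of unread wakeup bytes, so any waiter is guaranteed to find it readable and nothing more needs to be done. A sketch with illustrative names:

    #include <errno.h>
    #include <unistd.h>

    extern int self_pipe[2];    /* write end is O_NONBLOCK */

    static void
    send_wakeup_byte(void)
    {
        char dummy = 0;

        for (;;)
        {
            if (write(self_pipe[1], &dummy, 1) == 1)
                return;         /* byte delivered */
            if (errno == EINTR)
                continue;       /* interrupted by a signal: retry */
            /* EAGAIN/EWOULDBLOCK: pipe full, a wakeup is already pending */
            return;
        }
    }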
@@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
666 |     | 		WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
667 | 659 | #elif defined(WAIT_USE_POLL)
668 | 660 | 		WaitEventAdjustPoll(set, event);
669 |     | -#elif defined(WAIT_USE_SELECT)
670 |     | -	/* nothing to do */
671 | 661 | #elif defined(WAIT_USE_WIN32)
672 | 662 | 		WaitEventAdjustWin32(set, event);
673 | 663 | #endif
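For the poll() path, "adjusting" an event amounts to translating the WL_* flags into a struct pollfd entry; select() needed no such step because its fd_set masks were rebuilt from scratch on every wait. A simplified sketch of that translation, with illustrative flag values rather than the real latch.h definitions (the real WaitEventAdjustPoll also covers WL_LATCH_SET and WL_POSTMASTER_DEATH, which both watch a pipe's read end for input):

    #include <poll.h>

    /* illustrative flag values, not the real latch.h definitions */
    #define WL_SOCKET_READABLE  (1 << 1)
    #define WL_SOCKET_WRITEABLE (1 << 2)

    static void
    adjust_poll_entry(struct pollfd *pfd, int fd, int wait_events)
    {
        pfd->fd = fd;
        pfd->revents = 0;
        pfd->events = 0;
        if (wait_events & WL_SOCKET_READABLE)
            pfd->events |= POLLIN;
        if (wait_events & WL_SOCKET_WRITEABLE)
            pfd->events |= POLLOUT;
    }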
@@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
724 |     | 		WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
725 | 715 | #elif defined(WAIT_USE_POLL)
726 | 716 | 		WaitEventAdjustPoll(set, event);
727 |     | -#elif defined(WAIT_USE_SELECT)
728 |     | -	/* nothing to do */
729 | 717 | #elif defined(WAIT_USE_WIN32)
730 | 718 | 		WaitEventAdjustWin32(set, event);
731 | 719 | #endif
@@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1055 | 1043 | 			 * because we don't expect the pipe to become readable or to have
1056 | 1044 | 			 * any errors either, treat those cases as postmaster death, too.
1057 | 1045 | 			 *
1058 |      | -			 * As explained in the WAIT_USE_SELECT implementation, select(2)
1059 |      | -			 * may spuriously return. Be paranoid about that here too, a
1060 |      | -			 * spurious WL_POSTMASTER_DEATH would be painful.
     | 1046 | +			 * Be paranoid about a spurious event signalling the postmaster as
     | 1047 | +			 * being dead. There have been reports about that happening with
     | 1048 | +			 * older primitives (select(2) to be specific), and a spurious
     | 1049 | +			 * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
     | 1050 | +			 * cost much.
1061 | 1051 | 			 */
1062 | 1052 | 			if (!PostmasterIsAlive())
1063 | 1053 | 			{
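The re-check is cheap because the death-watch file descriptor is the read end of a pipe whose write end only the postmaster holds open: a non-blocking read distinguishes "still alive" (EAGAIN) from "dead" (EOF). A sketch of that idea, with assumed names; the real PostmasterIsAlive() lives in other backend code and treats unexpected states as fatal errors:

    #include <errno.h>
    #include <stdbool.h>
    #include <unistd.h>

    /* read end of the postmaster death-watch pipe, opened O_NONBLOCK */
    extern int death_watch_fd;

    static bool
    postmaster_is_alive(void)
    {
        char    c;
        ssize_t rc = read(death_watch_fd, &c, 1);

        if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
            return true;        /* nothing to read: the writer still exists */
        /* rc == 0 is EOF: the postmaster closed its end by dying. Real code
         * treats rc > 0 (unexpected data) and other errors as fatal. */
        return false;
    }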
@@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1171 | 1161 | 			 * we don't expect the pipe to become readable or to have any
1172 | 1162 | 			 * errors either, treat those cases as postmaster death, too.
1173 | 1163 | 			 *
1174 |      | -			 * As explained in the WAIT_USE_SELECT implementation, select(2)
1175 |      | -			 * may spuriously return. Be paranoid about that here too, a
1176 |      | -			 * spurious WL_POSTMASTER_DEATH would be painful.
     | 1164 | +			 * Be paranoid about a spurious event signalling the postmaster as
     | 1165 | +			 * being dead. There have been reports about that happening with
     | 1166 | +			 * older primitives (select(2) to be specific), and a spurious
     | 1167 | +			 * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
     | 1168 | +			 * cost much.
1177 | 1169 | 			 */
1178 | 1170 | 			if (!PostmasterIsAlive())
1179 | 1171 | 			{
@@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1214 | 1206 | 	return returned_events;
1215 | 1207 | }
1216 | 1208 |
1217 |      | -#elif defined(WAIT_USE_SELECT)
1218 |      | -
1219 |      | -/*
1220 |      | - * Wait using select(2).
1221 |      | - *
1222 |      | - * XXX: On at least older linux kernels select(), in violation of POSIX,
1223 |      | - * doesn't reliably return a socket as writable if closed - but we rely on
1224 |      | - * that. So far all the known cases of this problem are on platforms that also
1225 |      | - * provide a poll() implementation without that bug. If we find one where
1226 |      | - * that's not the case, we'll need to add a workaround.
1227 |      | - */
1228 |      | -static inline int
1229 |      | -WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1230 |      | -					  WaitEvent *occurred_events, int nevents)
1231 |      | -{
1232 |      | -	int			returned_events = 0;
1233 |      | -	int			rc;
1234 |      | -	WaitEvent  *cur_event;
1235 |      | -	fd_set		input_mask;
1236 |      | -	fd_set		output_mask;
1237 |      | -	int			hifd;
1238 |      | -	struct timeval tv;
1239 |      | -	struct timeval *tvp = NULL;
1240 |      | -
1241 |      | -	FD_ZERO(&input_mask);
1242 |      | -	FD_ZERO(&output_mask);
1243 |      | -
1244 |      | -	/*
1245 |      | -	 * Prepare input/output masks. We do so every loop iteration as there's no
1246 |      | -	 * entirely portable way to copy fd_sets.
1247 |      | -	 */
1248 |      | -	for (cur_event = set->events;
1249 |      | -		 cur_event < (set->events + set->nevents);
1250 |      | -		 cur_event++)
1251 |      | -	{
1252 |      | -		if (cur_event->events == WL_LATCH_SET)
1253 |      | -			FD_SET(cur_event->fd, &input_mask);
1254 |      | -		else if (cur_event->events == WL_POSTMASTER_DEATH)
1255 |      | -			FD_SET(cur_event->fd, &input_mask);
1256 |      | -		else
1257 |      | -		{
1258 |      | -			Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
1259 |      | -			if (cur_event->events == WL_SOCKET_READABLE)
1260 |      | -				FD_SET(cur_event->fd, &input_mask);
1261 |      | -			else if (cur_event->events == WL_SOCKET_WRITEABLE)
1262 |      | -				FD_SET(cur_event->fd, &output_mask);
1263 |      | -		}
1264 |      | -
1265 |      | -		if (cur_event->fd > hifd)
1266 |      | -			hifd = cur_event->fd;
1267 |      | -	}
1268 |      | -
1269 |      | -	/* Sleep */
1270 |      | -	if (cur_timeout >= 0)
1271 |      | -	{
1272 |      | -		tv.tv_sec = cur_timeout / 1000L;
1273 |      | -		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
1274 |      | -		tvp = &tv;
1275 |      | -	}
1276 |      | -	rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
1277 |      | -
1278 |      | -	/* Check return code */
1279 |      | -	if (rc < 0)
1280 |      | -	{
1281 |      | -		/* EINTR is okay, otherwise complain */
1282 |      | -		if (errno != EINTR)
1283 |      | -		{
1284 |      | -			waiting = false;
1285 |      | -			ereport(ERROR,
1286 |      | -					(errcode_for_socket_access(),
1287 |      | -					 errmsg("select() failed: %m")));
1288 |      | -		}
1289 |      | -		return 0;				/* retry */
1290 |      | -	}
1291 |      | -	else if (rc == 0)
1292 |      | -	{
1293 |      | -		/* timeout exceeded */
1294 |      | -		return -1;
1295 |      | -	}
1296 |      | -
1297 |      | -	/*
1298 |      | -	 * To associate events with select's masks, we have to check the status of
1299 |      | -	 * the file descriptors associated with an event; by looping through all
1300 |      | -	 * events.
1301 |      | -	 */
1302 |      | -	for (cur_event = set->events;
1303 |      | -		 cur_event < (set->events + set->nevents)
1304 |      | -		 && returned_events < nevents;
1305 |      | -		 cur_event++)
1306 |      | -	{
1307 |      | -		occurred_events->pos = cur_event->pos;
1308 |      | -		occurred_events->user_data = cur_event->user_data;
1309 |      | -		occurred_events->events = 0;
1310 |      | -
1311 |      | -		if (cur_event->events == WL_LATCH_SET &&
1312 |      | -			FD_ISSET(cur_event->fd, &input_mask))
1313 |      | -		{
1314 |      | -			/* There's data in the self-pipe, clear it. */
1315 |      | -			drainSelfPipe();
1316 |      | -
1317 |      | -			if (set->latch->is_set)
1318 |      | -			{
1319 |      | -				occurred_events->fd = PGINVALID_SOCKET;
1320 |      | -				occurred_events->events = WL_LATCH_SET;
1321 |      | -				occurred_events++;
1322 |      | -				returned_events++;
1323 |      | -			}
1324 |      | -		}
1325 |      | -		else if (cur_event->events == WL_POSTMASTER_DEATH &&
1326 |      | -				 FD_ISSET(cur_event->fd, &input_mask))
1327 |      | -		{
1328 |      | -			/*
1329 |      | -			 * According to the select(2) man page on Linux, select(2) may
1330 |      | -			 * spuriously return and report a file descriptor as readable,
1331 |      | -			 * when it's not; and presumably so can poll(2). It's not clear
1332 |      | -			 * that the relevant cases would ever apply to the postmaster
1333 |      | -			 * pipe, but since the consequences of falsely returning
1334 |      | -			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
1335 |      | -			 * trouble to positively verify EOF with PostmasterIsAlive().
1336 |      | -			 */
1337 |      | -			if (!PostmasterIsAlive())
1338 |      | -			{
1339 |      | -				occurred_events->fd = PGINVALID_SOCKET;
1340 |      | -				occurred_events->events = WL_POSTMASTER_DEATH;
1341 |      | -				occurred_events++;
1342 |      | -				returned_events++;
1343 |      | -			}
1344 |      | -		}
1345 |      | -		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1346 |      | -		{
1347 |      | -			Assert(cur_event->fd != PGINVALID_SOCKET);
1348 |      | -
1349 |      | -			if ((cur_event->events & WL_SOCKET_READABLE) &&
1350 |      | -				FD_ISSET(cur_event->fd, &input_mask))
1351 |      | -			{
1352 |      | -				/* data available in socket, or EOF */
1353 |      | -				occurred_events->events |= WL_SOCKET_READABLE;
1354 |      | -			}
1355 |      | -
1356 |      | -			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1357 |      | -				FD_ISSET(cur_event->fd, &output_mask))
1358 |      | -			{
1359 |      | -				/* socket is writeable, or EOF */
1360 |      | -				occurred_events->events |= WL_SOCKET_WRITEABLE;
1361 |      | -			}
1362 |      | -
1363 |      | -			if (occurred_events->events != 0)
1364 |      | -			{
1365 |      | -				occurred_events->fd = cur_event->fd;
1366 |      | -				occurred_events++;
1367 |      | -				returned_events++;
1368 |      | -			}
1369 |      | -		}
1370 |      | -	}
1371 |      | -	return returned_events;
1372 |      | -}
1373 |      | -
1374 | 1209 | #elif defined(WAIT_USE_WIN32)
1375 | 1210 |
1376 | 1211 | /*