Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 966970e

Browse files
committed
Re-revert stats collector latch changes.
This reverts commit cb2f287, restoring the latch-ified stats collector logic. We'll soon see if this works any better on the Windows buildfarm machines.
1 parent b85427f commit 966970e

File tree

1 file changed

+68
-117
lines changed

1 file changed

+68
-117
lines changed

src/backend/postmaster/pgstat.c

Lines changed: 68 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@
2828
#include <arpa/inet.h>
2929
#include <signal.h>
3030
#include <time.h>
31-
#ifdef HAVE_POLL_H
32-
#include <poll.h>
33-
#endif
34-
#ifdef HAVE_SYS_POLL_H
35-
#include <sys/poll.h>
36-
#endif
3731

3832
#include "pgstat.h"
3933

@@ -55,8 +49,8 @@
5549
#include "storage/backendid.h"
5650
#include "storage/fd.h"
5751
#include "storage/ipc.h"
52+
#include "storage/latch.h"
5853
#include "storage/pg_shmem.h"
59-
#include "storage/pmsignal.h"
6054
#include "storage/procsignal.h"
6155
#include "utils/ascii.h"
6256
#include "utils/guc.h"
@@ -94,9 +88,6 @@
9488
* failed statistics collector; in
9589
* seconds. */
9690

97-
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98-
* death; in seconds. */
99-
10091
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
10192
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
10293

@@ -139,6 +130,8 @@ PgStat_MsgBgWriter BgWriterStats;
139130
*/
140131
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
141132

133+
static Latch pgStatLatch;
134+
142135
static struct sockaddr_storage pgStatAddr;
143136

144137
static time_t last_pgstat_start_time;
@@ -3009,15 +3002,7 @@ PgstatCollectorMain(int argc, char *argv[])
30093002
{
30103003
int len;
30113004
PgStat_Msg msg;
3012-
3013-
#ifndef WIN32
3014-
#ifdef HAVE_POLL
3015-
struct pollfd input_fd;
3016-
#else
3017-
struct timeval sel_timeout;
3018-
fd_set rfds;
3019-
#endif
3020-
#endif
3005+
int wr;
30213006

30223007
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30233008

@@ -3036,9 +3021,13 @@ PgstatCollectorMain(int argc, char *argv[])
30363021
elog(FATAL, "setsid() failed: %m");
30373022
#endif
30383023

3024+
/* Initialize private latch for use by signal handlers */
3025+
InitLatch(&pgStatLatch);
3026+
30393027
/*
30403028
* Ignore all signals usually bound to some action in the postmaster,
3041-
* except SIGQUIT.
3029+
* except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030+
* support latch operations, because pgStatLatch is local not shared.
30423031
*/
30433032
pqsignal(SIGHUP, pgstat_sighup_handler);
30443033
pqsignal(SIGINT, SIG_IGN);
@@ -3073,26 +3062,24 @@ PgstatCollectorMain(int argc, char *argv[])
30733062
pgStatRunningInCollector = true;
30743063
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
30753064

3076-
/*
3077-
* Setup the descriptor set for select(2). Since only one bit in the set
3078-
* ever changes, we need not repeat FD_ZERO each time.
3079-
*/
3080-
#if !defined(HAVE_POLL) && !defined(WIN32)
3081-
FD_ZERO(&rfds);
3082-
#endif
3083-
30843065
/*
30853066
* Loop to process messages until we get SIGQUIT or detect ungraceful
30863067
* death of our parent postmaster.
30873068
*
3088-
* For performance reasons, we don't want to do a PostmasterIsAlive() test
3089-
* after every message; instead, do it only when select()/poll() is
3090-
* interrupted by timeout. In essence, we'll stay alive as long as
3091-
* backends keep sending us stuff often, even if the postmaster is gone.
3069+
* For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070+
* every message; instead, do that only after a recv() fails to obtain a
3071+
* message. (This effectively means that if backends are sending us stuff
3072+
* like mad, we won't notice postmaster death until things slack off a
3073+
* bit; which seems fine.) To do that, we have an inner loop that
3074+
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075+
* the inner loop, which means that such interrupts will get serviced but
3076+
* the latch won't get cleared until next time there is a break in the
3077+
* action.
30923078
*/
30933079
for (;;)
30943080
{
3095-
int got_data;
3081+
/* Clear any already-pending wakeups */
3082+
ResetLatch(&pgStatLatch);
30963083

30973084
/*
30983085
* Quit if we get SIGQUIT from the postmaster.
@@ -3101,87 +3088,37 @@ PgstatCollectorMain(int argc, char *argv[])
31013088
break;
31023089

31033090
/*
3104-
* Reload configuration if we got SIGHUP from the postmaster.
3105-
*/
3106-
if (got_SIGHUP)
3107-
{
3108-
ProcessConfigFile(PGC_SIGHUP);
3109-
got_SIGHUP = false;
3110-
}
3111-
3112-
/*
3113-
* Write the stats file if a new request has arrived that is not
3114-
* satisfied by existing file.
3091+
* Inner loop iterates as long as we keep getting messages, or until
3092+
* need_exit becomes set.
31153093
*/
3116-
if (last_statwrite < last_statrequest)
3117-
pgstat_write_statsfile(false);
3118-
3119-
/*
3120-
* Wait for a message to arrive; but not for more than
3121-
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122-
* shut down after an ungraceful postmaster termination; so it needn't
3123-
* be very fast. However, on some systems SIGQUIT won't interrupt the
3124-
* poll/select call, so this also limits speed of response to SIGQUIT,
3125-
* which is more important.)
3126-
*
3127-
* We use poll(2) if available, otherwise select(2). Win32 has its own
3128-
* implementation.
3129-
*/
3130-
#ifndef WIN32
3131-
#ifdef HAVE_POLL
3132-
input_fd.fd = pgStatSock;
3133-
input_fd.events = POLLIN | POLLERR;
3134-
input_fd.revents = 0;
3135-
3136-
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
3137-
{
3138-
if (errno == EINTR)
3139-
continue;
3140-
ereport(ERROR,
3141-
(errcode_for_socket_access(),
3142-
errmsg("poll() failed in statistics collector: %m")));
3143-
}
3144-
3145-
got_data = (input_fd.revents != 0);
3146-
#else /* !HAVE_POLL */
3147-
3148-
FD_SET(pgStatSock, &rfds);
3149-
3150-
/*
3151-
* timeout struct is modified by select() on some operating systems,
3152-
* so re-fill it each time.
3153-
*/
3154-
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
3155-
sel_timeout.tv_usec = 0;
3156-
3157-
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
3094+
while (!need_exit)
31583095
{
3159-
if (errno == EINTR)
3160-
continue;
3161-
ereport(ERROR,
3162-
(errcode_for_socket_access(),
3163-
errmsg("select() failed in statistics collector: %m")));
3164-
}
3096+
/*
3097+
* Reload configuration if we got SIGHUP from the postmaster.
3098+
*/
3099+
if (got_SIGHUP)
3100+
{
3101+
got_SIGHUP = false;
3102+
ProcessConfigFile(PGC_SIGHUP);
3103+
}
31653104

3166-
got_data = FD_ISSET(pgStatSock, &rfds);
3167-
#endif /* HAVE_POLL */
3168-
#else /* WIN32 */
3169-
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
3170-
PGSTAT_SELECT_TIMEOUT * 1000);
3171-
#endif
3105+
/*
3106+
* Write the stats file if a new request has arrived that is not
3107+
* satisfied by existing file.
3108+
*/
3109+
if (last_statwrite < last_statrequest)
3110+
pgstat_write_statsfile(false);
31723111

3173-
/*
3174-
* If there is a message on the socket, read it and check for
3175-
* validity.
3176-
*/
3177-
if (got_data)
3178-
{
3112+
/*
3113+
* Try to receive and process a message. This will not block,
3114+
* since the socket is set to non-blocking mode.
3115+
*/
31793116
len = recv(pgStatSock, (char *) &msg,
31803117
sizeof(PgStat_Msg), 0);
31813118
if (len < 0)
31823119
{
3183-
if (errno == EINTR)
3184-
continue;
3120+
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
3121+
break; /* out of inner loop */
31853122
ereport(ERROR,
31863123
(errcode_for_socket_access(),
31873124
errmsg("could not read statistics message: %m")));
@@ -3279,17 +3216,21 @@ PgstatCollectorMain(int argc, char *argv[])
32793216
default:
32803217
break;
32813218
}
3282-
}
3283-
else
3284-
{
3285-
/*
3286-
* We can only get here if the select/poll timeout elapsed. Check
3287-
* for postmaster death.
3288-
*/
3289-
if (!PostmasterIsAlive())
3290-
break;
3291-
}
3292-
} /* end of message-processing loop */
3219+
} /* end of inner message-processing loop */
3220+
3221+
/* Sleep until there's something to do */
3222+
wr = WaitLatchOrSocket(&pgStatLatch,
3223+
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
3224+
pgStatSock,
3225+
-1L);
3226+
3227+
/*
3228+
* Emergency bailout if postmaster has died. This is to avoid the
3229+
* necessity for manual cleanup of all postmaster children.
3230+
*/
3231+
if (wr & WL_POSTMASTER_DEATH)
3232+
break;
3233+
} /* end of outer loop */
32933234

32943235
/*
32953236
* Save the final stats to reuse at next startup.
@@ -3304,14 +3245,24 @@ PgstatCollectorMain(int argc, char *argv[])
33043245
static void
33053246
pgstat_exit(SIGNAL_ARGS)
33063247
{
3248+
int save_errno = errno;
3249+
33073250
need_exit = true;
3251+
SetLatch(&pgStatLatch);
3252+
3253+
errno = save_errno;
33083254
}
33093255

33103256
/* SIGHUP handler for collector process */
33113257
static void
33123258
pgstat_sighup_handler(SIGNAL_ARGS)
33133259
{
3260+
int save_errno = errno;
3261+
33143262
got_SIGHUP = true;
3263+
SetLatch(&pgStatLatch);
3264+
3265+
errno = save_errno;
33153266
}
33163267

33173268

0 commit comments

Comments
 (0)