Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit cb2f287

Browse files
committed
Temporarily revert stats collector latch changes so we can ship beta1.
This patch reverts commit 4934003 and some follow-on tweaking in pgstat.c. While the basic scheme of latch-ifying the stats collector seems sound enough, it's failing on most Windows buildfarm members for unknown reasons, and there's no time left to debug that before 9.2beta1. Better to ship a beta version without this improvement. I hope to re-revert this once beta1 is out, though.
1 parent 5428ff4 commit cb2f287

File tree

1 file changed

+117
-68
lines changed

1 file changed

+117
-68
lines changed

src/backend/postmaster/pgstat.c

Lines changed: 117 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@
2828
#include <arpa/inet.h>
2929
#include <signal.h>
3030
#include <time.h>
31+
#ifdef HAVE_POLL_H
32+
#include <poll.h>
33+
#endif
34+
#ifdef HAVE_SYS_POLL_H
35+
#include <sys/poll.h>
36+
#endif
3137

3238
#include "pgstat.h"
3339

@@ -49,8 +55,8 @@
4955
#include "storage/backendid.h"
5056
#include "storage/fd.h"
5157
#include "storage/ipc.h"
52-
#include "storage/latch.h"
5358
#include "storage/pg_shmem.h"
59+
#include "storage/pmsignal.h"
5460
#include "storage/procsignal.h"
5561
#include "utils/ascii.h"
5662
#include "utils/guc.h"
@@ -88,6 +94,9 @@
8894
* failed statistics collector; in
8995
* seconds. */
9096

97+
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98+
* death; in seconds. */
99+
91100
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
92101
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
93102

@@ -130,8 +139,6 @@ PgStat_MsgBgWriter BgWriterStats;
130139
*/
131140
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
132141

133-
static Latch pgStatLatch;
134-
135142
static struct sockaddr_storage pgStatAddr;
136143

137144
static time_t last_pgstat_start_time;
@@ -3002,7 +3009,15 @@ PgstatCollectorMain(int argc, char *argv[])
30023009
{
30033010
int len;
30043011
PgStat_Msg msg;
3005-
int wr;
3012+
3013+
#ifndef WIN32
3014+
#ifdef HAVE_POLL
3015+
struct pollfd input_fd;
3016+
#else
3017+
struct timeval sel_timeout;
3018+
fd_set rfds;
3019+
#endif
3020+
#endif
30063021

30073022
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30083023

@@ -3021,13 +3036,9 @@ PgstatCollectorMain(int argc, char *argv[])
30213036
elog(FATAL, "setsid() failed: %m");
30223037
#endif
30233038

3024-
/* Initialize private latch for use by signal handlers */
3025-
InitLatch(&pgStatLatch);
3026-
30273039
/*
30283040
* Ignore all signals usually bound to some action in the postmaster,
3029-
* except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030-
* support latch operations, because pgStatLatch is local not shared.
3041+
* except SIGQUIT.
30313042
*/
30323043
pqsignal(SIGHUP, pgstat_sighup_handler);
30333044
pqsignal(SIGINT, SIG_IGN);
@@ -3062,24 +3073,26 @@ PgstatCollectorMain(int argc, char *argv[])
30623073
pgStatRunningInCollector = true;
30633074
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
30643075

3076+
/*
3077+
* Setup the descriptor set for select(2). Since only one bit in the set
3078+
* ever changes, we need not repeat FD_ZERO each time.
3079+
*/
3080+
#if !defined(HAVE_POLL) && !defined(WIN32)
3081+
FD_ZERO(&rfds);
3082+
#endif
3083+
30653084
/*
30663085
* Loop to process messages until we get SIGQUIT or detect ungraceful
30673086
* death of our parent postmaster.
30683087
*
3069-
* For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070-
* every message; instead, do that only after a recv() fails to obtain a
3071-
* message. (This effectively means that if backends are sending us stuff
3072-
* like mad, we won't notice postmaster death until things slack off a
3073-
* bit; which seems fine.) To do that, we have an inner loop that
3074-
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075-
* the inner loop, which means that such interrupts will get serviced but
3076-
* the latch won't get cleared until next time there is a break in the
3077-
* action.
3088+
* For performance reasons, we don't want to do a PostmasterIsAlive() test
3089+
* after every message; instead, do it only when select()/poll() is
3090+
* interrupted by timeout. In essence, we'll stay alive as long as
3091+
* backends keep sending us stuff often, even if the postmaster is gone.
30783092
*/
30793093
for (;;)
30803094
{
3081-
/* Clear any already-pending wakeups */
3082-
ResetLatch(&pgStatLatch);
3095+
int got_data;
30833096

30843097
/*
30853098
* Quit if we get SIGQUIT from the postmaster.
@@ -3088,37 +3101,87 @@ PgstatCollectorMain(int argc, char *argv[])
30883101
break;
30893102

30903103
/*
3091-
* Inner loop iterates as long as we keep getting messages, or until
3092-
* need_exit becomes set.
3104+
* Reload configuration if we got SIGHUP from the postmaster.
30933105
*/
3094-
while (!need_exit)
3106+
if (got_SIGHUP)
30953107
{
3096-
/*
3097-
* Reload configuration if we got SIGHUP from the postmaster.
3098-
*/
3099-
if (got_SIGHUP)
3100-
{
3101-
got_SIGHUP = false;
3102-
ProcessConfigFile(PGC_SIGHUP);
3103-
}
3108+
ProcessConfigFile(PGC_SIGHUP);
3109+
got_SIGHUP = false;
3110+
}
31043111

3105-
/*
3106-
* Write the stats file if a new request has arrived that is not
3107-
* satisfied by existing file.
3108-
*/
3109-
if (last_statwrite < last_statrequest)
3110-
pgstat_write_statsfile(false);
3112+
/*
3113+
* Write the stats file if a new request has arrived that is not
3114+
* satisfied by existing file.
3115+
*/
3116+
if (last_statwrite < last_statrequest)
3117+
pgstat_write_statsfile(false);
31113118

3112-
/*
3113-
* Try to receive and process a message. This will not block,
3114-
* since the socket is set to non-blocking mode.
3115-
*/
3119+
/*
3120+
* Wait for a message to arrive; but not for more than
3121+
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122+
* shut down after an ungraceful postmaster termination; so it needn't
3123+
* be very fast. However, on some systems SIGQUIT won't interrupt the
3124+
* poll/select call, so this also limits speed of response to SIGQUIT,
3125+
* which is more important.)
3126+
*
3127+
* We use poll(2) if available, otherwise select(2). Win32 has its own
3128+
* implementation.
3129+
*/
3130+
#ifndef WIN32
3131+
#ifdef HAVE_POLL
3132+
input_fd.fd = pgStatSock;
3133+
input_fd.events = POLLIN | POLLERR;
3134+
input_fd.revents = 0;
3135+
3136+
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
3137+
{
3138+
if (errno == EINTR)
3139+
continue;
3140+
ereport(ERROR,
3141+
(errcode_for_socket_access(),
3142+
errmsg("poll() failed in statistics collector: %m")));
3143+
}
3144+
3145+
got_data = (input_fd.revents != 0);
3146+
#else /* !HAVE_POLL */
3147+
3148+
FD_SET(pgStatSock, &rfds);
3149+
3150+
/*
3151+
* timeout struct is modified by select() on some operating systems,
3152+
* so re-fill it each time.
3153+
*/
3154+
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
3155+
sel_timeout.tv_usec = 0;
3156+
3157+
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
3158+
{
3159+
if (errno == EINTR)
3160+
continue;
3161+
ereport(ERROR,
3162+
(errcode_for_socket_access(),
3163+
errmsg("select() failed in statistics collector: %m")));
3164+
}
3165+
3166+
got_data = FD_ISSET(pgStatSock, &rfds);
3167+
#endif /* HAVE_POLL */
3168+
#else /* WIN32 */
3169+
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
3170+
PGSTAT_SELECT_TIMEOUT * 1000);
3171+
#endif
3172+
3173+
/*
3174+
* If there is a message on the socket, read it and check for
3175+
* validity.
3176+
*/
3177+
if (got_data)
3178+
{
31163179
len = recv(pgStatSock, (char *) &msg,
31173180
sizeof(PgStat_Msg), 0);
31183181
if (len < 0)
31193182
{
3120-
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
3121-
break; /* out of inner loop */
3183+
if (errno == EINTR)
3184+
continue;
31223185
ereport(ERROR,
31233186
(errcode_for_socket_access(),
31243187
errmsg("could not read statistics message: %m")));
@@ -3216,21 +3279,17 @@ PgstatCollectorMain(int argc, char *argv[])
32163279
default:
32173280
break;
32183281
}
3219-
} /* end of inner message-processing loop */
3220-
3221-
/* Sleep until there's something to do */
3222-
wr = WaitLatchOrSocket(&pgStatLatch,
3223-
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
3224-
pgStatSock,
3225-
-1L);
3226-
3227-
/*
3228-
* Emergency bailout if postmaster has died. This is to avoid the
3229-
* necessity for manual cleanup of all postmaster children.
3230-
*/
3231-
if (wr & WL_POSTMASTER_DEATH)
3232-
break;
3233-
} /* end of outer loop */
3282+
}
3283+
else
3284+
{
3285+
/*
3286+
* We can only get here if the select/poll timeout elapsed. Check
3287+
* for postmaster death.
3288+
*/
3289+
if (!PostmasterIsAlive())
3290+
break;
3291+
}
3292+
} /* end of message-processing loop */
32343293

32353294
/*
32363295
* Save the final stats to reuse at next startup.
@@ -3245,24 +3304,14 @@ PgstatCollectorMain(int argc, char *argv[])
32453304
static void
32463305
pgstat_exit(SIGNAL_ARGS)
32473306
{
3248-
int save_errno = errno;
3249-
32503307
need_exit = true;
3251-
SetLatch(&pgStatLatch);
3252-
3253-
errno = save_errno;
32543308
}
32553309

32563310
/* SIGHUP handler for collector process */
32573311
static void
32583312
pgstat_sighup_handler(SIGNAL_ARGS)
32593313
{
3260-
int save_errno = errno;
3261-
32623314
got_SIGHUP = true;
3263-
SetLatch(&pgStatLatch);
3264-
3265-
errno = save_errno;
32663315
}
32673316

32683317

0 commit comments

Comments
 (0)