Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4934003

Browse files
committed
Reduce idle power consumption of stats collector process.
Latch-ify the stats collector, so that it does not need an arbitrary wakeup cycle to check for postmaster death. The incremental savings in idle power are pretty marginal, since we only had it waking every two seconds; but I believe that this patch may also improve the collector's performance under load, by reducing the number of kernel calls made per message when messages are arriving constantly (we now avoid a select/poll call except when we need to sleep). The change also reduces the time needed for a normal database shutdown on platforms where signals don't interrupt select().
1 parent 5461564 commit 4934003

File tree

1 file changed

+65
-116
lines changed

1 file changed

+65
-116
lines changed

src/backend/postmaster/pgstat.c

Lines changed: 65 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@
2828
#include <arpa/inet.h>
2929
#include <signal.h>
3030
#include <time.h>
31-
#ifdef HAVE_POLL_H
32-
#include <poll.h>
33-
#endif
34-
#ifdef HAVE_SYS_POLL_H
35-
#include <sys/poll.h>
36-
#endif
3731

3832
#include "pgstat.h"
3933

@@ -55,6 +49,7 @@
5549
#include "storage/backendid.h"
5650
#include "storage/fd.h"
5751
#include "storage/ipc.h"
52+
#include "storage/latch.h"
5853
#include "storage/pg_shmem.h"
5954
#include "storage/pmsignal.h"
6055
#include "storage/procsignal.h"
@@ -94,9 +89,6 @@
9489
* failed statistics collector; in
9590
* seconds. */
9691

97-
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98-
* death; in seconds. */
99-
10092
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
10193
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
10294

@@ -139,6 +131,8 @@ PgStat_MsgBgWriter BgWriterStats;
139131
*/
140132
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
141133

134+
static Latch pgStatLatch;
135+
142136
static struct sockaddr_storage pgStatAddr;
143137

144138
static time_t last_pgstat_start_time;
@@ -3009,15 +3003,7 @@ PgstatCollectorMain(int argc, char *argv[])
30093003
{
30103004
int len;
30113005
PgStat_Msg msg;
3012-
3013-
#ifndef WIN32
3014-
#ifdef HAVE_POLL
3015-
struct pollfd input_fd;
3016-
#else
3017-
struct timeval sel_timeout;
3018-
fd_set rfds;
3019-
#endif
3020-
#endif
3006+
int wr;
30213007

30223008
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30233009

@@ -3036,9 +3022,13 @@ PgstatCollectorMain(int argc, char *argv[])
30363022
elog(FATAL, "setsid() failed: %m");
30373023
#endif
30383024

3025+
/* Initialize private latch for use by signal handlers */
3026+
InitLatch(&pgStatLatch);
3027+
30393028
/*
30403029
* Ignore all signals usually bound to some action in the postmaster,
3041-
* except SIGQUIT.
3030+
* except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3031+
* support latch operations, because pgStatLatch is local not shared.
30423032
*/
30433033
pqsignal(SIGHUP, pgstat_sighup_handler);
30443034
pqsignal(SIGINT, SIG_IGN);
@@ -3073,26 +3063,24 @@ PgstatCollectorMain(int argc, char *argv[])
30733063
pgStatRunningInCollector = true;
30743064
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
30753065

3076-
/*
3077-
* Setup the descriptor set for select(2). Since only one bit in the set
3078-
* ever changes, we need not repeat FD_ZERO each time.
3079-
*/
3080-
#if !defined(HAVE_POLL) && !defined(WIN32)
3081-
FD_ZERO(&rfds);
3082-
#endif
3083-
30843066
/*
30853067
* Loop to process messages until we get SIGQUIT or detect ungraceful
30863068
* death of our parent postmaster.
30873069
*
3088-
* For performance reasons, we don't want to do a PostmasterIsAlive() test
3089-
* after every message; instead, do it only when select()/poll() is
3090-
* interrupted by timeout. In essence, we'll stay alive as long as
3091-
* backends keep sending us stuff often, even if the postmaster is gone.
3070+
* For performance reasons, we don't want to do ResetLatch/WaitLatch after
3071+
* every message; instead, do that only after a recv() fails to obtain a
3072+
* message. (This effectively means that if backends are sending us stuff
3073+
* like mad, we won't notice postmaster death until things slack off a
3074+
* bit; which seems fine.) To do that, we have an inner loop that
3075+
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3076+
* the inner loop, which means that such interrupts will get serviced but
3077+
* the latch won't get cleared until next time there is a break in the
3078+
* action.
30923079
*/
30933080
for (;;)
30943081
{
3095-
int got_data;
3082+
/* Clear any already-pending wakeups */
3083+
ResetLatch(&pgStatLatch);
30963084

30973085
/*
30983086
* Quit if we get SIGQUIT from the postmaster.
@@ -3101,87 +3089,37 @@ PgstatCollectorMain(int argc, char *argv[])
31013089
break;
31023090

31033091
/*
3104-
* Reload configuration if we got SIGHUP from the postmaster.
3105-
*/
3106-
if (got_SIGHUP)
3107-
{
3108-
ProcessConfigFile(PGC_SIGHUP);
3109-
got_SIGHUP = false;
3110-
}
3111-
3112-
/*
3113-
* Write the stats file if a new request has arrived that is not
3114-
* satisfied by existing file.
3115-
*/
3116-
if (last_statwrite < last_statrequest)
3117-
pgstat_write_statsfile(false);
3118-
3119-
/*
3120-
* Wait for a message to arrive; but not for more than
3121-
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122-
* shut down after an ungraceful postmaster termination; so it needn't
3123-
* be very fast. However, on some systems SIGQUIT won't interrupt the
3124-
* poll/select call, so this also limits speed of response to SIGQUIT,
3125-
* which is more important.)
3126-
*
3127-
* We use poll(2) if available, otherwise select(2). Win32 has its own
3128-
* implementation.
3129-
*/
3130-
#ifndef WIN32
3131-
#ifdef HAVE_POLL
3132-
input_fd.fd = pgStatSock;
3133-
input_fd.events = POLLIN | POLLERR;
3134-
input_fd.revents = 0;
3135-
3136-
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
3137-
{
3138-
if (errno == EINTR)
3139-
continue;
3140-
ereport(ERROR,
3141-
(errcode_for_socket_access(),
3142-
errmsg("poll() failed in statistics collector: %m")));
3143-
}
3144-
3145-
got_data = (input_fd.revents != 0);
3146-
#else /* !HAVE_POLL */
3147-
3148-
FD_SET(pgStatSock, &rfds);
3149-
3150-
/*
3151-
* timeout struct is modified by select() on some operating systems,
3152-
* so re-fill it each time.
3092+
* Inner loop iterates as long as we keep getting messages, or until
3093+
* need_exit becomes set.
31533094
*/
3154-
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
3155-
sel_timeout.tv_usec = 0;
3156-
3157-
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
3095+
while (!need_exit)
31583096
{
3159-
if (errno == EINTR)
3160-
continue;
3161-
ereport(ERROR,
3162-
(errcode_for_socket_access(),
3163-
errmsg("select() failed in statistics collector: %m")));
3164-
}
3097+
/*
3098+
* Reload configuration if we got SIGHUP from the postmaster.
3099+
*/
3100+
if (got_SIGHUP)
3101+
{
3102+
got_SIGHUP = false;
3103+
ProcessConfigFile(PGC_SIGHUP);
3104+
}
31653105

3166-
got_data = FD_ISSET(pgStatSock, &rfds);
3167-
#endif /* HAVE_POLL */
3168-
#else /* WIN32 */
3169-
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
3170-
PGSTAT_SELECT_TIMEOUT * 1000);
3171-
#endif
3106+
/*
3107+
* Write the stats file if a new request has arrived that is not
3108+
* satisfied by existing file.
3109+
*/
3110+
if (last_statwrite < last_statrequest)
3111+
pgstat_write_statsfile(false);
31723112

3173-
/*
3174-
* If there is a message on the socket, read it and check for
3175-
* validity.
3176-
*/
3177-
if (got_data)
3178-
{
3113+
/*
3114+
* Try to receive and process a message. This will not block,
3115+
* since the socket is set to non-blocking mode.
3116+
*/
31793117
len = recv(pgStatSock, (char *) &msg,
31803118
sizeof(PgStat_Msg), 0);
31813119
if (len < 0)
31823120
{
3183-
if (errno == EINTR)
3184-
continue;
3121+
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
3122+
break; /* out of inner loop */
31853123
ereport(ERROR,
31863124
(errcode_for_socket_access(),
31873125
errmsg("could not read statistics message: %m")));
@@ -3279,17 +3217,18 @@ PgstatCollectorMain(int argc, char *argv[])
32793217
default:
32803218
break;
32813219
}
3282-
}
3283-
else
3284-
{
3285-
/*
3286-
* We can only get here if the select/poll timeout elapsed. Check
3287-
* for postmaster death.
3288-
*/
3289-
if (!PostmasterIsAlive())
3290-
break;
3291-
}
3292-
} /* end of message-processing loop */
3220+
} /* end of inner message-processing loop */
3221+
3222+
/* Sleep until there's something to do */
3223+
wr = WaitLatchOrSocket(&pgStatLatch,
3224+
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
3225+
pgStatSock,
3226+
-1L);
3227+
3228+
/* Check for postmaster death */
3229+
if (wr & WL_POSTMASTER_DEATH)
3230+
break;
3231+
} /* end of outer loop */
32933232

32943233
/*
32953234
* Save the final stats to reuse at next startup.
@@ -3304,14 +3243,24 @@ PgstatCollectorMain(int argc, char *argv[])
33043243
static void
33053244
pgstat_exit(SIGNAL_ARGS)
33063245
{
3246+
int save_errno = errno;
3247+
33073248
need_exit = true;
3249+
SetLatch(&pgStatLatch);
3250+
3251+
errno = save_errno;
33083252
}
33093253

33103254
/* SIGHUP handler for collector process */
33113255
static void
33123256
pgstat_sighup_handler(SIGNAL_ARGS)
33133257
{
3258+
int save_errno = errno;
3259+
33143260
got_SIGHUP = true;
3261+
SetLatch(&pgStatLatch);
3262+
3263+
errno = save_errno;
33153264
}
33163265

33173266

0 commit comments

Comments
 (0)