28
28
#include <arpa/inet.h>
29
29
#include <signal.h>
30
30
#include <time.h>
31
+ #ifdef HAVE_POLL_H
32
+ #include <poll.h>
33
+ #endif
34
+ #ifdef HAVE_SYS_POLL_H
35
+ #include <sys/poll.h>
36
+ #endif
31
37
32
38
#include "pgstat.h"
33
39
49
55
#include "storage/backendid.h"
50
56
#include "storage/fd.h"
51
57
#include "storage/ipc.h"
52
- #include "storage/latch.h"
53
58
#include "storage/pg_shmem.h"
59
+ #include "storage/pmsignal.h"
54
60
#include "storage/procsignal.h"
55
61
#include "utils/ascii.h"
56
62
#include "utils/guc.h"
88
94
* failed statistics collector; in
89
95
* seconds. */
90
96
97
+ #define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98
+ * death; in seconds. */
99
+
91
100
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
92
101
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
93
102
@@ -130,8 +139,6 @@ PgStat_MsgBgWriter BgWriterStats;
130
139
*/
131
140
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET ;
132
141
133
- static Latch pgStatLatch ;
134
-
135
142
static struct sockaddr_storage pgStatAddr ;
136
143
137
144
static time_t last_pgstat_start_time ;
@@ -3002,7 +3009,15 @@ PgstatCollectorMain(int argc, char *argv[])
3002
3009
{
3003
3010
int len ;
3004
3011
PgStat_Msg msg ;
3005
- int wr ;
3012
+
3013
+ #ifndef WIN32
3014
+ #ifdef HAVE_POLL
3015
+ struct pollfd input_fd ;
3016
+ #else
3017
+ struct timeval sel_timeout ;
3018
+ fd_set rfds ;
3019
+ #endif
3020
+ #endif
3006
3021
3007
3022
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
3008
3023
@@ -3021,13 +3036,9 @@ PgstatCollectorMain(int argc, char *argv[])
3021
3036
elog (FATAL , "setsid() failed: %m" );
3022
3037
#endif
3023
3038
3024
- /* Initialize private latch for use by signal handlers */
3025
- InitLatch (& pgStatLatch );
3026
-
3027
3039
/*
3028
3040
* Ignore all signals usually bound to some action in the postmaster,
3029
- * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030
- * support latch operations, because pgStatLatch is local not shared.
3041
+ * except SIGHUP and SIGQUIT.
3031
3042
*/
3032
3043
pqsignal (SIGHUP , pgstat_sighup_handler );
3033
3044
pqsignal (SIGINT , SIG_IGN );
@@ -3062,24 +3073,26 @@ PgstatCollectorMain(int argc, char *argv[])
3062
3073
pgStatRunningInCollector = true;
3063
3074
pgStatDBHash = pgstat_read_statsfile (InvalidOid , true);
3064
3075
3076
+ /*
3077
+ * Set up the descriptor set for select(2). Since only one bit in the set
3078
+ * ever changes, we need not repeat FD_ZERO each time.
3079
+ */
3080
+ #if !defined(HAVE_POLL ) && !defined(WIN32 )
3081
+ FD_ZERO (& rfds );
3082
+ #endif
3083
+
3065
3084
/*
3066
3085
* Loop to process messages until we get SIGQUIT or detect ungraceful
3067
3086
* death of our parent postmaster.
3068
3087
*
3069
- * For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070
- * every message; instead, do that only after a recv() fails to obtain a
3071
- * message. (This effectively means that if backends are sending us stuff
3072
- * like mad, we won't notice postmaster death until things slack off a
3073
- * bit; which seems fine.) To do that, we have an inner loop that
3074
- * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075
- * the inner loop, which means that such interrupts will get serviced but
3076
- * the latch won't get cleared until next time there is a break in the
3077
- * action.
3088
+ * For performance reasons, we don't want to do a PostmasterIsAlive() test
3089
+ * after every message; instead, do it only when select()/poll() is
3090
+ * interrupted by timeout. In essence, we'll stay alive as long as
3091
+ * backends keep sending us stuff often, even if the postmaster is gone.
3078
3092
*/
3079
3093
for (;;)
3080
3094
{
3081
- /* Clear any already-pending wakeups */
3082
- ResetLatch (& pgStatLatch );
3095
+ int got_data ;
3083
3096
3084
3097
/*
3085
3098
* Quit if we get SIGQUIT from the postmaster.
@@ -3088,37 +3101,87 @@ PgstatCollectorMain(int argc, char *argv[])
3088
3101
break ;
3089
3102
3090
3103
/*
3091
- * Inner loop iterates as long as we keep getting messages, or until
3092
- * need_exit becomes set.
3104
+ * Reload configuration if we got SIGHUP from the postmaster.
3093
3105
*/
3094
- while (! need_exit )
3106
+ if ( got_SIGHUP )
3095
3107
{
3096
- /*
3097
- * Reload configuration if we got SIGHUP from the postmaster.
3098
- */
3099
- if (got_SIGHUP )
3100
- {
3101
- got_SIGHUP = false;
3102
- ProcessConfigFile (PGC_SIGHUP );
3103
- }
3108
+ ProcessConfigFile (PGC_SIGHUP );
3109
+ got_SIGHUP = false;
3110
+ }
3104
3111
3105
- /*
3106
- * Write the stats file if a new request has arrived that is not
3107
- * satisfied by existing file.
3108
- */
3109
- if (last_statwrite < last_statrequest )
3110
- pgstat_write_statsfile (false);
3112
+ /*
3113
+ * Write the stats file if a new request has arrived that is not
3114
+ * satisfied by existing file.
3115
+ */
3116
+ if (last_statwrite < last_statrequest )
3117
+ pgstat_write_statsfile (false);
3111
3118
3112
- /*
3113
- * Try to receive and process a message. This will not block,
3114
- * since the socket is set to non-blocking mode.
3115
- */
3119
+ /*
3120
+ * Wait for a message to arrive; but not for more than
3121
+ * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122
+ * shut down after an ungraceful postmaster termination; so it needn't
3123
+ * be very fast. However, on some systems SIGQUIT won't interrupt the
3124
+ * poll/select call, so this also limits speed of response to SIGQUIT,
3125
+ * which is more important.)
3126
+ *
3127
+ * We use poll(2) if available, otherwise select(2). Win32 has its own
3128
+ * implementation.
3129
+ */
3130
+ #ifndef WIN32
3131
+ #ifdef HAVE_POLL
3132
+ input_fd .fd = pgStatSock ;
3133
+ input_fd .events = POLLIN | POLLERR ;
3134
+ input_fd .revents = 0 ;
3135
+
3136
+ if (poll (& input_fd , 1 , PGSTAT_SELECT_TIMEOUT * 1000 ) < 0 )
3137
+ {
3138
+ if (errno == EINTR )
3139
+ continue ;
3140
+ ereport (ERROR ,
3141
+ (errcode_for_socket_access (),
3142
+ errmsg ("poll() failed in statistics collector: %m" )));
3143
+ }
3144
+
3145
+ got_data = (input_fd .revents != 0 );
3146
+ #else /* !HAVE_POLL */
3147
+
3148
+ FD_SET (pgStatSock , & rfds );
3149
+
3150
+ /*
3151
+ * timeout struct is modified by select() on some operating systems,
3152
+ * so re-fill it each time.
3153
+ */
3154
+ sel_timeout .tv_sec = PGSTAT_SELECT_TIMEOUT ;
3155
+ sel_timeout .tv_usec = 0 ;
3156
+
3157
+ if (select (pgStatSock + 1 , & rfds , NULL , NULL , & sel_timeout ) < 0 )
3158
+ {
3159
+ if (errno == EINTR )
3160
+ continue ;
3161
+ ereport (ERROR ,
3162
+ (errcode_for_socket_access (),
3163
+ errmsg ("select() failed in statistics collector: %m" )));
3164
+ }
3165
+
3166
+ got_data = FD_ISSET (pgStatSock , & rfds );
3167
+ #endif /* HAVE_POLL */
3168
+ #else /* WIN32 */
3169
+ got_data = pgwin32_waitforsinglesocket (pgStatSock , FD_READ ,
3170
+ PGSTAT_SELECT_TIMEOUT * 1000 );
3171
+ #endif
3172
+
3173
+ /*
3174
+ * If there is a message on the socket, read it and check for
3175
+ * validity.
3176
+ */
3177
+ if (got_data )
3178
+ {
3116
3179
len = recv (pgStatSock , (char * ) & msg ,
3117
3180
sizeof (PgStat_Msg ), 0 );
3118
3181
if (len < 0 )
3119
3182
{
3120
- if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR )
3121
- break ; /* out of inner loop */
3183
+ if (errno == EINTR )
3184
+ continue ;
3122
3185
ereport (ERROR ,
3123
3186
(errcode_for_socket_access (),
3124
3187
errmsg ("could not read statistics message: %m" )));
@@ -3216,21 +3279,17 @@ PgstatCollectorMain(int argc, char *argv[])
3216
3279
default :
3217
3280
break ;
3218
3281
}
3219
- } /* end of inner message-processing loop */
3220
-
3221
- /* Sleep until there's something to do */
3222
- wr = WaitLatchOrSocket (& pgStatLatch ,
3223
- WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE ,
3224
- pgStatSock ,
3225
- -1L );
3226
-
3227
- /*
3228
- * Emergency bailout if postmaster has died. This is to avoid the
3229
- * necessity for manual cleanup of all postmaster children.
3230
- */
3231
- if (wr & WL_POSTMASTER_DEATH )
3232
- break ;
3233
- } /* end of outer loop */
3282
+ }
3283
+ else
3284
+ {
3285
+ /*
3286
+ * We can only get here if the select/poll timeout elapsed. Check
3287
+ * for postmaster death.
3288
+ */
3289
+ if (!PostmasterIsAlive ())
3290
+ break ;
3291
+ }
3292
+ } /* end of message-processing loop */
3234
3293
3235
3294
/*
3236
3295
* Save the final stats to reuse at next startup.
@@ -3245,24 +3304,14 @@ PgstatCollectorMain(int argc, char *argv[])
3245
3304
static void
3246
3305
pgstat_exit (SIGNAL_ARGS )
3247
3306
{
3248
- int save_errno = errno ;
3249
-
3250
3307
need_exit = true;
3251
- SetLatch (& pgStatLatch );
3252
-
3253
- errno = save_errno ;
3254
3308
}
3255
3309
3256
3310
/* SIGHUP handler for collector process */
3257
3311
static void
3258
3312
pgstat_sighup_handler (SIGNAL_ARGS )
3259
3313
{
3260
- int save_errno = errno ;
3261
-
3262
3314
got_SIGHUP = true;
3263
- SetLatch (& pgStatLatch );
3264
-
3265
- errno = save_errno ;
3266
3315
}
3267
3316
3268
3317
0 commit comments