Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e96373a

Browse files
committed
Ensure that we retry rather than erroring out when send() or recv() returns EINTR; the stats code was failing to do this and so were a couple of places in the postmaster.
The stats code assumed that recv() could not return EINTR if a preceding select() showed the socket to be read-ready, but this is demonstrably false with our Windows implementation of recv(), and it may not be the case on all Unix variants either. I think this explains the intermittent stats regression test failures we've been seeing, as well as reports of stats collector instability under high load on Windows. Backpatch as far as 8.0.
1 parent a420818 commit e96373a

File tree

2 files changed

+33
-7
lines changed

2 files changed

+33
-7
lines changed

src/backend/postmaster/pgstat.c

+22-5
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.135 2006/07/14 14:52:22 momjian Exp $
16+
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.136 2006/07/16 18:17:14 tgl Exp $
1717
* ----------
1818
*/
1919
#include "postgres.h"
@@ -323,8 +323,12 @@ pgstat_init(void)
323323
* rules prevent it).
324324
*/
325325
test_byte = TESTBYTEVAL;
326+
327+
retry1:
326328
if (send(pgStatSock, &test_byte, 1, 0) != 1)
327329
{
330+
if (errno == EINTR)
331+
goto retry1; /* if interrupted, just retry */
328332
ereport(LOG,
329333
(errcode_for_socket_access(),
330334
errmsg("could not send test message on socket for statistics collector: %m")));
@@ -375,8 +379,11 @@ pgstat_init(void)
375379

376380
test_byte++; /* just make sure variable is changed */
377381

382+
retry2:
378383
if (recv(pgStatSock, &test_byte, 1, 0) != 1)
379384
{
385+
if (errno == EINTR)
386+
goto retry2; /* if interrupted, just retry */
380387
ereport(LOG,
381388
(errcode_for_socket_access(),
382389
errmsg("could not receive test message on socket for statistics collector: %m")));
@@ -1533,17 +1540,23 @@ pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
15331540
static void
15341541
pgstat_send(void *msg, int len)
15351542
{
1543+
int rc;
1544+
15361545
if (pgStatSock < 0)
15371546
return;
15381547

15391548
((PgStat_MsgHdr *) msg)->m_size = len;
15401549

1550+
/* We'll retry after EINTR, but ignore all other failures */
1551+
do
1552+
{
1553+
rc = send(pgStatSock, msg, len, 0);
1554+
} while (rc < 0 && errno == EINTR);
1555+
15411556
#ifdef USE_ASSERT_CHECKING
1542-
if (send(pgStatSock, msg, len, 0) < 0)
1557+
/* In debug builds, log send failures ... */
1558+
if (rc < 0)
15431559
elog(LOG, "could not send to statistics collector: %m");
1544-
#else
1545-
send(pgStatSock, msg, len, 0);
1546-
/* We deliberately ignore any error from send() */
15471560
#endif
15481561
}
15491562

@@ -1718,9 +1731,13 @@ PgstatCollectorMain(int argc, char *argv[])
17181731
len = recv(pgStatSock, (char *) &msg,
17191732
sizeof(PgStat_Msg), 0);
17201733
if (len < 0)
1734+
{
1735+
if (errno == EINTR)
1736+
continue;
17211737
ereport(ERROR,
17221738
(errcode_for_socket_access(),
17231739
errmsg("could not read statistics message: %m")));
1740+
}
17241741

17251742
/*
17261743
* We ignore messages that are smaller than our common header

src/backend/postmaster/postmaster.c

+11-2
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.494 2006/07/15 15:47:17 tgl Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.495 2006/07/16 18:17:14 tgl Exp $
4141
*
4242
* NOTES
4343
*
@@ -1374,8 +1374,12 @@ ProcessStartupPacket(Port *port, bool SSLdone)
13741374
#else
13751375
SSLok = 'N'; /* No support for SSL */
13761376
#endif
1377+
1378+
retry1:
13771379
if (send(port->sock, &SSLok, 1, 0) != 1)
13781380
{
1381+
if (errno == EINTR)
1382+
goto retry1; /* if interrupted, just retry */
13791383
ereport(COMMERROR,
13801384
(errcode_for_socket_access(),
13811385
errmsg("failed to send SSL negotiation response: %m")));
@@ -2524,6 +2528,7 @@ static void
25242528
report_fork_failure_to_client(Port *port, int errnum)
25252529
{
25262530
char buffer[1000];
2531+
int rc;
25272532

25282533
/* Format the error message packet (always V2 protocol) */
25292534
snprintf(buffer, sizeof(buffer), "E%s%s\n",
@@ -2534,7 +2539,11 @@ report_fork_failure_to_client(Port *port, int errnum)
25342539
if (!pg_set_noblock(port->sock))
25352540
return;
25362541

2537-
send(port->sock, buffer, strlen(buffer) + 1, 0);
2542+
/* We'll retry after EINTR, but ignore all other failures */
2543+
do
2544+
{
2545+
rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
2546+
} while (rc < 0 && errno == EINTR);
25382547
}
25392548

25402549

0 commit comments

Comments
 (0)