Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c30f54a

Browse files
committed
Detect POLLHUP/POLLRDHUP while running queries.
Provide a new GUC client_connection_check_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default.

For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write.

In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes.

Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru>
Author: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp>
Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Maksim Milyutin <milyutinma@gmail.com>
Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version)
Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru
1 parent 174edbe commit c30f54a

File tree

11 files changed

+156
-0
lines changed

11 files changed

+156
-0
lines changed

doc/src/sgml/config.sgml

+37
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,43 @@ include_dir 'conf.d'
998998
</listitem>
999999
</varlistentry>
10001000

1001+
<varlistentry id="guc-client-connection-check-interval" xreflabel="client_connection_check_interval">
1002+
<term><varname>client_connection_check_interval</varname> (<type>integer</type>)
1003+
<indexterm>
1004+
<primary><varname>client_connection_check_interval</varname> configuration parameter</primary>
1005+
</indexterm>
1006+
</term>
1007+
<listitem>
1008+
<para>
1009+
Sets the time interval between optional checks that the client is still
1010+
connected, while running queries. The check is performed by polling
1011+
the socket, and allows long-running queries to be aborted sooner if
1012+
the kernel reports that the connection is closed.
1013+
</para>
1014+
<para>
1015+
This option is currently available only on systems that support the
1016+
non-standard <symbol>POLLRDHUP</symbol> extension to the
1017+
<symbol>poll</symbol> system call, including Linux.
1018+
</para>
1019+
<para>
1020+
If the value is specified without units, it is taken as milliseconds.
1021+
The default value is <literal>0</literal>, which disables connection
1022+
checks. Without connection checks, the server will detect the loss of
1023+
the connection only at the next interaction with the socket, when it
1024+
waits for, receives or sends data.
1025+
</para>
1026+
<para>
1027+
For the kernel itself to detect lost TCP connections reliably and within
1028+
a known timeframe in all scenarios including network failure, it may
1029+
also be necessary to adjust the TCP keepalive settings of the operating
1030+
system, or the <xref linkend="guc-tcp-keepalives-idle"/>,
1031+
<xref linkend="guc-tcp-keepalives-interval"/> and
1032+
<xref linkend="guc-tcp-keepalives-count"/> settings of
1033+
<productname>PostgreSQL</productname>.
1034+
</para>
1035+
</listitem>
1036+
</varlistentry>
1037+
10011038
</variablelist>
10021039
</sect2>
10031040

src/backend/libpq/pqcomm.c

+40
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
*/
5555
#include "postgres.h"
5656

57+
#ifdef HAVE_POLL_H
58+
#include <poll.h>
59+
#endif
5760
#include <signal.h>
5861
#include <fcntl.h>
5962
#include <grp.h>
@@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port)
19211924

19221925
return STATUS_OK;
19231926
}
1927+
1928+
/*
1929+
* Check if the client is still connected.
1930+
*/
1931+
bool
1932+
pq_check_connection(void)
1933+
{
1934+
#if defined(POLLRDHUP)
1935+
/*
1936+
* POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
1937+
* the other end. We don't have a portable way to do that without
1938+
* actually trying to read or write data on other systems. We don't want
1939+
* to read because that would be confused by pipelined queries and COPY
1940+
* data. Perhaps in future we'll try to write a heartbeat message instead.
1941+
*/
1942+
struct pollfd pollfd;
1943+
int rc;
1944+
1945+
pollfd.fd = MyProcPort->sock;
1946+
pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
1947+
pollfd.revents = 0;
1948+
1949+
rc = poll(&pollfd, 1, 0);
1950+
1951+
if (rc < 0)
1952+
{
1953+
ereport(COMMERROR,
1954+
(errcode_for_socket_access(),
1955+
errmsg("could not poll socket: %m")));
1956+
return false;
1957+
}
1958+
else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
1959+
return false;
1960+
#endif
1961+
1962+
return true;
1963+
}

src/backend/tcop/postgres.c

+32
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ int max_stack_depth = 100;
102102
/* wait N seconds to allow attach from a debugger */
103103
int PostAuthDelay = 0;
104104

105+
/* Time between checks that the client is still connected. */
106+
int client_connection_check_interval = 0;
107+
105108
/* ----------------
106109
* private typedefs etc
107110
* ----------------
@@ -2671,6 +2674,14 @@ start_xact_command(void)
26712674
* not desired, the timeout has to be disabled explicitly.
26722675
*/
26732676
enable_statement_timeout();
2677+
2678+
/* Start timeout for checking if the client has gone away if necessary. */
2679+
if (client_connection_check_interval > 0 &&
2680+
IsUnderPostmaster &&
2681+
MyProcPort &&
2682+
!get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT))
2683+
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
2684+
client_connection_check_interval);
26742685
}
26752686

26762687
static void
@@ -3149,6 +3160,27 @@ ProcessInterrupts(void)
31493160
(errcode(ERRCODE_ADMIN_SHUTDOWN),
31503161
errmsg("terminating connection due to administrator command")));
31513162
}
3163+
3164+
if (CheckClientConnectionPending)
3165+
{
3166+
CheckClientConnectionPending = false;
3167+
3168+
/*
3169+
* Check for lost connection and re-arm, if still configured, but not
3170+
* if we've arrived back at DoingCommandRead state. We don't want to
3171+
* wake up idle sessions, and they already know how to detect lost
3172+
* connections.
3173+
*/
3174+
if (!DoingCommandRead && client_connection_check_interval > 0)
3175+
{
3176+
if (!pq_check_connection())
3177+
ClientConnectionLost = true;
3178+
else
3179+
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
3180+
client_connection_check_interval);
3181+
}
3182+
}
3183+
31523184
if (ClientConnectionLost)
31533185
{
31543186
QueryCancelPending = false; /* lost connection trumps QueryCancel */

src/backend/utils/init/globals.c

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol;
3030
volatile sig_atomic_t InterruptPending = false;
3131
volatile sig_atomic_t QueryCancelPending = false;
3232
volatile sig_atomic_t ProcDiePending = false;
33+
volatile sig_atomic_t CheckClientConnectionPending = false;
3334
volatile sig_atomic_t ClientConnectionLost = false;
3435
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
3536
volatile sig_atomic_t IdleSessionTimeoutPending = false;

src/backend/utils/init/postinit.c

+10
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void);
7373
static void LockTimeoutHandler(void);
7474
static void IdleInTransactionSessionTimeoutHandler(void);
7575
static void IdleSessionTimeoutHandler(void);
76+
static void ClientCheckTimeoutHandler(void);
7677
static bool ThereIsAtLeastOneRole(void);
7778
static void process_startup_options(Port *port, bool am_superuser);
7879
static void process_settings(Oid databaseid, Oid roleid);
@@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
620621
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
621622
IdleInTransactionSessionTimeoutHandler);
622623
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
624+
RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
623625
}
624626

625627
/*
@@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void)
12421244
SetLatch(MyLatch);
12431245
}
12441246

1247+
static void
1248+
ClientCheckTimeoutHandler(void)
1249+
{
1250+
CheckClientConnectionPending = true;
1251+
InterruptPending = true;
1252+
SetLatch(MyLatch);
1253+
}
1254+
12451255
/*
12461256
* Returns true if at least one role is defined in this database cluster.
12471257
*/

src/backend/utils/misc/guc.c

+29
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
#include <float.h>
2121
#include <math.h>
2222
#include <limits.h>
23+
#ifdef HAVE_POLL_H
24+
#include <poll.h>
25+
#endif
2326
#ifndef WIN32
2427
#include <sys/mman.h>
2528
#endif
@@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc
204207
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
205208
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
206209
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
210+
static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source);
207211
static void assign_pgstat_temp_directory(const char *newval, void *extra);
208212
static bool check_application_name(char **newval, void **extra, GucSource source);
209213
static void assign_application_name(const char *newval, void *extra);
@@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] =
35013505
NULL, NULL, NULL
35023506
},
35033507

3508+
{
3509+
{"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER,
3510+
gettext_noop("Sets the time interval between checks for disconnection while running queries."),
3511+
NULL,
3512+
GUC_UNIT_MS
3513+
},
3514+
&client_connection_check_interval,
3515+
0, 0, INT_MAX,
3516+
check_client_connection_check_interval, NULL, NULL
3517+
},
3518+
35043519
/* End-of-list marker */
35053520
{
35063521
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source)
1198011995
return true;
1198111996
}
1198211997

11998+
static bool
11999+
check_client_connection_check_interval(int *newval, void **extra, GucSource source)
12000+
{
12001+
#ifndef POLLRDHUP
12002+
/* Linux only, for now. See pq_check_connection(). */
12003+
if (*newval != 0)
12004+
{
12005+
GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP.");
12006+
return false;
12007+
}
12008+
#endif
12009+
return true;
12010+
}
12011+
1198312012
static void
1198412013
assign_pgstat_temp_directory(const char *newval, void *extra)
1198512014
{

src/backend/utils/misc/postgresql.conf.sample

+3
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,9 @@
719719

720720
#dynamic_library_path = '$libdir'
721721

722+
#client_connection_check_interval = 0 # time between checks for client
723+
# disconnection while running queries;
724+
# 0 for never
722725

723726
#------------------------------------------------------------------------------
724727
# LOCK MANAGEMENT

src/include/libpq/libpq.h

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ extern int pq_getbyte(void);
7171
extern int pq_peekbyte(void);
7272
extern int pq_getbyte_if_available(unsigned char *c);
7373
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
74+
extern bool pq_check_connection(void);
7475

7576
/*
7677
* prototypes for functions in be-secure.c

src/include/miscadmin.h

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
8585
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
8686
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
8787

88+
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
8889
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
8990

9091
/* these are marked volatile because they are examined by signal handlers: */

src/include/tcop/tcopprot.h

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
2929
extern PGDLLIMPORT const char *debug_query_string;
3030
extern int max_stack_depth;
3131
extern int PostAuthDelay;
32+
extern int client_connection_check_interval;
3233

3334
/* GUC-configurable parameters */
3435

src/include/utils/timeout.h

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ typedef enum TimeoutId
3232
STANDBY_LOCK_TIMEOUT,
3333
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
3434
IDLE_SESSION_TIMEOUT,
35+
CLIENT_CONNECTION_CHECK_TIMEOUT,
3536
/* First user-definable timeout reason */
3637
USER_TIMEOUT,
3738
/* Maximum number of timeout reasons */

0 commit comments

Comments
 (0)