Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 47556a0

Browse files
committed
Fix pg_recvlogical upon signal termination
When pg_recvlogical needs to abort on a signal like SIGINT/SIGTERM, it is expected to exit cleanly as the code documents. However, the code forgot to clean up the state of the connection before leaving. This would cause the tool to emit messages like "unexpected termination of replication stream" error, which is meant for really unexpected termination or a crash. The code is refactored to apply the same termination abort operations for signals, end LSN and keepalive cases, registering a "reason" for the termination with a message printed under --verbose adapted to the reason used. This is arguably a bug, but this has been this way since the tool exists and the signal termination can now become slower depending on the change being decoded when the signal is received. Reported-by: Andres Freund Author: Bharath Rupireddy Reviewed-by: Andres Freund, Kyotaro Horiguchi, Cary Huang, Michael Paquier Discussion: https://postgr.es/m/20221019213953.htdtzikf4f45ywil@awork3.anarazel.de
1 parent ab29a7a commit 47556a0

File tree

2 files changed

+44
-12
lines changed

2 files changed

+44
-12
lines changed

src/bin/pg_basebackup/pg_recvlogical.c

+43-12
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
/* Time to sleep between reconnection attempts */
3333
#define RECONNECT_SLEEP_TIME 5
3434

35+
typedef enum
36+
{
37+
STREAM_STOP_NONE,
38+
STREAM_STOP_END_OF_WAL,
39+
STREAM_STOP_KEEPALIVE,
40+
STREAM_STOP_SIGNAL
41+
} StreamStopReason;
42+
3543
/* Global Options */
3644
static char *outfile = NULL;
3745
static int verbose = 0;
@@ -55,6 +63,7 @@ static const char *plugin = "test_decoding";
5563
/* Global State */
5664
static int outfd = -1;
5765
static volatile sig_atomic_t time_to_abort = false;
66+
static volatile sig_atomic_t stop_reason = STREAM_STOP_NONE;
5867
static volatile sig_atomic_t output_reopen = false;
5968
static bool output_isfile;
6069
static TimestampTz output_last_fsync = -1;
@@ -66,7 +75,8 @@ static void usage(void);
6675
static void StreamLogicalLog(void);
6776
static bool flushAndSendFeedback(PGconn *conn, TimestampTz *now);
6877
static void prepareToTerminate(PGconn *conn, XLogRecPtr endpos,
69-
bool keepalive, XLogRecPtr lsn);
78+
StreamStopReason reason,
79+
XLogRecPtr lsn);
7080

7181
static void
7282
usage(void)
@@ -207,9 +217,11 @@ StreamLogicalLog(void)
207217
TimestampTz last_status = -1;
208218
int i;
209219
PQExpBuffer query;
220+
XLogRecPtr cur_record_lsn;
210221

211222
output_written_lsn = InvalidXLogRecPtr;
212223
output_fsync_lsn = InvalidXLogRecPtr;
224+
cur_record_lsn = InvalidXLogRecPtr;
213225

214226
/*
215227
* Connect in replication mode to the server
@@ -275,7 +287,8 @@ StreamLogicalLog(void)
275287
int bytes_written;
276288
TimestampTz now;
277289
int hdr_len;
278-
XLogRecPtr cur_record_lsn = InvalidXLogRecPtr;
290+
291+
cur_record_lsn = InvalidXLogRecPtr;
279292

280293
if (copybuf != NULL)
281294
{
@@ -487,7 +500,7 @@ StreamLogicalLog(void)
487500

488501
if (endposReached)
489502
{
490-
prepareToTerminate(conn, endpos, true, InvalidXLogRecPtr);
503+
stop_reason = STREAM_STOP_KEEPALIVE;
491504
time_to_abort = true;
492505
break;
493506
}
@@ -527,7 +540,7 @@ StreamLogicalLog(void)
527540
*/
528541
if (!flushAndSendFeedback(conn, &now))
529542
goto error;
530-
prepareToTerminate(conn, endpos, false, cur_record_lsn);
543+
stop_reason = STREAM_STOP_END_OF_WAL;
531544
time_to_abort = true;
532545
break;
533546
}
@@ -572,12 +585,16 @@ StreamLogicalLog(void)
572585
/* endpos was exactly the record we just processed, we're done */
573586
if (!flushAndSendFeedback(conn, &now))
574587
goto error;
575-
prepareToTerminate(conn, endpos, false, cur_record_lsn);
588+
stop_reason = STREAM_STOP_END_OF_WAL;
576589
time_to_abort = true;
577590
break;
578591
}
579592
}
580593

594+
/* Clean up connection state if stream has been aborted */
595+
if (time_to_abort)
596+
prepareToTerminate(conn, endpos, stop_reason, cur_record_lsn);
597+
581598
res = PQgetResult(conn);
582599
if (PQresultStatus(res) == PGRES_COPY_OUT)
583600
{
@@ -656,6 +673,7 @@ StreamLogicalLog(void)
656673
static void
657674
sigexit_handler(SIGNAL_ARGS)
658675
{
676+
stop_reason = STREAM_STOP_SIGNAL;
659677
time_to_abort = true;
660678
}
661679

@@ -1021,18 +1039,31 @@ flushAndSendFeedback(PGconn *conn, TimestampTz *now)
10211039
* retry on failure.
10221040
*/
10231041
static void
1024-
prepareToTerminate(PGconn *conn, XLogRecPtr endpos, bool keepalive, XLogRecPtr lsn)
1042+
prepareToTerminate(PGconn *conn, XLogRecPtr endpos, StreamStopReason reason,
1043+
XLogRecPtr lsn)
10251044
{
10261045
(void) PQputCopyEnd(conn, NULL);
10271046
(void) PQflush(conn);
10281047

10291048
if (verbose)
10301049
{
1031-
if (keepalive)
1032-
pg_log_info("end position %X/%X reached by keepalive",
1033-
LSN_FORMAT_ARGS(endpos));
1034-
else
1035-
pg_log_info("end position %X/%X reached by WAL record at %X/%X",
1036-
LSN_FORMAT_ARGS(endpos), LSN_FORMAT_ARGS(lsn));
1050+
switch (reason)
1051+
{
1052+
case STREAM_STOP_SIGNAL:
1053+
pg_log_info("received interrupt signal, exiting");
1054+
break;
1055+
case STREAM_STOP_KEEPALIVE:
1056+
pg_log_info("end position %X/%X reached by keepalive",
1057+
LSN_FORMAT_ARGS(endpos));
1058+
break;
1059+
case STREAM_STOP_END_OF_WAL:
1060+
Assert(!XLogRecPtrIsInvalid(lsn));
1061+
pg_log_info("end position %X/%X reached by WAL record at %X/%X",
1062+
LSN_FORMAT_ARGS(endpos), LSN_FORMAT_ARGS(lsn));
1063+
break;
1064+
case STREAM_STOP_NONE:
1065+
Assert(false);
1066+
break;
1067+
}
10371068
}
10381069
}

src/tools/pgindent/typedefs.list

+1
Original file line numberDiff line numberDiff line change
@@ -2639,6 +2639,7 @@ Step
26392639
StopList
26402640
StrategyNumber
26412641
StreamCtl
2642+
StreamStopReason
26422643
String
26432644
StringInfo
26442645
StringInfoData

0 commit comments

Comments
 (0)