Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c59a973

Browse files
committed
Fix test race between primary XLOG_RUNNING_XACTS and standby logical slot.
Before the previous commit, the test could hang until LOG_SNAPSHOT_INTERVAL_MS (15s), until checkpoint_timeout (300s), or indefinitely. An indefinite hang was awfully improbable. It entailed the test reaching checkpoint_timeout before the DecodingContextFindStartpoint() of a CREATE SUBSCRIPTION, yet after the preceding WAL record.

Back-patch to v16, which introduced the test.

Bertrand Drouvot, reported by Noah Misch.

Discussion: https://postgr.es/m/20240211010227.a2.nmisch@google.com
1 parent f024746 commit c59a973

File tree

2 files changed

+34
-16
lines changed

2 files changed

+34
-16
lines changed

src/test/perl/PostgreSQL/Test/Cluster.pm

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3092,6 +3092,36 @@ $SIG{TERM} = $SIG{INT} = sub {
30923092

30933093
=pod
30943094
3095+
=item $node->log_standby_snapshot(self, standby, slot_name)
3096+
3097+
Log a standby snapshot on primary once the slot restart_lsn is determined on
3098+
the standby.
3099+
3100+
=cut
3101+
3102+
sub log_standby_snapshot
3103+
{
3104+
my ($self, $standby, $slot_name) = @_;
3105+
3106+
# Once the slot's restart_lsn is determined, the standby looks for
3107+
# xl_running_xacts WAL record from the restart_lsn onwards. First wait
3108+
# until the slot restart_lsn is determined.
3109+
3110+
$standby->poll_query_until(
3111+
'postgres', qq[
3112+
SELECT restart_lsn IS NOT NULL
3113+
FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name'
3114+
])
3115+
or die
3116+
"timed out waiting for logical slot to calculate its restart_lsn";
3117+
3118+
# Then arrange for the xl_running_xacts record for which the standby is
3119+
# waiting.
3120+
$self->safe_psql('postgres', 'SELECT pg_log_standby_snapshot()');
3121+
}
3122+
3123+
=pod
3124+
30953125
=item $node->create_logical_slot_on_standby(self, primary, slot_name, dbname)
30963126
30973127
Create logical replication slot on given standby
@@ -3117,21 +3147,9 @@ sub create_logical_slot_on_standby
31173147
'2>',
31183148
\$stderr);
31193149

3120-
# Once the slot's restart_lsn is determined, the standby looks for
3121-
# xl_running_xacts WAL record from the restart_lsn onwards. First wait
3122-
# until the slot restart_lsn is determined.
3123-
3124-
$self->poll_query_until(
3125-
'postgres', qq[
3126-
SELECT restart_lsn IS NOT NULL
3127-
FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name'
3128-
])
3129-
or die
3130-
"timed out waiting for logical slot to calculate its restart_lsn";
3131-
3132-
# Then arrange for the xl_running_xacts record for which pg_recvlogical is
3150+
# Arrange for the xl_running_xacts record for which pg_recvlogical is
31333151
# waiting.
3134-
$primary->safe_psql('postgres', 'SELECT pg_log_standby_snapshot()');
3152+
$primary->log_standby_snapshot($self, $slot_name);
31353153

31363154
$handle->finish();
31373155

src/test/recovery/t/035_standby_logical_decoding.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,8 +467,8 @@ sub wait_until_vacuum_can_remove
467467

468468
$psql_subscriber{run}->pump_nb();
469469

470-
# Speed up the subscription creation
471-
$node_primary->safe_psql('postgres', "SELECT pg_log_standby_snapshot()");
470+
# Log the standby snapshot to speed up the subscription creation
471+
$node_primary->log_standby_snapshot($node_standby, 'tap_sub');
472472

473473
# Explicitly shut down psql instance gracefully - to avoid hangs
474474
# or worse on windows

0 commit comments

Comments
 (0)