Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit afdeff1

Browse files
committed
Add temporary debug info to help debug 019_replslot_limit.pl failures.
I have not been able to reproduce the occasional failures of 019_replslot_limit.pl that we are seeing in the buildfarm, and not for lack of trying. The additional logging and increased log level will hopefully help. This will be reverted once the cause is identified. Discussion: https://postgr.es/m/20220218231415.c4plkp4i3reqcwip@alap3.anarazel.de
1 parent 9467321 commit afdeff1

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

src/backend/replication/slot.c

+21
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ ReplicationSlotInitialize(void)
177177
static void
178178
ReplicationSlotShmemExit(int code, Datum arg)
179179
{
180+
/* temp debugging aid to analyze 019_replslot_limit failures */
181+
elog(DEBUG3, "replication slot exit hook, %s active slot",
182+
MyReplicationSlot != NULL ? "with" : "without");
183+
180184
/* Make sure active replication slots are released */
181185
if (MyReplicationSlot != NULL)
182186
ReplicationSlotRelease();
@@ -554,6 +558,9 @@ ReplicationSlotCleanup(void)
554558
Assert(MyReplicationSlot == NULL);
555559

556560
restart:
561+
/* temp debugging aid to analyze 019_replslot_limit failures */
562+
elog(DEBUG3, "temporary replication slot cleanup: begin");
563+
557564
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
558565
for (i = 0; i < max_replication_slots; i++)
559566
{
@@ -579,6 +586,8 @@ ReplicationSlotCleanup(void)
579586
}
580587

581588
LWLockRelease(ReplicationSlotControlLock);
589+
590+
elog(DEBUG3, "temporary replication slot cleanup: done");
582591
}
583592

584593
/*
@@ -1284,6 +1293,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
12841293
(void) kill(active_pid, SIGTERM);
12851294
last_signaled_pid = active_pid;
12861295
}
1296+
else
1297+
{
1298+
/* temp debugging aid to analyze 019_replslot_limit failures */
1299+
elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"",
1300+
active_pid, NameStr(slotname));
1301+
}
12871302

12881303
/* Wait until the slot is released. */
12891304
ConditionVariableSleep(&s->active_cv,
@@ -1347,6 +1362,10 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
13471362
XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
13481363

13491364
restart:
1365+
/* temp debugging aid to analyze 019_replslot_limit failures */
1366+
elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X",
1367+
LSN_FORMAT_ARGS(oldestLSN));
1368+
13501369
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
13511370
for (int i = 0; i < max_replication_slots; i++)
13521371
{
@@ -1372,6 +1391,8 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
13721391
ReplicationSlotsComputeRequiredLSN();
13731392
}
13741393

1394+
elog(DEBUG3, "done invalidating obsolete replication slots");
1395+
13751396
return invalidated;
13761397
}
13771398

src/bin/pg_basebackup/pg_basebackup.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -700,8 +700,16 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
700700
bgchild = fork();
701701
if (bgchild == 0)
702702
{
703+
int ret;
704+
703705
/* in child process */
704-
exit(LogStreamerMain(param));
706+
ret = LogStreamerMain(param);
707+
708+
/* temp debugging aid to analyze 019_replslot_limit failures */
709+
if (verbose)
710+
pg_log_info("log streamer with pid %d exiting", getpid());
711+
712+
exit(ret);
705713
}
706714
else if (bgchild < 0)
707715
{

src/test/recovery/t/019_replslot_limit.pl

+4-1
Original file line numberDiff line numberDiff line change
@@ -316,13 +316,16 @@
316316
max_wal_size = 2MB
317317
log_checkpoints = yes
318318
max_slot_wal_keep_size = 1MB
319+
320+
# temp debugging aid to analyze 019_replslot_limit failures
321+
log_min_messages=debug3
319322
));
320323
$node_primary3->start;
321324
$node_primary3->safe_psql('postgres',
322325
"SELECT pg_create_physical_replication_slot('rep3')");
323326
# Take backup
324327
$backup_name = 'my_backup';
325-
$node_primary3->backup($backup_name);
328+
$node_primary3->backup($backup_name, backup_options => ['--verbose']);
326329
# Create standby
327330
my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
328331
$node_standby3->init_from_backup($node_primary3, $backup_name,

0 commit comments

Comments
 (0)