Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 551aa6b

Browse files
committed
Improve wording of log messages triggered by max_slot_wal_keep_size.
The one about "terminating process to release replication slot" told you nothing about why that was happening. The one about "invalidating slot because its restart_lsn exceeds max_slot_wal_keep_size" told you what was happening, but violated our message style guideline about keeping the primary message short. Add DETAIL/HINT lines to carry the appropriate detail and make the two cases more uniform.

While here, fix bogus test logic in 019_replslot_limit.pl: if it timed out without seeing the expected log message, no test failure would be reported. This is flat broken since commit 549ec20 removed the test counts; even before that it was horribly bad style, since you'd only get told that not all tests had been run.

Kyotaro Horiguchi, reviewed by Bertrand Drouvot; test fixes by me

Discussion: https://postgr.es/m/20211214.130456.2233153190058148084.horikyota.ntt@gmail.com
1 parent d7e39d7 commit 551aa6b

File tree

2 files changed

+21
-11
lines changed

2 files changed

+21
-11
lines changed

src/backend/replication/slot.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,8 +1293,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
12931293
if (last_signaled_pid != active_pid)
12941294
{
12951295
ereport(LOG,
1296-
(errmsg("terminating process %d to release replication slot \"%s\"",
1297-
active_pid, NameStr(slotname))));
1296+
errmsg("terminating process %d to release replication slot \"%s\"",
1297+
active_pid, NameStr(slotname)),
1298+
errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1299+
LSN_FORMAT_ARGS(restart_lsn),
1300+
(unsigned long long) (oldestLSN - restart_lsn)),
1301+
errhint("You might need to increase max_slot_wal_keep_size."));
12981302

12991303
(void) kill(active_pid, SIGTERM);
13001304
last_signaled_pid = active_pid;
@@ -1331,9 +1335,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
13311335
ReplicationSlotRelease();
13321336

13331337
ereport(LOG,
1334-
(errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
1335-
NameStr(slotname),
1336-
LSN_FORMAT_ARGS(restart_lsn))));
1338+
errmsg("invalidating obsolete replication slot \"%s\"",
1339+
NameStr(slotname)),
1340+
errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
1341+
LSN_FORMAT_ARGS(restart_lsn),
1342+
(unsigned long long) (oldestLSN - restart_lsn)),
1343+
errhint("You might need to increase max_slot_wal_keep_size."));
13371344

13381345
/* done with this slot for now */
13391346
break;

src/test/recovery/t/019_replslot_limit.pl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,7 @@
185185
for (my $i = 0; $i < 10000; $i++)
186186
{
187187
if (find_in_log(
188-
$node_primary,
189-
"invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size",
188+
$node_primary, 'invalidating obsolete replication slot "rep1"',
190189
$logstart))
191190
{
192191
$invalidated = 1;
@@ -379,6 +378,7 @@
379378
kill 'STOP', $senderpid, $receiverpid;
380379
advance_wal($node_primary3, 2);
381380

381+
my $msg_logged = 0;
382382
my $max_attempts = $PostgreSQL::Test::Utils::timeout_default;
383383
while ($max_attempts-- >= 0)
384384
{
@@ -387,11 +387,12 @@
387387
"terminating process $senderpid to release replication slot \"rep3\"",
388388
$logstart))
389389
{
390-
ok(1, "walsender termination logged");
390+
$msg_logged = 1;
391391
last;
392392
}
393393
sleep 1;
394394
}
395+
ok($msg_logged, "walsender termination logged");
395396

396397
# Now let the walsender continue; slot should be killed now.
397398
# (Must not let walreceiver run yet; otherwise the standby could start another
@@ -402,18 +403,20 @@
402403
"lost")
403404
or die "timed out waiting for slot to be lost";
404405

406+
$msg_logged = 0;
405407
$max_attempts = $PostgreSQL::Test::Utils::timeout_default;
406408
while ($max_attempts-- >= 0)
407409
{
408410
if (find_in_log(
409-
$node_primary3,
410-
'invalidating slot "rep3" because its restart_lsn', $logstart))
411+
$node_primary3, 'invalidating obsolete replication slot "rep3"',
412+
$logstart))
411413
{
412-
ok(1, "slot invalidation logged");
414+
$msg_logged = 1;
413415
last;
414416
}
415417
sleep 1;
416418
}
419+
ok($msg_logged, "slot invalidation logged");
417420

418421
# Now let the walreceiver continue, so that the node can be stopped cleanly
419422
kill 'CONT', $receiverpid;

0 commit comments

Comments
 (0)