Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0650ff2

Browse files
committed
Add GUC to log long wait times on recovery conflicts.
This commit adds GUC log_recovery_conflict_waits that controls whether a log message is produced when the startup process is waiting longer than deadlock_timeout for recovery conflicts. This is useful in determining if recovery conflicts prevent the recovery from applying WAL. Note that currently a log message is produced only when recovery conflict has not been resolved yet even after deadlock_timeout passes, i.e., only when the startup process is still waiting for recovery conflict even after deadlock_timeout. Author: Bertrand Drouvot, Masahiko Sawada Reviewed-by: Alvaro Herrera, Kyotaro Horiguchi, Fujii Masao Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
1 parent f7a1a80 commit 0650ff2

File tree

8 files changed

+292
-30
lines changed

8 files changed

+292
-30
lines changed

doc/src/sgml/config.sgml

+22
Original file line numberDiff line numberDiff line change
@@ -6952,6 +6952,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
69526952
</listitem>
69536953
</varlistentry>
69546954

6955+
<varlistentry id="guc-log-recovery-conflict-waits" xreflabel="log_recovery_conflict_waits">
6956+
<term><varname>log_recovery_conflict_waits</varname> (<type>boolean</type>)
6957+
<indexterm>
6958+
<primary><varname>log_recovery_conflict_waits</varname> configuration parameter</primary>
6959+
</indexterm>
6960+
</term>
6961+
<listitem>
6962+
<para>
6963+
Controls whether a log message is produced when the startup process
6964+
is waiting longer than <varname>deadlock_timeout</varname>
6965+
for recovery conflicts. This is useful in determining if recovery
6966+
conflicts prevent the recovery from applying WAL.
6967+
</para>
6968+
6969+
<para>
6970+
The default is <literal>off</literal>. This parameter can only be set
6971+
in the <filename>postgresql.conf</filename> file or on the server
6972+
command line.
6973+
</para>
6974+
</listitem>
6975+
</varlistentry>
6976+
69556977
<varlistentry id="guc-log-parameter-max-length" xreflabel="log_parameter_max_length">
69566978
<term><varname>log_parameter_max_length</varname> (<type>integer</type>)
69576979
<indexterm>

doc/src/sgml/high-availability.sgml

+6
Original file line numberDiff line numberDiff line change
@@ -2071,6 +2071,12 @@ if (!triggered)
20712071
server. The <structname>pg_stat_database</structname> system view also contains
20722072
summary information.
20732073
</para>
2074+
2075+
<para>
2076+
Users can control whether a log message is produced when WAL replay is waiting
2077+
longer than <varname>deadlock_timeout</varname> for conflicts. This
2078+
is controlled by the <xref linkend="guc-log-recovery-conflict-waits"/> parameter.
2079+
</para>
20742080
</sect2>
20752081

20762082
<sect2 id="hot-standby-admin">

src/backend/storage/buffer/bufmgr.c

+30
Original file line numberDiff line numberDiff line change
@@ -3809,6 +3809,8 @@ LockBufferForCleanup(Buffer buffer)
38093809
{
38103810
BufferDesc *bufHdr;
38113811
char *new_status = NULL;
3812+
TimestampTz waitStart = 0;
3813+
bool logged_recovery_conflict = false;
38123814

38133815
Assert(BufferIsPinned(buffer));
38143816
Assert(PinCountWaitBuf == NULL);
@@ -3882,6 +3884,34 @@ LockBufferForCleanup(Buffer buffer)
38823884
new_status[len] = '\0'; /* truncate off " waiting" */
38833885
}
38843886

3887+
/*
3888+
* Emit the log message if the startup process is waiting longer
3889+
* than deadlock_timeout for recovery conflict on buffer pin.
3890+
*
3891+
* Skip this if first time through because the startup process has
3892+
* not started waiting yet in this case. So, the wait start
3893+
* timestamp is set after this logic.
3894+
*/
3895+
if (waitStart != 0 && !logged_recovery_conflict)
3896+
{
3897+
TimestampTz now = GetCurrentTimestamp();
3898+
3899+
if (TimestampDifferenceExceeds(waitStart, now,
3900+
DeadlockTimeout))
3901+
{
3902+
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
3903+
waitStart, now, NULL);
3904+
logged_recovery_conflict = true;
3905+
}
3906+
}
3907+
3908+
/*
3909+
* Set the wait start timestamp if logging is enabled and first
3910+
* time through.
3911+
*/
3912+
if (log_recovery_conflict_waits && waitStart == 0)
3913+
waitStart = GetCurrentTimestamp();
3914+
38853915
/* Publish the bufid that Startup process waits on */
38863916
SetStartupBufferPinWaitBufId(buffer - 1);
38873917
/* Set alarm and then wait to be signaled by UnpinBuffer() */

src/backend/storage/ipc/standby.c

+173-26
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
int vacuum_defer_cleanup_age;
4040
int max_standby_archive_delay = 30 * 1000;
4141
int max_standby_streaming_delay = 30 * 1000;
42+
bool log_recovery_conflict_waits = false;
4243

4344
static HTAB *RecoveryLockLists;
4445

@@ -53,6 +54,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
5354
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
5455
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
5556
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
57+
static const char *get_recovery_conflict_desc(ProcSignalReason reason);
5658

5759
/*
5860
* Keep track of all the locks owned by a given transaction.
@@ -218,31 +220,100 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
218220
return false;
219221
}
220222

223+
/*
224+
* Log the recovery conflict.
225+
*
226+
* wait_start is the timestamp when the caller started to wait.
227+
* now is the timestamp when this function has been called.
228+
* wait_list is the list of virtual transaction ids assigned to
229+
* conflicting processes.
230+
*/
231+
void
232+
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
233+
TimestampTz now, VirtualTransactionId *wait_list)
234+
{
235+
long secs;
236+
int usecs;
237+
long msecs;
238+
StringInfoData buf;
239+
int nprocs = 0;
240+
241+
TimestampDifference(wait_start, now, &secs, &usecs);
242+
msecs = secs * 1000 + usecs / 1000;
243+
usecs = usecs % 1000;
244+
245+
if (wait_list)
246+
{
247+
VirtualTransactionId *vxids;
248+
249+
/* Construct a string of list of the conflicting processes */
250+
vxids = wait_list;
251+
while (VirtualTransactionIdIsValid(*vxids))
252+
{
253+
PGPROC *proc = BackendIdGetProc(vxids->backendId);
254+
255+
/* proc can be NULL if the target backend is not active */
256+
if (proc)
257+
{
258+
if (nprocs == 0)
259+
{
260+
initStringInfo(&buf);
261+
appendStringInfo(&buf, "%d", proc->pid);
262+
}
263+
else
264+
appendStringInfo(&buf, ", %d", proc->pid);
265+
266+
nprocs++;
267+
}
268+
269+
vxids++;
270+
}
271+
}
272+
273+
/*
274+
* If wait_list is specified, report the list of PIDs of active
275+
* conflicting backends in a detail message. Note that if all the backends
276+
* in the list are not active, no detail message is logged.
277+
*/
278+
ereport(LOG,
279+
errmsg("recovery still waiting after %ld.%03d ms: %s",
280+
msecs, usecs, _(get_recovery_conflict_desc(reason))),
281+
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
282+
"Conflicting processes: %s.",
283+
nprocs, buf.data) : 0);
284+
285+
if (nprocs > 0)
286+
pfree(buf.data);
287+
}
288+
221289
/*
222290
* This is the main executioner for any query backend that conflicts with
223291
* recovery processing. Judgement has already been passed on it within
224292
* a specific rmgr. Here we just issue the orders to the procs. The procs
225293
* then throw the required error as instructed.
226294
*
227-
* If report_waiting is true, "waiting" is reported in PS display if necessary.
228-
* If the caller has already reported that, report_waiting should be false.
229-
* Otherwise, "waiting" is reported twice unexpectedly.
295+
* If report_waiting is true, "waiting" is reported in PS display and the
296+
* wait for recovery conflict is reported in the log, if necessary. If
297+
* the caller is responsible for reporting them, report_waiting should be
298+
* false. Otherwise, both the caller and this function report the same
299+
* thing unexpectedly.
230300
*/
231301
static void
232302
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
233303
ProcSignalReason reason, uint32 wait_event_info,
234304
bool report_waiting)
235305
{
236306
TimestampTz waitStart = 0;
237-
char *new_status;
307+
char *new_status = NULL;
308+
bool logged_recovery_conflict = false;
238309

239310
/* Fast exit, to avoid a kernel call if there's no work to be done. */
240311
if (!VirtualTransactionIdIsValid(*waitlist))
241312
return;
242313

243-
if (report_waiting)
314+
/* Set the wait start timestamp for reporting */
315+
if (report_waiting && (log_recovery_conflict_waits || update_process_title))
244316
waitStart = GetCurrentTimestamp();
245-
new_status = NULL; /* we haven't changed the ps display */
246317

247318
while (VirtualTransactionIdIsValid(*waitlist))
248319
{
@@ -252,25 +323,6 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
252323
/* wait until the virtual xid is gone */
253324
while (!VirtualXactLock(*waitlist, false))
254325
{
255-
/*
256-
* Report via ps if we have been waiting for more than 500 msec
257-
* (should that be configurable?)
258-
*/
259-
if (update_process_title && new_status == NULL && report_waiting &&
260-
TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
261-
500))
262-
{
263-
const char *old_status;
264-
int len;
265-
266-
old_status = get_ps_display(&len);
267-
new_status = (char *) palloc(len + 8 + 1);
268-
memcpy(new_status, old_status, len);
269-
strcpy(new_status + len, " waiting");
270-
set_ps_display(new_status);
271-
new_status[len] = '\0'; /* truncate off " waiting" */
272-
}
273-
274326
/* Is it time to kill it? */
275327
if (WaitExceedsMaxStandbyDelay(wait_event_info))
276328
{
@@ -289,6 +341,50 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
289341
if (pid != 0)
290342
pg_usleep(5000L);
291343
}
344+
345+
if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
346+
{
347+
TimestampTz now = 0;
348+
bool maybe_log_conflict;
349+
bool maybe_update_title;
350+
351+
maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
352+
maybe_update_title = (update_process_title && new_status == NULL);
353+
354+
/* Get the current timestamp if not report yet */
355+
if (maybe_log_conflict || maybe_update_title)
356+
now = GetCurrentTimestamp();
357+
358+
/*
359+
* Report via ps if we have been waiting for more than 500
360+
* msec (should that be configurable?)
361+
*/
362+
if (maybe_update_title &&
363+
TimestampDifferenceExceeds(waitStart, now, 500))
364+
{
365+
const char *old_status;
366+
int len;
367+
368+
old_status = get_ps_display(&len);
369+
new_status = (char *) palloc(len + 8 + 1);
370+
memcpy(new_status, old_status, len);
371+
strcpy(new_status + len, " waiting");
372+
set_ps_display(new_status);
373+
new_status[len] = '\0'; /* truncate off " waiting" */
374+
}
375+
376+
/*
377+
* Emit the log message if the startup process is waiting
378+
* longer than deadlock_timeout for recovery conflict on
379+
* buffer pin.
380+
*/
381+
if (maybe_log_conflict &&
382+
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
383+
{
384+
LogRecoveryConflict(reason, waitStart, now, waitlist);
385+
logged_recovery_conflict = true;
386+
}
387+
}
292388
}
293389

294390
/* The virtual transaction is gone now, wait for the next one */
@@ -405,9 +501,18 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
405501
* hot-standby backend processes. If deadlock_timeout is reached in
406502
* this function, all the backends holding the conflicting locks are
407503
* requested to check themselves for deadlocks.
504+
*
505+
* logging_conflict should be true if the recovery conflict has not been
506+
* logged yet even though logging is enabled. After deadlock_timeout is
507+
* reached and the request for deadlock check is sent, we wait again to
508+
* be signaled by the release of the lock if logging_conflict is false.
509+
* Otherwise we return without waiting again so that the caller can report
510+
* the recovery conflict. In this case, then, this function is called again
511+
* with logging_conflict=false (because the recovery conflict has already
512+
* been logged) and we will wait again for the lock to be released.
408513
*/
409514
void
410-
ResolveRecoveryConflictWithLock(LOCKTAG locktag)
515+
ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
411516
{
412517
TimestampTz ltime;
413518

@@ -494,6 +599,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
494599
backends++;
495600
}
496601

602+
/*
603+
* Exit if the recovery conflict has not been logged yet even though
604+
* logging is enabled, so that the caller can log that. Then
605+
* RecoveryConflictWithLock() is called again and we will wait again
606+
* for the lock to be released.
607+
*/
608+
if (logging_conflict)
609+
goto cleanup;
610+
497611
/*
498612
* Wait again here to be signaled by the release of the Relation Lock,
499613
* to prevent the subsequent RecoveryConflictWithLock() from causing
@@ -1209,3 +1323,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
12091323
nmsgs * sizeof(SharedInvalidationMessage));
12101324
XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
12111325
}
1326+
1327+
/* Return the description of recovery conflict */
1328+
static const char *
1329+
get_recovery_conflict_desc(ProcSignalReason reason)
1330+
{
1331+
const char *reasonDesc = gettext_noop("unknown reason");
1332+
1333+
switch (reason)
1334+
{
1335+
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
1336+
reasonDesc = gettext_noop("recovery conflict on buffer pin");
1337+
break;
1338+
case PROCSIG_RECOVERY_CONFLICT_LOCK:
1339+
reasonDesc = gettext_noop("recovery conflict on lock");
1340+
break;
1341+
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
1342+
reasonDesc = gettext_noop("recovery conflict on tablespace");
1343+
break;
1344+
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
1345+
reasonDesc = gettext_noop("recovery conflict on snapshot");
1346+
break;
1347+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
1348+
reasonDesc = gettext_noop("recovery conflict on buffer deadlock");
1349+
break;
1350+
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
1351+
reasonDesc = gettext_noop("recovery conflict on database");
1352+
break;
1353+
default:
1354+
break;
1355+
}
1356+
1357+
return reasonDesc;
1358+
}

0 commit comments

Comments
 (0)