39
39
int vacuum_defer_cleanup_age ;
40
40
int max_standby_archive_delay = 30 * 1000 ;
41
41
int max_standby_streaming_delay = 30 * 1000 ;
42
+ bool log_recovery_conflict_waits = false;
42
43
43
44
static HTAB * RecoveryLockLists ;
44
45
@@ -53,6 +54,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
53
54
static void SendRecoveryConflictWithBufferPin (ProcSignalReason reason );
54
55
static XLogRecPtr LogCurrentRunningXacts (RunningTransactions CurrRunningXacts );
55
56
static void LogAccessExclusiveLocks (int nlocks , xl_standby_lock * locks );
57
+ static const char * get_recovery_conflict_desc (ProcSignalReason reason );
56
58
57
59
/*
58
60
* Keep track of all the locks owned by a given transaction.
@@ -218,31 +220,100 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
218
220
return false;
219
221
}
220
222
223
+ /*
224
+ * Log the recovery conflict.
225
+ *
226
+ * wait_start is the timestamp when the caller started to wait.
227
+ * now is the timestamp when this function has been called.
228
+ * wait_list is the list of virtual transaction ids assigned to
229
+ * conflicting processes.
230
+ */
231
+ void
232
+ LogRecoveryConflict (ProcSignalReason reason , TimestampTz wait_start ,
233
+ TimestampTz now , VirtualTransactionId * wait_list )
234
+ {
235
+ long secs ;
236
+ int usecs ;
237
+ long msecs ;
238
+ StringInfoData buf ;
239
+ int nprocs = 0 ;
240
+
241
+ TimestampDifference (wait_start , now , & secs , & usecs );
242
+ msecs = secs * 1000 + usecs / 1000 ;
243
+ usecs = usecs % 1000 ;
244
+
245
+ if (wait_list )
246
+ {
247
+ VirtualTransactionId * vxids ;
248
+
249
+ /* Construct a string of list of the conflicting processes */
250
+ vxids = wait_list ;
251
+ while (VirtualTransactionIdIsValid (* vxids ))
252
+ {
253
+ PGPROC * proc = BackendIdGetProc (vxids -> backendId );
254
+
255
+ /* proc can be NULL if the target backend is not active */
256
+ if (proc )
257
+ {
258
+ if (nprocs == 0 )
259
+ {
260
+ initStringInfo (& buf );
261
+ appendStringInfo (& buf , "%d" , proc -> pid );
262
+ }
263
+ else
264
+ appendStringInfo (& buf , ", %d" , proc -> pid );
265
+
266
+ nprocs ++ ;
267
+ }
268
+
269
+ vxids ++ ;
270
+ }
271
+ }
272
+
273
+ /*
274
+ * If wait_list is specified, report the list of PIDs of active
275
+ * conflicting backends in a detail message. Note that if all the backends
276
+ * in the list are not active, no detail message is logged.
277
+ */
278
+ ereport (LOG ,
279
+ errmsg ("recovery still waiting after %ld.%03d ms: %s" ,
280
+ msecs , usecs , _ (get_recovery_conflict_desc (reason ))),
281
+ nprocs > 0 ? errdetail_log_plural ("Conflicting process: %s." ,
282
+ "Conflicting processes: %s." ,
283
+ nprocs , buf .data ) : 0 );
284
+
285
+ if (nprocs > 0 )
286
+ pfree (buf .data );
287
+ }
288
+
221
289
/*
222
290
* This is the main executioner for any query backend that conflicts with
223
291
* recovery processing. Judgement has already been passed on it within
224
292
* a specific rmgr. Here we just issue the orders to the procs. The procs
225
293
* then throw the required error as instructed.
226
294
*
227
- * If report_waiting is true, "waiting" is reported in PS display if necessary.
228
- * If the caller has already reported that, report_waiting should be false.
229
- * Otherwise, "waiting" is reported twice unexpectedly.
295
+ * If report_waiting is true, "waiting" is reported in PS display and the
296
+ * wait for recovery conflict is reported in the log, if necessary. If
297
+ * the caller is responsible for reporting them, report_waiting should be
298
+ * false. Otherwise, both the caller and this function report the same
299
+ * thing unexpectedly.
230
300
*/
231
301
static void
232
302
ResolveRecoveryConflictWithVirtualXIDs (VirtualTransactionId * waitlist ,
233
303
ProcSignalReason reason , uint32 wait_event_info ,
234
304
bool report_waiting )
235
305
{
236
306
TimestampTz waitStart = 0 ;
237
- char * new_status ;
307
+ char * new_status = NULL ;
308
+ bool logged_recovery_conflict = false;
238
309
239
310
/* Fast exit, to avoid a kernel call if there's no work to be done. */
240
311
if (!VirtualTransactionIdIsValid (* waitlist ))
241
312
return ;
242
313
243
- if (report_waiting )
314
+ /* Set the wait start timestamp for reporting */
315
+ if (report_waiting && (log_recovery_conflict_waits || update_process_title ))
244
316
waitStart = GetCurrentTimestamp ();
245
- new_status = NULL ; /* we haven't changed the ps display */
246
317
247
318
while (VirtualTransactionIdIsValid (* waitlist ))
248
319
{
@@ -252,25 +323,6 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
252
323
/* wait until the virtual xid is gone */
253
324
while (!VirtualXactLock (* waitlist , false))
254
325
{
255
- /*
256
- * Report via ps if we have been waiting for more than 500 msec
257
- * (should that be configurable?)
258
- */
259
- if (update_process_title && new_status == NULL && report_waiting &&
260
- TimestampDifferenceExceeds (waitStart , GetCurrentTimestamp (),
261
- 500 ))
262
- {
263
- const char * old_status ;
264
- int len ;
265
-
266
- old_status = get_ps_display (& len );
267
- new_status = (char * ) palloc (len + 8 + 1 );
268
- memcpy (new_status , old_status , len );
269
- strcpy (new_status + len , " waiting" );
270
- set_ps_display (new_status );
271
- new_status [len ] = '\0' ; /* truncate off " waiting" */
272
- }
273
-
274
326
/* Is it time to kill it? */
275
327
if (WaitExceedsMaxStandbyDelay (wait_event_info ))
276
328
{
@@ -289,6 +341,50 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
289
341
if (pid != 0 )
290
342
pg_usleep (5000L );
291
343
}
344
+
345
+ if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL ))
346
+ {
347
+ TimestampTz now = 0 ;
348
+ bool maybe_log_conflict ;
349
+ bool maybe_update_title ;
350
+
351
+ maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict );
352
+ maybe_update_title = (update_process_title && new_status == NULL );
353
+
354
+ /* Get the current timestamp if not reported yet */
355
+ if (maybe_log_conflict || maybe_update_title )
356
+ now = GetCurrentTimestamp ();
357
+
358
+ /*
359
+ * Report via ps if we have been waiting for more than 500
360
+ * msec (should that be configurable?)
361
+ */
362
+ if (maybe_update_title &&
363
+ TimestampDifferenceExceeds (waitStart , now , 500 ))
364
+ {
365
+ const char * old_status ;
366
+ int len ;
367
+
368
+ old_status = get_ps_display (& len );
369
+ new_status = (char * ) palloc (len + 8 + 1 );
370
+ memcpy (new_status , old_status , len );
371
+ strcpy (new_status + len , " waiting" );
372
+ set_ps_display (new_status );
373
+ new_status [len ] = '\0' ; /* truncate off " waiting" */
374
+ }
375
+
376
+ /*
377
+ * Emit the log message if the startup process is waiting
378
+ * longer than deadlock_timeout for the recovery conflict
379
+ * identified by "reason".
380
+ */
381
+ if (maybe_log_conflict &&
382
+ TimestampDifferenceExceeds (waitStart , now , DeadlockTimeout ))
383
+ {
384
+ LogRecoveryConflict (reason , waitStart , now , waitlist );
385
+ logged_recovery_conflict = true;
386
+ }
387
+ }
292
388
}
293
389
294
390
/* The virtual transaction is gone now, wait for the next one */
@@ -405,9 +501,18 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
405
501
* hot-standby backend processes. If deadlock_timeout is reached in
406
502
* this function, all the backends holding the conflicting locks are
407
503
* requested to check themselves for deadlocks.
504
+ *
505
+ * logging_conflict should be true if the recovery conflict has not been
506
+ * logged yet even though logging is enabled. After deadlock_timeout is
507
+ * reached and the request for deadlock check is sent, we wait again to
508
+ * be signaled by the release of the lock if logging_conflict is false.
509
+ * Otherwise we return without waiting again so that the caller can report
510
+ * the recovery conflict. In this case, then, this function is called again
511
+ * with logging_conflict=false (because the recovery conflict has already
512
+ * been logged) and we will wait again for the lock to be released.
408
513
*/
409
514
void
410
- ResolveRecoveryConflictWithLock (LOCKTAG locktag )
515
+ ResolveRecoveryConflictWithLock (LOCKTAG locktag , bool logging_conflict )
411
516
{
412
517
TimestampTz ltime ;
413
518
@@ -494,6 +599,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
494
599
backends ++ ;
495
600
}
496
601
602
+ /*
603
+ * Exit if the recovery conflict has not been logged yet even though
604
+ * logging is enabled, so that the caller can log that. Then
605
+ * ResolveRecoveryConflictWithLock() is called again and we will wait again
606
+ * for the lock to be released.
607
+ */
608
+ if (logging_conflict )
609
+ goto cleanup ;
610
+
497
611
/*
498
612
* Wait again here to be signaled by the release of the Relation Lock,
499
613
to prevent the subsequent ResolveRecoveryConflictWithLock() from causing
@@ -1209,3 +1323,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
1209
1323
nmsgs * sizeof (SharedInvalidationMessage ));
1210
1324
XLogInsert (RM_STANDBY_ID , XLOG_INVALIDATIONS );
1211
1325
}
1326
+
1327
+ /* Return the description of recovery conflict */
1328
+ static const char *
1329
+ get_recovery_conflict_desc (ProcSignalReason reason )
1330
+ {
1331
+ const char * reasonDesc = gettext_noop ("unknown reason" );
1332
+
1333
+ switch (reason )
1334
+ {
1335
+ case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN :
1336
+ reasonDesc = gettext_noop ("recovery conflict on buffer pin" );
1337
+ break ;
1338
+ case PROCSIG_RECOVERY_CONFLICT_LOCK :
1339
+ reasonDesc = gettext_noop ("recovery conflict on lock" );
1340
+ break ;
1341
+ case PROCSIG_RECOVERY_CONFLICT_TABLESPACE :
1342
+ reasonDesc = gettext_noop ("recovery conflict on tablespace" );
1343
+ break ;
1344
+ case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT :
1345
+ reasonDesc = gettext_noop ("recovery conflict on snapshot" );
1346
+ break ;
1347
+ case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK :
1348
+ reasonDesc = gettext_noop ("recovery conflict on buffer deadlock" );
1349
+ break ;
1350
+ case PROCSIG_RECOVERY_CONFLICT_DATABASE :
1351
+ reasonDesc = gettext_noop ("recovery conflict on database" );
1352
+ break ;
1353
+ default :
1354
+ break ;
1355
+ }
1356
+
1357
+ return reasonDesc ;
1358
+ }
0 commit comments