Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b95a720

Browse files
Re-enable max_standby_delay = -1 using deadlock detection on startup
process. If startup waits on a buffer pin we send a request to all backends to cancel themselves if they are holding the buffer pin required and they are also waiting on a lock. If not, startup waits until max_standby_delay before cancelling any backend waiting for the requested buffer pin.
1 parent fafa374 commit b95a720

File tree

8 files changed

+86
-26
lines changed

8 files changed

+86
-26
lines changed

src/backend/storage/ipc/procsignal.c

+4-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.4 2010/01/23 16:37:12 sriggs Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.5 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -272,6 +272,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
272272
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
273273
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
274274

275+
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
276+
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
277+
275278
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
276279
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
277280

src/backend/storage/ipc/standby.c

+46-16
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.11 2010/02/11 19:35:22 sriggs Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.12 2010/02/13 01:32:19 sriggs Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -127,6 +127,9 @@ WaitExceedsMaxStandbyDelay(void)
127127
long delay_secs;
128128
int delay_usecs;
129129

130+
if (MaxStandbyDelay == -1)
131+
return false;
132+
130133
/* Are we past max_standby_delay? */
131134
TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
132135
&delay_secs, &delay_usecs);
@@ -351,15 +354,15 @@ ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
351354
* they hold one of the buffer pins that is blocking Startup process. If so,
352355
* backends will take an appropriate error action, ERROR or FATAL.
353356
*
354-
* A secondary purpose of this is to avoid deadlocks that might occur between
355-
* the Startup process and lock waiters. Deadlocks occur because if queries
357+
* We also check for deadlocks before we wait, though applications that cause
358+
* these will be extremely rare. Deadlocks occur because if queries
356359
* wait on a lock, that must be behind an AccessExclusiveLock, which can only
357-
* be clared if the Startup process replays a transaction completion record.
358-
* If Startup process is waiting then that is a deadlock. If we allowed a
359-
* setting of max_standby_delay that meant "wait forever" we would then need
360-
* special code to protect against deadlock. Such deadlocks are rare, so the
361-
* code would be almost certainly buggy, so we avoid both long waits and
362-
* deadlocks using the same mechanism.
360+
* be cleared if the Startup process replays a transaction completion record.
361+
* If Startup process is also waiting then that is a deadlock. The deadlock
362+
* can occur if the query is waiting and then the Startup sleeps, or if
363+
* Startup is sleeping and the the query waits on a lock. We protect against
364+
* only the former sequence here, the latter sequence is checked prior to
365+
* the query sleeping, in CheckRecoveryConflictDeadlock().
363366
*/
364367
void
365368
ResolveRecoveryConflictWithBufferPin(void)
@@ -368,11 +371,23 @@ ResolveRecoveryConflictWithBufferPin(void)
368371

369372
Assert(InHotStandby);
370373

371-
/*
372-
* Signal immediately or set alarm for later.
373-
*/
374374
if (MaxStandbyDelay == 0)
375-
SendRecoveryConflictWithBufferPin();
375+
{
376+
/*
377+
* We don't want to wait, so just tell everybody holding the pin to
378+
* get out of town.
379+
*/
380+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
381+
}
382+
else if (MaxStandbyDelay == -1)
383+
{
384+
/*
385+
* Send out a request to check for buffer pin deadlocks before we wait.
386+
* This is fairly cheap, so no need to wait for deadlock timeout before
387+
* trying to send it out.
388+
*/
389+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
390+
}
376391
else
377392
{
378393
TimestampTz now;
@@ -386,13 +401,25 @@ ResolveRecoveryConflictWithBufferPin(void)
386401
&standby_delay_secs, &standby_delay_usecs);
387402

388403
if (standby_delay_secs >= MaxStandbyDelay)
389-
SendRecoveryConflictWithBufferPin();
404+
{
405+
/*
406+
* We're already behind, so clear a path as quickly as possible.
407+
*/
408+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
409+
}
390410
else
391411
{
392412
TimestampTz fin_time; /* Expected wake-up time by timer */
393413
long timer_delay_secs; /* Amount of time we set timer for */
394414
int timer_delay_usecs = 0;
395415

416+
/*
417+
* Send out a request to check for buffer pin deadlocks before we wait.
418+
* This is fairly cheap, so no need to wait for deadlock timeout before
419+
* trying to send it out.
420+
*/
421+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
422+
396423
/*
397424
* How much longer we should wait?
398425
*/
@@ -435,15 +462,18 @@ ResolveRecoveryConflictWithBufferPin(void)
435462
}
436463

437464
void
438-
SendRecoveryConflictWithBufferPin(void)
465+
SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
439466
{
467+
Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
468+
reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
469+
440470
/*
441471
* We send signal to all backends to ask them if they are holding
442472
* the buffer pin which is delaying the Startup process. We must
443473
* not set the conflict flag yet, since most backends will be innocent.
444474
* Let the SIGUSR1 handling in each backend decide their own fate.
445475
*/
446-
CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
476+
CancelDBBackends(InvalidOid, reason, false);
447477
}
448478

449479
/*

src/backend/storage/lmgr/proc.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.215 2010/02/08 04:33:54 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.216 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -45,6 +45,7 @@
4545
#include "storage/pmsignal.h"
4646
#include "storage/proc.h"
4747
#include "storage/procarray.h"
48+
#include "storage/procsignal.h"
4849
#include "storage/spin.h"
4950

5051

@@ -556,6 +557,15 @@ HaveNFreeProcs(int n)
556557
return (n <= 0);
557558
}
558559

560+
bool
561+
IsWaitingForLock(void)
562+
{
563+
if (lockAwaited == NULL)
564+
return false;
565+
566+
return true;
567+
}
568+
559569
/*
560570
* Cancel any pending wait for lock, when aborting a transaction.
561571
*
@@ -1670,7 +1680,7 @@ CheckStandbyTimeout(void)
16701680
now = GetCurrentTimestamp();
16711681

16721682
if (now >= statement_fin_time)
1673-
SendRecoveryConflictWithBufferPin();
1683+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
16741684
else
16751685
{
16761686
/* Not time yet, so (re)schedule the interrupt */

src/backend/tcop/postgres.c

+15-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.587 2010/01/23 17:04:05 sriggs Exp $
11+
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.588 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
* NOTES
1414
* this is the "main" module of the postgres backend and
@@ -2278,6 +2278,9 @@ errdetail_recovery_conflict(void)
22782278
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
22792279
errdetail("User query might have needed to see row versions that must be removed.");
22802280
break;
2281+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
2282+
errdetail("User transaction caused buffer deadlock with recovery.");
2283+
break;
22812284
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
22822285
errdetail("User was connected to a database that must be dropped.");
22832286
break;
@@ -2754,6 +2757,15 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
27542757
RecoveryConflictReason = reason;
27552758
switch (reason)
27562759
{
2760+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
2761+
/*
2762+
* If we aren't waiting for a lock we can never deadlock.
2763+
*/
2764+
if (!IsWaitingForLock())
2765+
return;
2766+
2767+
/* Intentional drop through to check wait for pin */
2768+
27572769
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
27582770
/*
27592771
* If we aren't blocking the Startup process there is
@@ -2819,6 +2831,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
28192831
elog(FATAL, "Unknown conflict mode");
28202832
}
28212833

2834+
Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
2835+
28222836
/*
28232837
* If it's safe to interrupt, and we're waiting for input or a lock,
28242838
* service the interrupt immediately

src/backend/utils/misc/guc.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Written by Peter Eisentraut <peter_e@gmx.net>.
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.538 2010/02/01 13:40:28 sriggs Exp $
13+
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.539 2010/02/13 01:32:20 sriggs Exp $
1414
*
1515
*--------------------------------------------------------------------
1616
*/
@@ -1381,7 +1381,7 @@ static struct config_int ConfigureNamesInt[] =
13811381
NULL
13821382
},
13831383
&MaxStandbyDelay,
1384-
30, 0, INT_MAX, NULL, NULL
1384+
30, -1, INT_MAX, NULL, NULL
13851385
},
13861386

13871387
{

src/include/storage/proc.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.119 2010/01/23 16:37:12 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.120 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -189,6 +189,7 @@ extern void ProcQueueInit(PROC_QUEUE *queue);
189189
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
190190
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
191191
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
192+
extern bool IsWaitingForLock(void);
192193
extern void LockWaitCancel(void);
193194

194195
extern void ProcWaitForSignal(void);

src/include/storage/procsignal.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.4 2010/01/23 16:37:12 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.5 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -38,6 +38,7 @@ typedef enum
3838
PROCSIG_RECOVERY_CONFLICT_LOCK,
3939
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
4040
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
41+
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
4142

4243
NUM_PROCSIGNALS /* Must be last! */
4344
} ProcSignalReason;

src/include/storage/standby.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.7 2010/01/31 19:01:11 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.8 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,6 +16,7 @@
1616

1717
#include "access/xlog.h"
1818
#include "storage/lock.h"
19+
#include "storage/procsignal.h"
1920
#include "storage/relfilenode.h"
2021

2122
extern int vacuum_defer_cleanup_age;
@@ -30,7 +31,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
3031
extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
3132

3233
extern void ResolveRecoveryConflictWithBufferPin(void);
33-
extern void SendRecoveryConflictWithBufferPin(void);
34+
extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
3435
extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
3536

3637
/*

0 commit comments

Comments
 (0)