Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c9d8edc

Browse files
committed
Repair bufmgr deadlock problem reported by Michael Wildpaner. Must take
share lock on a buffer being written out before releasing BufMgrLock in the BufferAlloc code path; if we do it later we might block on someone who's re-pinned the buffer. I believe this is only an issue for BufferAlloc and not the other places that call FlushBuffer. BufferSync must continue to do it the old way since it may well be trying to write buffers that other backends have pinned; but it should not be holding any conflicting locks. FlushRelationBuffers is okay since it's got exclusive lock at the relation level.
1 parent 4ea43bd commit c9d8edc

File tree

1 file changed

+37
-11
lines changed

1 file changed

+37
-11
lines changed

src/backend/storage/buffer/bufmgr.c

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.183 2004/12/31 22:00:49 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.184 2005/01/03 18:49:41 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -84,7 +84,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
8484
bool bufferLockHeld);
8585
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
8686
bool *foundPtr);
87-
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
87+
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool earlylock);
8888
static void write_buffer(Buffer buffer, bool unpin);
8989

9090

@@ -340,6 +340,10 @@ BufferAlloc(Relation reln,
340340
* allocated -- ours. If it had a pin it wouldn't have been on
341341
* the free list. No one else could have pinned it between
342342
* StrategyGetBuffer and here because we have the BufMgrLock.
343+
*
344+
* (We must pin the buffer before releasing BufMgrLock ourselves,
345+
* to ensure StrategyGetBuffer won't give the same buffer to someone
346+
* else.)
343347
*/
344348
Assert(buf->refcount == 0);
345349
buf->refcount = 1;
@@ -367,9 +371,20 @@ BufferAlloc(Relation reln,
367371

368372
/*
369373
* Write the buffer out, being careful to release BufMgrLock
370-
* while doing the I/O.
374+
* while doing the I/O. We also tell FlushBuffer to share-lock
375+
* the buffer before releasing BufMgrLock. This is safe because
376+
* we know no other backend currently has the buffer pinned,
377+
* therefore no one can have it locked either, so we can always
378+
* get the lock without blocking. It is necessary because if
379+
* we release BufMgrLock first, it's possible for someone else
380+
* to pin and exclusive-lock the buffer before we get to the
381+
* share-lock, causing us to block. If the someone else then
382+
* blocks on a lock we hold, deadlock ensues. This has been
383+
* observed to happen when two backends are both trying to split
384+
* btree index pages, and the second one just happens to be
385+
* trying to split the page the first one got from the freelist.
371386
*/
372-
FlushBuffer(buf, NULL);
387+
FlushBuffer(buf, NULL, true);
373388

374389
/*
375390
* Somebody could have allocated another buffer for the same
@@ -766,7 +781,7 @@ BufferSync(int percent, int maxpages)
766781
PinBuffer(bufHdr, true);
767782
StartBufferIO(bufHdr, false);
768783

769-
FlushBuffer(bufHdr, NULL);
784+
FlushBuffer(bufHdr, NULL, false);
770785

771786
TerminateBufferIO(bufHdr, 0);
772787
UnpinBuffer(bufHdr, true);
@@ -1018,11 +1033,16 @@ BufferGetFileNode(Buffer buffer)
10181033
* If the caller has an smgr reference for the buffer's relation, pass it
10191034
* as the second parameter. If not, pass NULL. (Do not open relation
10201035
* while holding BufMgrLock!)
1036+
*
1037+
* When earlylock is TRUE, we grab the per-buffer sharelock before releasing
1038+
* BufMgrLock, rather than after. Normally this would be a bad idea since
1039+
* we might deadlock, but it is safe and necessary when called from
1040+
* BufferAlloc() --- see comments therein.
10211041
*/
10221042
static void
1023-
FlushBuffer(BufferDesc *buf, SMgrRelation reln)
1043+
FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool earlylock)
10241044
{
1025-
Buffer buffer;
1045+
Buffer buffer = BufferDescriptorGetBuffer(buf);
10261046
XLogRecPtr recptr;
10271047
ErrorContextCallback errcontext;
10281048

@@ -1033,6 +1053,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
10331053
/* To check if block content changed while flushing. - vadim 01/17/97 */
10341054
buf->flags &= ~BM_JUST_DIRTIED;
10351055

1056+
/*
1057+
* If earlylock, grab buffer sharelock before anyone else could re-lock
1058+
* the buffer.
1059+
*/
1060+
if (earlylock)
1061+
LockBuffer(buffer, BUFFER_LOCK_SHARE);
1062+
10361063
/* Release BufMgrLock while doing xlog work */
10371064
LWLockRelease(BufMgrLock);
10381065

@@ -1046,14 +1073,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
10461073
if (reln == NULL)
10471074
reln = smgropen(buf->tag.rnode);
10481075

1049-
buffer = BufferDescriptorGetBuffer(buf);
1050-
10511076
/*
10521077
* Protect buffer content against concurrent update. (Note that
10531078
* hint-bit updates can still occur while the write is in progress,
10541079
* but we assume that that will not invalidate the data written.)
10551080
*/
1056-
LockBuffer(buffer, BUFFER_LOCK_SHARE);
1081+
if (!earlylock)
1082+
LockBuffer(buffer, BUFFER_LOCK_SHARE);
10571083

10581084
/*
10591085
* Force XLOG flush for buffer' LSN. This implements the basic WAL
@@ -1485,7 +1511,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
14851511
{
14861512
StartBufferIO(bufHdr, false);
14871513

1488-
FlushBuffer(bufHdr, rel->rd_smgr);
1514+
FlushBuffer(bufHdr, rel->rd_smgr, false);
14891515

14901516
TerminateBufferIO(bufHdr, 0);
14911517
}

0 commit comments

Comments
 (0)