8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.183 2004/12/31 22:00:49 pgsql Exp $
11
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.184 2005/01/03 18:49:41 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
@@ -84,7 +84,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
84
84
bool bufferLockHeld );
85
85
static BufferDesc * BufferAlloc (Relation reln , BlockNumber blockNum ,
86
86
bool * foundPtr );
87
- static void FlushBuffer (BufferDesc * buf , SMgrRelation reln );
87
+ static void FlushBuffer (BufferDesc * buf , SMgrRelation reln , bool earlylock );
88
88
static void write_buffer (Buffer buffer , bool unpin );
89
89
90
90
@@ -340,6 +340,10 @@ BufferAlloc(Relation reln,
340
340
* allocated -- ours. If it had a pin it wouldn't have been on
341
341
* the free list. No one else could have pinned it between
342
342
* StrategyGetBuffer and here because we have the BufMgrLock.
343
+ *
344
+ * (We must pin the buffer before releasing BufMgrLock ourselves,
345
+ * to ensure StrategyGetBuffer won't give the same buffer to someone
346
+ * else.)
343
347
*/
344
348
Assert (buf -> refcount == 0 );
345
349
buf -> refcount = 1 ;
@@ -367,9 +371,20 @@ BufferAlloc(Relation reln,
367
371
368
372
/*
369
373
* Write the buffer out, being careful to release BufMgrLock
370
- * while doing the I/O.
374
+ * while doing the I/O. We also tell FlushBuffer to share-lock
375
+ * the buffer before releasing BufMgrLock. This is safe because
376
+ * we know no other backend currently has the buffer pinned,
377
+ * therefore no one can have it locked either, so we can always
378
+ * get the lock without blocking. It is necessary because if
379
+ * we release BufMgrLock first, it's possible for someone else
380
+ * to pin and exclusive-lock the buffer before we get to the
381
* share-lock, causing us to block. If that other backend then
382
+ * blocks on a lock we hold, deadlock ensues. This has been
383
+ * observed to happen when two backends are both trying to split
384
+ * btree index pages, and the second one just happens to be
385
+ * trying to split the page the first one got from the freelist.
371
386
*/
372
- FlushBuffer (buf , NULL );
387
+ FlushBuffer (buf , NULL , true );
373
388
374
389
/*
375
390
* Somebody could have allocated another buffer for the same
@@ -766,7 +781,7 @@ BufferSync(int percent, int maxpages)
766
781
PinBuffer (bufHdr , true);
767
782
StartBufferIO (bufHdr , false);
768
783
769
- FlushBuffer (bufHdr , NULL );
784
+ FlushBuffer (bufHdr , NULL , false );
770
785
771
786
TerminateBufferIO (bufHdr , 0 );
772
787
UnpinBuffer (bufHdr , true);
@@ -1018,11 +1033,16 @@ BufferGetFileNode(Buffer buffer)
1018
1033
* If the caller has an smgr reference for the buffer's relation, pass it
1019
1034
* as the second parameter. If not, pass NULL. (Do not open relation
1020
1035
* while holding BufMgrLock!)
1036
+ *
1037
+ * When earlylock is TRUE, we grab the per-buffer sharelock before releasing
1038
+ * BufMgrLock, rather than after. Normally this would be a bad idea since
1039
+ * we might deadlock, but it is safe and necessary when called from
1040
+ * BufferAlloc() --- see comments therein.
1021
1041
*/
1022
1042
static void
1023
- FlushBuffer (BufferDesc * buf , SMgrRelation reln )
1043
+ FlushBuffer (BufferDesc * buf , SMgrRelation reln , bool earlylock )
1024
1044
{
1025
- Buffer buffer ;
1045
+ Buffer buffer = BufferDescriptorGetBuffer ( buf ) ;
1026
1046
XLogRecPtr recptr ;
1027
1047
ErrorContextCallback errcontext ;
1028
1048
@@ -1033,6 +1053,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
1033
1053
/* To check if block content changed while flushing. - vadim 01/17/97 */
1034
1054
buf -> flags &= ~BM_JUST_DIRTIED ;
1035
1055
1056
+ /*
1057
+ * If earlylock, grab buffer sharelock before anyone else could re-lock
1058
+ * the buffer.
1059
+ */
1060
+ if (earlylock )
1061
+ LockBuffer (buffer , BUFFER_LOCK_SHARE );
1062
+
1036
1063
/* Release BufMgrLock while doing xlog work */
1037
1064
LWLockRelease (BufMgrLock );
1038
1065
@@ -1046,14 +1073,13 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
1046
1073
if (reln == NULL )
1047
1074
reln = smgropen (buf -> tag .rnode );
1048
1075
1049
- buffer = BufferDescriptorGetBuffer (buf );
1050
-
1051
1076
/*
1052
1077
* Protect buffer content against concurrent update. (Note that
1053
1078
* hint-bit updates can still occur while the write is in progress,
1054
1079
* but we assume that that will not invalidate the data written.)
1055
1080
*/
1056
- LockBuffer (buffer , BUFFER_LOCK_SHARE );
1081
+ if (!earlylock )
1082
+ LockBuffer (buffer , BUFFER_LOCK_SHARE );
1057
1083
1058
1084
/*
1059
1085
* Force XLOG flush for buffer's LSN. This implements the basic WAL
@@ -1485,7 +1511,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
1485
1511
{
1486
1512
StartBufferIO (bufHdr , false);
1487
1513
1488
- FlushBuffer (bufHdr , rel -> rd_smgr );
1514
+ FlushBuffer (bufHdr , rel -> rd_smgr , false );
1489
1515
1490
1516
TerminateBufferIO (bufHdr , 0 );
1491
1517
}
0 commit comments