Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e656657

Browse files
committed
Fix RBM_ZERO_AND_LOCK.
Commit 210622c accidentally zeroed out pages even if they were found in the buffer pool. It should always lock the page, but it should only zero pages that were not already valid. Otherwise, concurrent readers that hold only a pin could see corrupted page contents changing under their feet. While here, rename ZeroAndLockBuffer() to match the RBM_ flag name. Also restore some useful comments lost by 210622c's refactoring, and add some new ones to clarify why we need to use the BM_IO_IN_PROGRESS infrastructure despite not doing I/O. Reported-by: Noah Misch <noah@leadboat.com> Reported-by: Alexander Lakhin <exclusion@gmail.com> Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org> (earlier version) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier version) Discussion: https://postgr.es/m/20240512171658.7e.nmisch@google.com Discussion: https://postgr.es/m/7ed10231-ce47-03d5-d3f9-4aea0dc7d5a4%40gmail.com
1 parent 00ac25a commit e656657

File tree

1 file changed

+67
-21
lines changed

1 file changed

+67
-21
lines changed

src/backend/storage/buffer/bufmgr.c

+67-21
Original file line numberDiff line numberDiff line change
@@ -1010,43 +1010,89 @@ ExtendBufferedRelTo(BufferManagerRelation bmr,
10101010
}
10111011

10121012
/*
1013-
* Zero a buffer and lock it, as part of the implementation of
1013+
* Lock and optionally zero a buffer, as part of the implementation of
10141014
* RBM_ZERO_AND_LOCK or RBM_ZERO_AND_CLEANUP_LOCK. The buffer must be already
1015-
* pinned. It does not have to be valid, but it is valid and locked on
1016-
* return.
1015+
* pinned. If the buffer is not already valid, it is zeroed and made valid.
10171016
*/
10181017
static void
1019-
ZeroBuffer(Buffer buffer, ReadBufferMode mode)
1018+
ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
10201019
{
10211020
BufferDesc *bufHdr;
1022-
uint32 buf_state;
1021+
bool need_to_zero;
1022+
bool isLocalBuf = BufferIsLocal(buffer);
10231023

10241024
Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
10251025

1026-
if (BufferIsLocal(buffer))
1026+
if (already_valid)
1027+
{
1028+
/*
1029+
* If the caller already knew the buffer was valid, we can skip some
1030+
* header interaction. The caller just wants to lock the buffer.
1031+
*/
1032+
need_to_zero = false;
1033+
}
1034+
else if (isLocalBuf)
1035+
{
1036+
/* Simple case for non-shared buffers. */
10271037
bufHdr = GetLocalBufferDescriptor(-buffer - 1);
1038+
need_to_zero = (pg_atomic_read_u32(&bufHdr->state) & BM_VALID) == 0;
1039+
}
10281040
else
10291041
{
1042+
/*
1043+
* Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
1044+
* concurrently. Even though we aren't doing I/O, that ensures that
1045+
* we don't zero a page that someone else has pinned. An exclusive
1046+
* content lock wouldn't be enough, because readers are allowed to
1047+
* drop the content lock after determining that a tuple is visible
1048+
* (see buffer access rules in README).
1049+
*/
10301050
bufHdr = GetBufferDescriptor(buffer - 1);
1031-
if (mode == RBM_ZERO_AND_LOCK)
1032-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1033-
else
1034-
LockBufferForCleanup(buffer);
1051+
need_to_zero = StartBufferIO(bufHdr, true, false);
10351052
}
10361053

1037-
memset(BufferGetPage(buffer), 0, BLCKSZ);
1038-
1039-
if (BufferIsLocal(buffer))
1054+
if (need_to_zero)
10401055
{
1041-
buf_state = pg_atomic_read_u32(&bufHdr->state);
1042-
buf_state |= BM_VALID;
1043-
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1056+
memset(BufferGetPage(buffer), 0, BLCKSZ);
1057+
1058+
/*
1059+
* Grab the buffer content lock before marking the page as valid, to
1060+
* make sure that no other backend sees the zeroed page before the
1061+
* caller has had a chance to initialize it.
1062+
*
1063+
* Since no-one else can be looking at the page contents yet, there is
1064+
* no difference between an exclusive lock and a cleanup-strength
1065+
* lock. (Note that we cannot use LockBuffer() or
1066+
* LockBufferForCleanup() here, because they assert that the buffer is
1067+
* already valid.)
1068+
*/
1069+
if (!isLocalBuf)
1070+
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
1071+
1072+
if (isLocalBuf)
1073+
{
1074+
/* Only need to adjust flags */
1075+
uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
1076+
1077+
buf_state |= BM_VALID;
1078+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
1079+
}
1080+
else
1081+
{
1082+
/* Set BM_VALID, terminate IO, and wake up any waiters */
1083+
TerminateBufferIO(bufHdr, false, BM_VALID, true);
1084+
}
10441085
}
1045-
else
1086+
else if (!isLocalBuf)
10461087
{
1047-
buf_state = LockBufHdr(bufHdr);
1048-
buf_state |= BM_VALID;
1049-
UnlockBufHdr(bufHdr, buf_state);
1088+
/*
1089+
* The buffer is valid, so we can't zero it. The caller still expects
1090+
* the page to be locked on return.
1091+
*/
1092+
if (mode == RBM_ZERO_AND_LOCK)
1093+
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
1094+
else
1095+
LockBufferForCleanup(buffer);
10501096
}
10511097
}
10521098

@@ -1185,7 +1231,7 @@ ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence,
11851231

11861232
buffer = PinBufferForBlock(rel, smgr, smgr_persistence,
11871233
forkNum, blockNum, strategy, &found);
1188-
ZeroBuffer(buffer, mode);
1234+
ZeroAndLockBuffer(buffer, mode, found);
11891235
return buffer;
11901236
}
11911237

0 commit comments

Comments
 (0)