Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f923260

Browse files
committed
Revise FlushRelationBuffers/ReleaseRelationBuffers per discussion with
Hiroshi. ReleaseRelationBuffers now removes rel's buffers from pool, instead of merely marking them nondirty. The old code would leave valid buffers for a deleted relation, which didn't cause any known problems but can't possibly be a good idea. There were several places which called ReleaseRelationBuffers *and* FlushRelationBuffers, which is now unnecessary; but there were others that did not. FlushRelationBuffers no longer emits a warning notice if it finds dirty buffers to flush, because with the current bufmgr behavior that's not an unexpected condition. Also, FlushRelationBuffers will flush out all dirty buffers for the relation regardless of block number. This ensures that pg_upgrade's expectations are met about tuple on-row status bits being up-to-date on disk. Lastly, tweak BufTableDelete() to clear the buffer's tag so that no one can mistake it for being a still-valid buffer for the page it once held. Formerly, the buffer would not be found by buffer hashtable searches after BufTableDelete(), but it would still be thought to belong to its old relation by the routines that sequentially scan the shared-buffer array. Again I know of no bugs caused by that, but it still can't be a good idea.
1 parent db90fdf commit f923260

File tree

7 files changed

+87
-76
lines changed

7 files changed

+87
-76
lines changed

src/backend/catalog/heap.c

+2-6
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.125 2000/04/12 17:14:55 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.126 2000/05/19 03:22:31 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1197,8 +1197,6 @@ RelationTruncateIndexes(Relation heapRelation)
11971197
* dirty, they're just dropped without bothering to flush to disk.
11981198
*/
11991199
ReleaseRelationBuffers(currentIndex);
1200-
if (FlushRelationBuffers(currentIndex, (BlockNumber) 0, false) < 0)
1201-
elog(ERROR, "RelationTruncateIndexes: unable to flush index from buffer pool");
12021200

12031201
/* Now truncate the actual data and set blocks to zero */
12041202
smgrtruncate(DEFAULT_SMGR, currentIndex, 0);
@@ -1266,8 +1264,6 @@ heap_truncate(char *relname)
12661264
*/
12671265

12681266
ReleaseRelationBuffers(rel);
1269-
if (FlushRelationBuffers(rel, (BlockNumber) 0, false) < 0)
1270-
elog(ERROR, "heap_truncate: unable to flush relation from buffer pool");
12711267

12721268
/* Now truncate the actual data and set blocks to zero */
12731269

@@ -1544,7 +1540,7 @@ heap_drop_with_catalog(const char *relname)
15441540
DeleteRelationTuple(rel);
15451541

15461542
/*
1547-
* release dirty buffers of this relation
1543+
* release dirty buffers of this relation; don't bother to write them
15481544
*/
15491545
ReleaseRelationBuffers(rel);
15501546

src/backend/catalog/index.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.108 2000/04/12 17:14:55 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.109 2000/05/19 03:22:31 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -2159,8 +2159,6 @@ reindex_index(Oid indexId, bool force)
21592159
* they're just dropped without bothering to flush to disk.
21602160
*/
21612161
ReleaseRelationBuffers(iRel);
2162-
if (FlushRelationBuffers(iRel, (BlockNumber) 0, false) < 0)
2163-
elog(ERROR, "reindex_index: unable to flush index from buffer pool");
21642162

21652163
/* Now truncate the actual data and set blocks to zero */
21662164
smgrtruncate(DEFAULT_SMGR, iRel, 0);

src/backend/commands/rename.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.43 2000/05/11 03:54:18 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.44 2000/05/19 03:22:29 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -240,7 +240,7 @@ renamerel(const char *oldrelname, const char *newrelname)
240240
* Since we hold the exclusive lock on the relation, we don't have to
241241
* worry about more blocks being read in while we finish the rename.
242242
*/
243-
if (FlushRelationBuffers(targetrelation, (BlockNumber) 0, true) < 0)
243+
if (FlushRelationBuffers(targetrelation, (BlockNumber) 0) < 0)
244244
elog(ERROR, "renamerel: unable to flush relation from buffer pool");
245245

246246
/*

src/backend/commands/vacuum.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.147 2000/04/12 17:14:59 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.148 2000/05/19 03:22:29 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1968,10 +1968,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
19681968
pfree(Nvpl.vpl_pagedesc);
19691969
}
19701970

1971-
/* truncate relation */
1971+
/* truncate relation, after flushing any dirty pages out to disk */
19721972
if (blkno < nblocks)
19731973
{
1974-
i = FlushRelationBuffers(onerel, blkno, false);
1974+
i = FlushRelationBuffers(onerel, blkno);
19751975
if (i < 0)
19761976
elog(FATAL, "VACUUM (vc_repair_frag): FlushRelationBuffers returned %d", i);
19771977
blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
@@ -2035,10 +2035,12 @@ vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages)
20352035
/*
20362036
* We have to flush "empty" end-pages (if changed, but who knows
20372037
* it) before truncation
2038+
*
2039+
* XXX is FlushBufferPool() still needed here?
20382040
*/
20392041
FlushBufferPool();
20402042

2041-
i = FlushRelationBuffers(onerel, nblocks, false);
2043+
i = FlushRelationBuffers(onerel, nblocks);
20422044
if (i < 0)
20432045
elog(FATAL, "VACUUM (vc_vacheap): FlushRelationBuffers returned %d", i);
20442046

src/backend/storage/buffer/buf_table.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.16 2000/01/26 05:56:50 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.17 2000/05/19 03:22:28 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -119,6 +119,15 @@ BufTableDelete(BufferDesc *buf)
119119
return FALSE;
120120
}
121121

122+
/*
123+
* Clear the buffer's tag. This doesn't matter for the hash table,
124+
* since the buffer is already removed from it, but it ensures that
125+
* sequential searches through the buffer table won't think the
126+
* buffer is still valid for its old page.
127+
*/
128+
buf->tag.relId.relId = InvalidOid;
129+
buf->tag.relId.dbId = InvalidOid;
130+
122131
return TRUE;
123132
}
124133

src/backend/storage/buffer/bufmgr.c

+64-57
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.80 2000/04/12 17:15:34 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.81 2000/05/19 03:22:28 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -607,7 +607,6 @@ BufferAlloc(Relation reln,
607607
{
608608
SpinRelease(BufMgrLock);
609609
elog(FATAL, "buffer wasn't in the buffer table\n");
610-
611610
}
612611

613612
/* record the database name and relation name for this buffer */
@@ -1585,9 +1584,9 @@ RelationGetNumberOfBlocks(Relation relation)
15851584
/* ---------------------------------------------------------------------
15861585
* ReleaseRelationBuffers
15871586
*
1588-
* this function unmarks all the dirty pages of a relation
1589-
* in the buffer pool so that at the end of transaction
1590-
* these pages will not be flushed. This is used when the
1587+
* This function removes all the buffered pages for a relation
1588+
* from the buffer pool. Dirty pages are simply dropped, without
1589+
* bothering to write them out first. This is used when the
15911590
* relation is about to be deleted. We assume that the caller
15921591
* holds an exclusive lock on the relation, which should assure
15931592
* that no new buffers will be acquired for the rel meanwhile.
@@ -1600,7 +1599,6 @@ void
16001599
ReleaseRelationBuffers(Relation rel)
16011600
{
16021601
Oid relid = RelationGetRelid(rel);
1603-
bool holding = false;
16041602
int i;
16051603
BufferDesc *buf;
16061604

@@ -1610,22 +1608,23 @@ ReleaseRelationBuffers(Relation rel)
16101608
{
16111609
buf = &LocalBufferDescriptors[i];
16121610
if (buf->tag.relId.relId == relid)
1611+
{
16131612
buf->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
1613+
LocalRefCount[i] = 0;
1614+
buf->tag.relId.relId = InvalidOid;
1615+
}
16141616
}
16151617
return;
16161618
}
16171619

1620+
SpinAcquire(BufMgrLock);
16181621
for (i = 1; i <= NBuffers; i++)
16191622
{
16201623
buf = &BufferDescriptors[i - 1];
1621-
if (!holding)
1622-
{
1623-
SpinAcquire(BufMgrLock);
1624-
holding = true;
1625-
}
16261624
recheck:
1627-
if (buf->tag.relId.dbId == MyDatabaseId &&
1628-
buf->tag.relId.relId == relid)
1625+
if (buf->tag.relId.relId == relid &&
1626+
(buf->tag.relId.dbId == MyDatabaseId ||
1627+
buf->tag.relId.dbId == (Oid) NULL))
16291628
{
16301629

16311630
/*
@@ -1661,21 +1660,24 @@ ReleaseRelationBuffers(Relation rel)
16611660
buf->refcount == 1);
16621661
/* ReleaseBuffer expects we do not hold the lock at entry */
16631662
SpinRelease(BufMgrLock);
1664-
holding = false;
16651663
ReleaseBuffer(i);
1664+
SpinAcquire(BufMgrLock);
16661665
}
1666+
/*
1667+
* And mark the buffer as no longer occupied by this rel.
1668+
*/
1669+
BufTableDelete(buf);
16671670
}
16681671
}
1669-
1670-
if (holding)
1671-
SpinRelease(BufMgrLock);
1672+
SpinRelease(BufMgrLock);
16721673
}
16731674

16741675
/* ---------------------------------------------------------------------
16751676
* DropBuffers
16761677
*
1677-
* This function marks all the buffers in the buffer cache for a
1678-
* particular database as clean. This is used when we destroy a
1678+
* This function removes all the buffers in the buffer cache for a
1679+
* particular database. Dirty pages are simply dropped, without
1680+
* bothering to write them out first. This is used when we destroy a
16791681
* database, to avoid trying to flush data to disk when the directory
16801682
* tree no longer exists. Implementation is pretty similar to
16811683
* ReleaseRelationBuffers() which is for destroying just one relation.
@@ -1719,6 +1721,10 @@ DropBuffers(Oid dbid)
17191721
* backends are running in that database.
17201722
*/
17211723
Assert(buf->flags & BM_FREE);
1724+
/*
1725+
* And mark the buffer as no longer occupied by this page.
1726+
*/
1727+
BufTableDelete(buf);
17221728
}
17231729
}
17241730
SpinRelease(BufMgrLock);
@@ -1812,34 +1818,39 @@ BufferPoolBlowaway()
18121818
/* ---------------------------------------------------------------------
18131819
* FlushRelationBuffers
18141820
*
1815-
* This function removes from the buffer pool all pages of a relation
1816-
* that have blocknumber >= specified block. Pages that are dirty are
1817-
* written out first. If expectDirty is false, a notice is emitted
1818-
* warning of dirty buffers, but we proceed anyway. An error code is
1819-
* returned if we fail to dump a dirty buffer or if we find one of
1821+
* This function flushes all dirty pages of a relation out to disk.
1822+
* Furthermore, pages that have blocknumber >= firstDelBlock are
1823+
* actually removed from the buffer pool. An error code is returned
1824+
* if we fail to dump a dirty buffer or if we find one of
18201825
* the target pages is pinned into the cache.
18211826
*
18221827
* This is used by VACUUM before truncating the relation to the given
1823-
* number of blocks. For VACUUM, we pass expectDirty = false since it
1824-
* could mean a bug in VACUUM if any of the unwanted pages were still
1825-
* dirty. (TRUNCATE TABLE also uses it in the same way.)
1826-
*
1827-
* This is also used by RENAME TABLE (with block=0 and expectDirty=true)
1828+
* number of blocks. (TRUNCATE TABLE also uses it in the same way.)
1829+
* It might seem unnecessary to flush dirty pages before firstDelBlock,
1830+
* since VACUUM should already have committed its changes. However,
1831+
* it is possible for there still to be dirty pages: if some page
1832+
* had unwritten on-row tuple status updates from a prior transaction,
1833+
* and VACUUM had no additional changes to make to that page, then
1834+
* VACUUM won't have written it. This is harmless in most cases but
1835+
* will break pg_upgrade, which relies on VACUUM to ensure that *all*
1836+
* tuples have correct on-row status. So, we check and flush all
1837+
* dirty pages of the rel regardless of block number.
1838+
*
1839+
* This is also used by RENAME TABLE (with firstDelBlock = 0)
18281840
* to clear out the buffer cache before renaming the physical files of
18291841
* a relation. Without that, some other backend might try to do a
18301842
* blind write of a buffer page (relying on the BlindId of the buffer)
18311843
* and fail because it's not got the right filename anymore.
18321844
*
1833-
* In both cases, the caller should be holding AccessExclusiveLock on
1845+
* In all cases, the caller should be holding AccessExclusiveLock on
18341846
* the target relation to ensure that no other backend is busy reading
18351847
* more blocks of the relation.
18361848
*
1837-
* Formerly, we considered it an error condition if we found unexpectedly
1838-
* dirty buffers. However, since BufferSync no longer forces out all
1849+
* Formerly, we considered it an error condition if we found dirty
1850+
* buffers here. However, since BufferSync no longer forces out all
18391851
* dirty buffers at every xact commit, it's possible for dirty buffers
18401852
* to still be present in the cache due to failure of an earlier
1841-
* transaction. So, downgrade the error to a mere notice. Maybe we
1842-
* shouldn't even emit a notice...
1853+
* transaction. So, must flush dirty buffers without complaint.
18431854
*
18441855
* Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
18451856
*
@@ -1848,8 +1859,9 @@ BufferPoolBlowaway()
18481859
* --------------------------------------------------------------------
18491860
*/
18501861
int
1851-
FlushRelationBuffers(Relation rel, BlockNumber block, bool expectDirty)
1862+
FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
18521863
{
1864+
Oid relid = RelationGetRelid(rel);
18531865
int i;
18541866
BufferDesc *buf;
18551867

@@ -1858,31 +1870,29 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool expectDirty)
18581870
for (i = 0; i < NLocBuffer; i++)
18591871
{
18601872
buf = &LocalBufferDescriptors[i];
1861-
if (buf->tag.relId.relId == RelationGetRelid(rel) &&
1862-
buf->tag.blockNum >= block)
1873+
if (buf->tag.relId.relId == relid)
18631874
{
18641875
if (buf->flags & BM_DIRTY)
18651876
{
1866-
if (!expectDirty)
1867-
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty",
1868-
RelationGetRelationName(rel),
1869-
block, buf->tag.blockNum);
18701877
if (FlushBuffer(-i - 1, false) != STATUS_OK)
18711878
{
18721879
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
1873-
RelationGetRelationName(rel),
1874-
block, buf->tag.blockNum);
1880+
RelationGetRelationName(rel), firstDelBlock,
1881+
buf->tag.blockNum);
18751882
return -1;
18761883
}
18771884
}
18781885
if (LocalRefCount[i] > 0)
18791886
{
18801887
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
1881-
RelationGetRelationName(rel), block,
1888+
RelationGetRelationName(rel), firstDelBlock,
18821889
buf->tag.blockNum, LocalRefCount[i]);
18831890
return -2;
18841891
}
1885-
buf->tag.relId.relId = InvalidOid;
1892+
if (buf->tag.blockNum >= firstDelBlock)
1893+
{
1894+
buf->tag.relId.relId = InvalidOid;
1895+
}
18861896
}
18871897
}
18881898
return 0;
@@ -1891,26 +1901,20 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool expectDirty)
18911901
SpinAcquire(BufMgrLock);
18921902
for (i = 0; i < NBuffers; i++)
18931903
{
1894-
recheck:
18951904
buf = &BufferDescriptors[i];
1896-
if (buf->tag.relId.relId == RelationGetRelid(rel) &&
1905+
recheck:
1906+
if (buf->tag.relId.relId == relid &&
18971907
(buf->tag.relId.dbId == MyDatabaseId ||
1898-
buf->tag.relId.dbId == (Oid) NULL) &&
1899-
buf->tag.blockNum >= block)
1908+
buf->tag.relId.dbId == (Oid) NULL))
19001909
{
19011910
if (buf->flags & BM_DIRTY)
19021911
{
19031912
PinBuffer(buf);
19041913
SpinRelease(BufMgrLock);
1905-
if (!expectDirty)
1906-
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d)",
1907-
RelationGetRelationName(rel), block,
1908-
buf->tag.blockNum,
1909-
PrivateRefCount[i], buf->refcount);
19101914
if (FlushBuffer(i + 1, true) != STATUS_OK)
19111915
{
19121916
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it",
1913-
RelationGetRelationName(rel), block,
1917+
RelationGetRelationName(rel), firstDelBlock,
19141918
buf->tag.blockNum,
19151919
PrivateRefCount[i], buf->refcount);
19161920
return -1;
@@ -1927,12 +1931,15 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool expectDirty)
19271931
{
19281932
SpinRelease(BufMgrLock);
19291933
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
1930-
RelationGetRelationName(rel), block,
1934+
RelationGetRelationName(rel), firstDelBlock,
19311935
buf->tag.blockNum,
19321936
PrivateRefCount[i], buf->refcount);
19331937
return -2;
19341938
}
1935-
BufTableDelete(buf);
1939+
if (buf->tag.blockNum >= firstDelBlock)
1940+
{
1941+
BufTableDelete(buf);
1942+
}
19361943
}
19371944
}
19381945
SpinRelease(BufMgrLock);

0 commit comments

Comments
 (0)