Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d004147

Browse files
Fix nbtree metapage cache upgrade bug.
Commit 857f9c3, which taught nbtree VACUUM to avoid unnecessary index scans, bumped the nbtree version number from 2 to 3, while adding the ability for nbtree indexes to be upgraded on-the-fly. Various assertions that assumed that an nbtree index was always on version 2 had to be changed to accept any supported version (version 2 or 3 on Postgres 11). However, a few assertions were missed in the initial commit, all of which were in code paths that cache a local copy of the metapage metadata, where the index had been expected to be on the current version (no longer version 2) as a generic sanity check. Rather than simply update the assertions, follow-up commit 0a64b45 intentionally made the metapage caching code update the per-backend cached metadata version without changing the on-disk version at the same time. This could even happen when the planner needed to determine the height of a B-Tree for costing purposes. The assertions only fail on Postgres v12 when upgrading from v10, because they were adjusted to use the authoritative shared memory metapage by v12's commit dd299df. To fix, remove the cache-only upgrade mechanism entirely, and update the assertions themselves to accept any supported version (go back to using the cached version in v12). The fix is almost a full revert of commit 0a64b45 on the v11 branch. VACUUM only considers the authoritative metapage, and never bothers with a locally cached version, whereas everywhere else isn't interested in the metapage fields that were added by commit 857f9c3. It seems unlikely that this bug has affected any user on v11. Reported-By: Christoph Berg Bug: #15896 Discussion: https://postgr.es/m/15896-5b25e260fdb0b081%40postgresql.org Backpatch: 11-, where VACUUM was taught to avoid unnecessary index scans.
1 parent bc8393c commit d004147

File tree

3 files changed

+40
-77
lines changed

3 files changed

+40
-77
lines changed

src/backend/access/nbtree/nbtpage.c

+36-75
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
#include "storage/predicate.h"
3434
#include "utils/snapmgr.h"
3535

36-
static void _bt_cachemetadata(Relation rel, BTMetaPageData *input);
3736
static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
3837
static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack);
3938
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
@@ -109,48 +108,13 @@ _bt_upgrademetapage(Page page)
109108
((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
110109
}
111110

112-
/*
113-
* Cache metadata from input meta page to rel->rd_amcache.
114-
*/
115-
static void
116-
_bt_cachemetadata(Relation rel, BTMetaPageData *input)
117-
{
118-
BTMetaPageData *cached_metad;
119-
120-
/* We assume rel->rd_amcache was already freed by caller */
121-
Assert(rel->rd_amcache == NULL);
122-
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
123-
sizeof(BTMetaPageData));
124-
125-
/* Meta page should be of supported version */
126-
Assert(input->btm_version >= BTREE_MIN_VERSION &&
127-
input->btm_version <= BTREE_VERSION);
128-
129-
cached_metad = (BTMetaPageData *) rel->rd_amcache;
130-
if (input->btm_version >= BTREE_NOVAC_VERSION)
131-
{
132-
/* Version with compatible meta-data, no need to upgrade */
133-
memcpy(cached_metad, input, sizeof(BTMetaPageData));
134-
}
135-
else
136-
{
137-
/*
138-
* Upgrade meta-data: copy available information from meta-page and
139-
* fill new fields with default values.
140-
*
141-
* Note that we cannot upgrade to version 4+ without a REINDEX, since
142-
* extensive on-disk changes are required.
143-
*/
144-
memcpy(cached_metad, input, offsetof(BTMetaPageData, btm_oldest_btpo_xact));
145-
cached_metad->btm_version = BTREE_NOVAC_VERSION;
146-
cached_metad->btm_oldest_btpo_xact = InvalidTransactionId;
147-
cached_metad->btm_last_cleanup_num_heap_tuples = -1.0;
148-
}
149-
}
150-
151111
/*
152112
* Get metadata from share-locked buffer containing metapage, while performing
153-
* standard sanity checks. Sanity checks here must match _bt_getroot().
113+
* standard sanity checks.
114+
*
115+
* Callers that cache data returned here in local cache should note that an
116+
* on-the-fly upgrade using _bt_upgrademetapage() can change the version field
117+
* and BTREE_NOVAC_VERSION specific fields without invalidating local cache.
154118
*/
155119
static BTMetaPageData *
156120
_bt_getmeta(Relation rel, Buffer metabuf)
@@ -291,8 +255,6 @@ Buffer
291255
_bt_getroot(Relation rel, int access)
292256
{
293257
Buffer metabuf;
294-
Page metapg;
295-
BTPageOpaque metaopaque;
296258
Buffer rootbuf;
297259
Page rootpage;
298260
BTPageOpaque rootopaque;
@@ -345,30 +307,13 @@ _bt_getroot(Relation rel, int access)
345307
}
346308

347309
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
348-
metapg = BufferGetPage(metabuf);
349-
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
350-
metad = BTPageGetMeta(metapg);
351-
352-
/* sanity-check the metapage */
353-
if (!P_ISMETA(metaopaque) ||
354-
metad->btm_magic != BTREE_MAGIC)
355-
ereport(ERROR,
356-
(errcode(ERRCODE_INDEX_CORRUPTED),
357-
errmsg("index \"%s\" is not a btree",
358-
RelationGetRelationName(rel))));
359-
360-
if (metad->btm_version < BTREE_MIN_VERSION ||
361-
metad->btm_version > BTREE_VERSION)
362-
ereport(ERROR,
363-
(errcode(ERRCODE_INDEX_CORRUPTED),
364-
errmsg("version mismatch in index \"%s\": file version %d, "
365-
"current version %d, minimal supported version %d",
366-
RelationGetRelationName(rel),
367-
metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION)));
310+
metad = _bt_getmeta(rel, metabuf);
368311

369312
/* if no root page initialized yet, do it */
370313
if (metad->btm_root == P_NONE)
371314
{
315+
Page metapg;
316+
372317
/* If access = BT_READ, caller doesn't want us to create root yet */
373318
if (access == BT_READ)
374319
{
@@ -410,6 +355,8 @@ _bt_getroot(Relation rel, int access)
410355
rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
411356
rootopaque->btpo.level = 0;
412357
rootopaque->btpo_cycleid = 0;
358+
/* Get raw page pointer for metapage */
359+
metapg = BufferGetPage(metabuf);
413360

414361
/* NO ELOG(ERROR) till meta is updated */
415362
START_CRIT_SECTION();
@@ -471,7 +418,7 @@ _bt_getroot(Relation rel, int access)
471418
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
472419
LockBuffer(rootbuf, BT_READ);
473420

474-
/* okay, metadata is correct, release lock on it */
421+
/* okay, metadata is correct, release lock on it without caching */
475422
_bt_relbuf(rel, metabuf);
476423
}
477424
else
@@ -483,7 +430,9 @@ _bt_getroot(Relation rel, int access)
483430
/*
484431
* Cache the metapage data for next time
485432
*/
486-
_bt_cachemetadata(rel, metad);
433+
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
434+
sizeof(BTMetaPageData));
435+
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
487436

488437
/*
489438
* We are done with the metapage; arrange to release it via first
@@ -657,16 +606,19 @@ _bt_getrootheight(Relation rel)
657606
/*
658607
* Cache the metapage data for next time
659608
*/
660-
_bt_cachemetadata(rel, metad);
661-
/* We shouldn't have cached it if any of these fail */
662-
Assert(metad->btm_magic == BTREE_MAGIC);
663-
Assert(metad->btm_version >= BTREE_NOVAC_VERSION);
664-
Assert(metad->btm_fastroot != P_NONE);
609+
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
610+
sizeof(BTMetaPageData));
611+
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
665612
_bt_relbuf(rel, metabuf);
666613
}
667614

668615
/* Get cached page */
669616
metad = (BTMetaPageData *) rel->rd_amcache;
617+
/* We shouldn't have cached it if any of these fail */
618+
Assert(metad->btm_magic == BTREE_MAGIC);
619+
Assert(metad->btm_version >= BTREE_MIN_VERSION);
620+
Assert(metad->btm_version <= BTREE_VERSION);
621+
Assert(metad->btm_fastroot != P_NONE);
670622

671623
return metad->btm_fastlevel;
672624
}
@@ -707,17 +659,26 @@ _bt_heapkeyspace(Relation rel)
707659

708660
/*
709661
* Cache the metapage data for next time
662+
*
663+
* An on-the-fly version upgrade performed by _bt_upgrademetapage()
664+
* can change the nbtree version for an index without invalidating any
665+
* local cache. This is okay because it can only happen when moving
666+
* from version 2 to version 3, both of which are !heapkeyspace
667+
* versions.
710668
*/
711-
_bt_cachemetadata(rel, metad);
712-
/* We shouldn't have cached it if any of these fail */
713-
Assert(metad->btm_magic == BTREE_MAGIC);
714-
Assert(metad->btm_version >= BTREE_NOVAC_VERSION);
715-
Assert(metad->btm_fastroot != P_NONE);
669+
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
670+
sizeof(BTMetaPageData));
671+
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
716672
_bt_relbuf(rel, metabuf);
717673
}
718674

719675
/* Get cached page */
720676
metad = (BTMetaPageData *) rel->rd_amcache;
677+
/* We shouldn't have cached it if any of these fail */
678+
Assert(metad->btm_magic == BTREE_MAGIC);
679+
Assert(metad->btm_version >= BTREE_MIN_VERSION);
680+
Assert(metad->btm_version <= BTREE_VERSION);
681+
Assert(metad->btm_fastroot != P_NONE);
721682

722683
return metad->btm_version > BTREE_NOVAC_VERSION;
723684
}

src/backend/access/nbtree/nbtxlog.c

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
107107
md->btm_level = xlrec->level;
108108
md->btm_fastroot = xlrec->fastroot;
109109
md->btm_fastlevel = xlrec->fastlevel;
110+
/* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
111+
Assert(md->btm_version >= BTREE_NOVAC_VERSION);
110112
md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
111113
md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
112114

src/include/access/nbtree.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,12 @@ typedef BTPageOpaqueData *BTPageOpaque;
9797
typedef struct BTMetaPageData
9898
{
9999
uint32 btm_magic; /* should contain BTREE_MAGIC */
100-
uint32 btm_version; /* should contain BTREE_VERSION */
100+
uint32 btm_version; /* nbtree version (always <= BTREE_VERSION) */
101101
BlockNumber btm_root; /* current root location */
102102
uint32 btm_level; /* tree level of the root page */
103103
BlockNumber btm_fastroot; /* current "fast" root location */
104104
uint32 btm_fastlevel; /* tree level of the "fast" root page */
105-
/* following fields are available since page version 3 */
105+
/* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */
106106
TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
107107
* pages */
108108
float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples

0 commit comments

Comments
 (0)