Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6a46aba

Browse files
committed
Fix bugs in vacuum of shared rels, by keeping their relcache entries current.
When vacuum processes a relation it uses the corresponding relcache entry's relfrozenxid / relminmxid as a cutoff for when to remove tuples etc. Unfortunately for nailed relations (i.e. critical system catalogs) bugs could frequently lead to the corresponding relcache entry being stale. This set of bugs could cause actual data corruption as vacuum would potentially not remove the correct row versions, potentially reviving them at a later point. After 699bf7d some corruptions in this vein were prevented, but the additional error checks could also trigger spuriously. Examples of such errors are: ERROR: found xmin ... from before relfrozenxid ... and ERROR: found multixact ... from before relminmxid ... To be caused by this bug the errors have to occur on system catalog tables. The two bugs are: 1) Invalidations for nailed relations were ignored, based on the theory that the relcache entry for such tables doesn't change. Which is largely true, except for fields like relfrozenxid etc. This means that changes to relations vacuumed in other sessions weren't picked up by already existing sessions. Luckily autovacuum doesn't have particularly longrunning sessions. 2) For shared *and* nailed relations, the shared relcache init file was never invalidated while running. That means that for such tables (e.g. pg_authid, pg_database) it's not just already existing sessions that are affected, but even new connections are as well. That explains why the reports usually were about pg_authid et. al. To fix 1), revalidate the rd_rel portion of a relcache entry when invalid. This implies a bit of extra complexity to deal with bootstrapping, but it's not too bad. The fix for 2) is simpler, simply always remove both the shared and local init files. Author: Andres Freund Reviewed-By: Alvaro Herrera Discussion: https://postgr.es/m/20180525203736.crkbg36muzxrjj5e@alap3.anarazel.de https://postgr.es/m/CAMa1XUhKSJd98JW4o9StWPrfS=11bPgG+_GDMxe25TvUY4Sugg@mail.gmail.com https://postgr.es/m/CAKMFJucqbuoDRfxPDX39WhA3vJyxweRg_zDVXzncr6+5wOguWA@mail.gmail.com https://postgr.es/m/CAGewt-ujGpMLQ09gXcUFMZaZsGJC98VXHEFbF-tpPB0fB13K+A@mail.gmail.com Backpatch: 9.3-
1 parent 256d431 commit 6a46aba

File tree

3 files changed

+146
-72
lines changed

3 files changed

+146
-72
lines changed

src/backend/utils/cache/inval.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -515,10 +515,12 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
515515
(void) GetCurrentCommandId(true);
516516

517517
/*
518-
* If the relation being invalidated is one of those cached in the local
519-
* relcache init file, mark that we need to zap that file at commit.
518+
* If the relation being invalidated is one of those cached in a relcache
519+
* init file, mark that we need to zap that file at commit. For simplicity
520+
* invalidations for a specific database always invalidate the shared file
521+
* as well.
520522
*/
521-
if (OidIsValid(dbId) && RelationIdIsInInitFile(relId))
523+
if (RelationIdIsInInitFile(relId))
522524
transInvalInfo->RelcacheInitFileInval = true;
523525
}
524526

@@ -870,18 +872,26 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
870872

871873
if (RelcacheInitFileInval)
872874
{
875+
elog(trace_recovery(DEBUG4), "removing relcache init files for database %u",
876+
dbid);
877+
873878
/*
874-
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
875-
* but we should not use SetDatabasePath during recovery, since it is
879+
* RelationCacheInitFilePreInvalidate, when the invalidation message
880+
* is for a specific database, requires DatabasePath to be set, but we
881+
* should not use SetDatabasePath during recovery, since it is
876882
* intended to be used only once by normal backends. Hence, a quick
877883
* hack: set DatabasePath directly then unset after use.
878884
*/
879-
DatabasePath = GetDatabasePath(dbid, tsid);
880-
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
881-
DatabasePath);
885+
if (OidIsValid(dbid))
886+
DatabasePath = GetDatabasePath(dbid, tsid);
887+
882888
RelationCacheInitFilePreInvalidate();
883-
pfree(DatabasePath);
884-
DatabasePath = NULL;
889+
890+
if (OidIsValid(dbid))
891+
{
892+
pfree(DatabasePath);
893+
DatabasePath = NULL;
894+
}
885895
}
886896

887897
SendSharedInvalidMessages(msgs, nmsgs);

src/backend/utils/cache/relcache.c

Lines changed: 125 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
241241
static void RelationClearRelation(Relation relation, bool rebuild);
242242

243243
static void RelationReloadIndexInfo(Relation relation);
244+
static void RelationReloadNailed(Relation relation);
244245
static void RelationFlushRelation(Relation relation);
245246
static void RememberToFreeTupleDescAtEOX(TupleDesc td);
246247
static void AtEOXact_cleanup(Relation relation, bool isCommit);
@@ -277,7 +278,7 @@ static void IndexSupportInitialize(oidvector *indclass,
277278
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
278279
StrategyNumber numSupport);
279280
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
280-
static void unlink_initfile(const char *initfilename);
281+
static void unlink_initfile(const char *initfilename, int elevel);
281282

282283

283284
/*
@@ -1782,7 +1783,16 @@ RelationIdGetRelation(Oid relationId)
17821783
RelationReloadIndexInfo(rd);
17831784
else
17841785
RelationClearRelation(rd, true);
1785-
Assert(rd->rd_isvalid);
1786+
1787+
/*
1788+
* Normally entries need to be valid here, but before the relcache
1789+
* has been initialized, not enough infrastructure exists to
1790+
* perform pg_class lookups. The structure of such entries doesn't
1791+
* change, but we still want to update the rd_rel entry. So
1792+
* rd_isvalid = false is left in place for a later lookup.
1793+
*/
1794+
Assert(rd->rd_isvalid ||
1795+
(rd->rd_isnailed && !criticalRelcachesBuilt));
17861796
}
17871797
return rd;
17881798
}
@@ -1985,6 +1995,81 @@ RelationReloadIndexInfo(Relation relation)
19851995
relation->rd_isvalid = true;
19861996
}
19871997

1998+
/*
1999+
* RelationReloadNailed - reload minimal information for nailed relations.
2000+
*
2001+
* The structure of a nailed relation can never change (which is good, because
2002+
* we rely on knowing their structure to be able to read catalog content). But
2003+
* some parts, e.g. pg_class.relfrozenxid, are still important to have
2004+
* accurate content for. Therefore those need to be reloaded after the arrival
2005+
* of invalidations.
2006+
*/
2007+
static void
2008+
RelationReloadNailed(Relation relation)
2009+
{
2010+
Assert(relation->rd_isnailed);
2011+
2012+
/*
2013+
* Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2014+
* mapping changed.
2015+
*/
2016+
RelationInitPhysicalAddr(relation);
2017+
2018+
/* flag as needing to be revalidated */
2019+
relation->rd_isvalid = false;
2020+
2021+
/*
2022+
* Can only reread catalog contents if in a transaction. If the relation
2023+
* is currently open (not counting the nailed refcount), do so
2024+
* immediately. Otherwise we've already marked the entry as possibly
2025+
* invalid, and it'll be fixed when next opened.
2026+
*/
2027+
if (!IsTransactionState() || relation->rd_refcnt <= 1)
2028+
return;
2029+
2030+
if (relation->rd_rel->relkind == RELKIND_INDEX)
2031+
{
2032+
/*
2033+
* If it's a nailed-but-not-mapped index, then we need to re-read the
2034+
* pg_class row to see if its relfilenode changed.
2035+
*/
2036+
RelationReloadIndexInfo(relation);
2037+
}
2038+
else
2039+
{
2040+
/*
2041+
* Reload a non-index entry. We can't easily do so if relcaches
2042+
* aren't yet built, but that's fine because at that stage the
2043+
* attributes that need to be current (like relfrozenxid) aren't yet
2044+
* accessed. To ensure the entry will later be revalidated, we leave
2045+
* it in invalid state, but allow use (cf. RelationIdGetRelation()).
2046+
*/
2047+
if (criticalRelcachesBuilt)
2048+
{
2049+
HeapTuple pg_class_tuple;
2050+
Form_pg_class relp;
2051+
2052+
/*
2053+
* NB: Mark the entry as valid before starting to scan, to avoid
2054+
* self-recursion when re-building pg_class.
2055+
*/
2056+
relation->rd_isvalid = true;
2057+
2058+
pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2059+
true, false);
2060+
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2061+
memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2062+
heap_freetuple(pg_class_tuple);
2063+
2064+
/*
2065+
* Again mark as valid, to protect against concurrently arriving
2066+
* invalidations.
2067+
*/
2068+
relation->rd_isvalid = true;
2069+
}
2070+
}
2071+
}
2072+
19882073
/*
19892074
* RelationDestroyRelation
19902075
*
@@ -2089,26 +2174,12 @@ RelationClearRelation(Relation relation, bool rebuild)
20892174
RelationCloseSmgr(relation);
20902175

20912176
/*
2092-
* Never, never ever blow away a nailed-in system relation, because we'd
2093-
* be unable to recover. However, we must redo RelationInitPhysicalAddr
2094-
* in case it is a mapped relation whose mapping changed.
2095-
*
2096-
* If it's a nailed-but-not-mapped index, then we need to re-read the
2097-
* pg_class row to see if its relfilenode changed. We do that immediately
2098-
* if we're inside a valid transaction and the relation is open (not
2099-
* counting the nailed refcount). Otherwise just mark the entry as
2100-
* possibly invalid, and it'll be fixed when next opened.
2177+
* Treat nailed-in system relations separately, they always need to be
2178+
* accessible, so we can't blow them away.
21012179
*/
21022180
if (relation->rd_isnailed)
21032181
{
2104-
RelationInitPhysicalAddr(relation);
2105-
2106-
if (relation->rd_rel->relkind == RELKIND_INDEX)
2107-
{
2108-
relation->rd_isvalid = false; /* needs to be revalidated */
2109-
if (relation->rd_refcnt > 1 && IsTransactionState())
2110-
RelationReloadIndexInfo(relation);
2111-
}
2182+
RelationReloadNailed(relation);
21122183
return;
21132184
}
21142185

@@ -5380,24 +5451,26 @@ write_item(const void *data, Size len, FILE *fp)
53805451

53815452
/*
53825453
* Determine whether a given relation (identified by OID) is one of the ones
5383-
* we should store in the local relcache init file.
5454+
* we should store in a relcache init file.
53845455
*
53855456
* We must cache all nailed rels, and for efficiency we should cache every rel
53865457
* that supports a syscache. The former set is almost but not quite a subset
5387-
* of the latter. Currently, we must special-case TriggerRelidNameIndexId,
5388-
* which RelationCacheInitializePhase3 chooses to nail for efficiency reasons,
5389-
* but which does not support any syscache.
5390-
*
5391-
* Note: this function is currently never called for shared rels. If it were,
5392-
* we'd probably also need a special case for DatabaseNameIndexId, which is
5393-
* critical but does not support a syscache.
5458+
* of the latter. The special cases are relations where
5459+
* RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5460+
* which do not support any syscache.
53945461
*/
53955462
bool
53965463
RelationIdIsInInitFile(Oid relationId)
53975464
{
5398-
if (relationId == TriggerRelidNameIndexId)
5465+
if (relationId == SharedSecLabelRelationId ||
5466+
relationId == TriggerRelidNameIndexId ||
5467+
relationId == DatabaseNameIndexId ||
5468+
relationId == SharedSecLabelObjectIndexId)
53995469
{
5400-
/* If this Assert fails, we don't need this special case anymore. */
5470+
/*
5471+
* If this Assert fails, we don't need the applicable special case
5472+
* anymore.
5473+
*/
54015474
Assert(!RelationSupportsSysCache(relationId));
54025475
return true;
54035476
}
@@ -5471,38 +5544,30 @@ RelationHasUnloggedIndex(Relation rel)
54715544
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
54725545
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
54735546
* send any pending SI messages between those calls.
5474-
*
5475-
* Notice this deals only with the local init file, not the shared init file.
5476-
* The reason is that there can never be a "significant" change to the
5477-
* relcache entry of a shared relation; the most that could happen is
5478-
* updates of noncritical fields such as relpages/reltuples. So, while
5479-
* it's worth updating the shared init file from time to time, it can never
5480-
* be invalid enough to make it necessary to remove it.
54815547
*/
54825548
void
54835549
RelationCacheInitFilePreInvalidate(void)
54845550
{
5485-
char initfilename[MAXPGPATH];
5551+
char localinitfname[MAXPGPATH];
5552+
char sharedinitfname[MAXPGPATH];
54865553

5487-
snprintf(initfilename, sizeof(initfilename), "%s/%s",
5488-
DatabasePath, RELCACHE_INIT_FILENAME);
5554+
if (DatabasePath)
5555+
snprintf(localinitfname, sizeof(localinitfname), "%s/%s",
5556+
DatabasePath, RELCACHE_INIT_FILENAME);
5557+
snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s",
5558+
RELCACHE_INIT_FILENAME);
54895559

54905560
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
54915561

5492-
if (unlink(initfilename) < 0)
5493-
{
5494-
/*
5495-
* The file might not be there if no backend has been started since
5496-
* the last removal. But complain about failures other than ENOENT.
5497-
* Fortunately, it's not too late to abort the transaction if we can't
5498-
* get rid of the would-be-obsolete init file.
5499-
*/
5500-
if (errno != ENOENT)
5501-
ereport(ERROR,
5502-
(errcode_for_file_access(),
5503-
errmsg("could not remove cache file \"%s\": %m",
5504-
initfilename)));
5505-
}
5562+
/*
5563+
* The files might not be there if no backend has been started since the
5564+
* last removal. But complain about failures other than ENOENT with
5565+
* ERROR. Fortunately, it's not too late to abort the transaction if we
5566+
* can't get rid of the would-be-obsolete init file.
5567+
*/
5568+
if (DatabasePath)
5569+
unlink_initfile(localinitfname, ERROR);
5570+
unlink_initfile(sharedinitfname, ERROR);
55065571
}
55075572

55085573
void
@@ -5528,13 +5593,9 @@ RelationCacheInitFileRemove(void)
55285593
struct dirent *de;
55295594
char path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
55305595

5531-
/*
5532-
* We zap the shared cache file too. In theory it can't get out of sync
5533-
* enough to be a problem, but in data-corruption cases, who knows ...
5534-
*/
55355596
snprintf(path, sizeof(path), "global/%s",
55365597
RELCACHE_INIT_FILENAME);
5537-
unlink_initfile(path);
5598+
unlink_initfile(path, LOG);
55385599

55395600
/* Scan everything in the default tablespace */
55405601
RelationCacheInitFileRemoveInDir("base");
@@ -5586,20 +5647,23 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath)
55865647
/* Try to remove the init file in each database */
55875648
snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
55885649
tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
5589-
unlink_initfile(initfilename);
5650+
unlink_initfile(initfilename, LOG);
55905651
}
55915652
}
55925653

55935654
FreeDir(dir);
55945655
}
55955656

55965657
static void
5597-
unlink_initfile(const char *initfilename)
5658+
unlink_initfile(const char *initfilename, int elevel)
55985659
{
55995660
if (unlink(initfilename) < 0)
56005661
{
56015662
/* It might not be there, but log any error other than ENOENT */
56025663
if (errno != ENOENT)
5603-
elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
5664+
ereport(elevel,
5665+
(errcode_for_file_access(),
5666+
errmsg("could not remove cache file \"%s\": %m",
5667+
initfilename)));
56045668
}
56055669
}

src/include/storage/standbydefs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ typedef struct xl_invalidations
6464
{
6565
Oid dbId; /* MyDatabaseId */
6666
Oid tsId; /* MyDatabaseTableSpace */
67-
bool relcacheInitFileInval; /* invalidate relcache init file */
67+
bool relcacheInitFileInval; /* invalidate relcache init files */
6868
int nmsgs; /* number of shared inval msgs */
6969
SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER];
7070
} xl_invalidations;

0 commit comments

Comments
 (0)