Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4f2458d

Browse files
committed
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we must also delete the relcache "init file" if the init file contains a copy of that rel's entry. The old way of doing this relied on a specially maintained list of the OIDs of relations present in the init file: we made the list either when reading the file in, or when writing the file out. The problem is that when writing the file out, we included only rels present in our local relcache, which might have already suffered some deletions due to relcache inval events. In such cases we correctly decided not to overwrite the real init file with incomplete data --- but we still used the incomplete initFileRelationIds list for the rest of the current session. This could result in wrong decisions about whether the session's own actions require deletion of the init file, potentially allowing an init file created by some other concurrent session to be left around even though it's been made stale. Since we don't support changing the schema of a system catalog at runtime, the only likely scenario in which this would cause a problem in the field involves a "vacuum full" on a catalog concurrently with other activity, and even then it's far from easy to provoke. Remarkably, this has been broken since 2002 (in commit 7863404), but we had never seen a reproducible test case until recently. If it did happen in the field, the symptoms would probably involve unexpected "cache lookup failed" errors to begin with, then "could not open file" failures after the next checkpoint, as all accesses to the affected catalog stopped working. Recovery would require manually removing the stale "pg_internal.init" file. To fix, get rid of the initFileRelationIds list, and instead consult syscache.c's list of relations used in catalog caches to decide whether a relation is included in the init file. This should be a tad more efficient anyway, since we're replacing linear search of a list with ~100 entries with a binary search. It's a bit ugly that the init file contents are now so directly tied to the catalog caches, but in practice that won't make much difference. Back-patch to all supported branches.
1 parent ac86eda commit 4f2458d

File tree

5 files changed

+96
-48
lines changed

5 files changed

+96
-48
lines changed

src/backend/utils/cache/inval.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,10 +461,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
461461
(void) GetCurrentCommandId(true);
462462

463463
/*
464-
* If the relation being invalidated is one of those cached in the
464+
* If the relation being invalidated is one of those cached in the local
465465
* relcache init file, mark that we need to zap that file at commit.
466+
* (Note: perhaps it would be better if this code were a bit more
467+
* decoupled from the knowledge that the init file contains exactly those
468+
* non-shared rels used in catalog caches.)
466469
*/
467-
if (RelationIdIsInInitFile(relId))
470+
if (OidIsValid(dbId) && RelationSupportsSysCache(relId))
468471
transInvalInfo->RelcacheInitFileInval = true;
469472
}
470473

src/backend/utils/cache/relcache.c

Lines changed: 17 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,6 @@ bool criticalSharedRelcachesBuilt = false;
130130
*/
131131
static long relcacheInvalsReceived = 0L;
132132

133-
/*
134-
* This list remembers the OIDs of the non-shared relations cached in the
135-
* database's local relcache init file. Note that there is no corresponding
136-
* list for the shared relcache init file, for reasons explained in the
137-
* comments for RelationCacheInitFileRemove.
138-
*/
139-
static List *initFileRelationIds = NIL;
140-
141133
/*
142134
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
143135
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -3190,9 +3182,6 @@ RelationCacheInitializePhase3(void)
31903182
*/
31913183
InitCatalogCachePhase2();
31923184

3193-
/* reset initFileRelationIds list; we'll fill it during write */
3194-
initFileRelationIds = NIL;
3195-
31963185
/* now write the files */
31973186
write_relcache_init_file(true);
31983187
write_relcache_init_file(false);
@@ -4477,10 +4466,6 @@ load_relcache_init_file(bool shared)
44774466
for (relno = 0; relno < num_rels; relno++)
44784467
{
44794468
RelationCacheInsert(rels[relno]);
4480-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4481-
if (!shared)
4482-
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
4483-
initFileRelationIds);
44844469
}
44854470

44864471
pfree(rels);
@@ -4517,9 +4502,15 @@ write_relcache_init_file(bool shared)
45174502
int magic;
45184503
HASH_SEQ_STATUS status;
45194504
RelIdCacheEnt *idhentry;
4520-
MemoryContext oldcxt;
45214505
int i;
45224506

4507+
/*
4508+
* If we have already received any relcache inval events, there's no
4509+
* chance of succeeding so we may as well skip the whole thing.
4510+
*/
4511+
if (relcacheInvalsReceived != 0L)
4512+
return;
4513+
45234514
/*
45244515
* We must write a temporary file and rename it into place. Otherwise,
45254516
* another backend starting at about the same time might crash trying to
@@ -4579,6 +4570,16 @@ write_relcache_init_file(bool shared)
45794570
if (relform->relisshared != shared)
45804571
continue;
45814572

4573+
/*
4574+
* Ignore if not supposed to be in init file. We can allow any shared
4575+
* relation that's been loaded so far to be in the shared init file,
4576+
* but unshared relations must be used for catalog caches. (Note: if
4577+
* you want to change the criterion for rels to be kept in the init
4578+
* file, see also inval.c.)
4579+
*/
4580+
if (!shared && !RelationSupportsSysCache(RelationGetRelid(rel)))
4581+
continue;
4582+
45824583
/* first write the relcache entry proper */
45834584
write_item(rel, sizeof(RelationData), fp);
45844585

@@ -4635,15 +4636,6 @@ write_relcache_init_file(bool shared)
46354636
relform->relnatts * sizeof(int16),
46364637
fp);
46374638
}
4638-
4639-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4640-
if (!shared)
4641-
{
4642-
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
4643-
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
4644-
initFileRelationIds);
4645-
MemoryContextSwitchTo(oldcxt);
4646-
}
46474639
}
46484640

46494641
if (FreeFile(fp))
@@ -4702,21 +4694,6 @@ write_item(const void *data, Size len, FILE *fp)
47024694
elog(FATAL, "could not write init file");
47034695
}
47044696

4705-
/*
4706-
* Detect whether a given relation (identified by OID) is one of the ones
4707-
* we store in the local relcache init file.
4708-
*
4709-
* Note that we effectively assume that all backends running in a database
4710-
* would choose to store the same set of relations in the init file;
4711-
* otherwise there are cases where we'd fail to detect the need for an init
4712-
* file invalidation. This does not seem likely to be a problem in practice.
4713-
*/
4714-
bool
4715-
RelationIdIsInInitFile(Oid relationId)
4716-
{
4717-
return list_member_oid(initFileRelationIds, relationId);
4718-
}
4719-
47204697
/*
47214698
* Invalidate (remove) the init file during commit of a transaction that
47224699
* changed one or more of the relation cache entries that are kept in the

src/backend/utils/cache/syscache.c

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -791,11 +791,18 @@ static const struct cachedesc cacheinfo[] = {
791791
}
792792
};
793793

794-
static CatCache *SysCache[
795-
lengthof(cacheinfo)];
796-
static int SysCacheSize = lengthof(cacheinfo);
794+
#define SysCacheSize ((int) lengthof(cacheinfo))
795+
796+
static CatCache *SysCache[SysCacheSize];
797+
797798
static bool CacheInitialized = false;
798799

800+
/* Sorted array of OIDs of tables and indexes used by caches */
801+
static Oid SysCacheSupportingRelOid[SysCacheSize * 2];
802+
static int SysCacheSupportingRelOidSize;
803+
804+
static int oid_compare(const void *a, const void *b);
805+
799806

800807
/*
801808
* InitCatalogCache - initialize the caches
@@ -809,10 +816,12 @@ void
809816
InitCatalogCache(void)
810817
{
811818
int cacheId;
819+
int i,
820+
j;
812821

813822
Assert(!CacheInitialized);
814823

815-
MemSet(SysCache, 0, sizeof(SysCache));
824+
SysCacheSupportingRelOidSize = 0;
816825

817826
for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
818827
{
@@ -825,7 +834,25 @@ InitCatalogCache(void)
825834
if (!PointerIsValid(SysCache[cacheId]))
826835
elog(ERROR, "could not initialize cache %u (%d)",
827836
cacheinfo[cacheId].reloid, cacheId);
837+
/* Accumulate data for OID lists, too */
838+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
839+
cacheinfo[cacheId].reloid;
840+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
841+
cacheinfo[cacheId].indoid;
842+
}
843+
844+
Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid));
845+
846+
/* Sort and de-dup OID arrays, so we can use binary search. */
847+
pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
848+
sizeof(Oid), oid_compare);
849+
for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
850+
{
851+
if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
852+
SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
828853
}
854+
SysCacheSupportingRelOidSize = j + 1;
855+
829856
CacheInitialized = true;
830857
}
831858

@@ -1113,3 +1140,43 @@ SearchSysCacheList(int cacheId, int nkeys,
11131140
return SearchCatCacheList(SysCache[cacheId], nkeys,
11141141
key1, key2, key3, key4);
11151142
}
1143+
1144+
/*
1145+
* Test whether a relation supports a system cache, ie it is either a
1146+
* cached table or the index used for a cache.
1147+
*/
1148+
bool
1149+
RelationSupportsSysCache(Oid relid)
1150+
{
1151+
int low = 0,
1152+
high = SysCacheSupportingRelOidSize - 1;
1153+
1154+
while (low <= high)
1155+
{
1156+
int middle = low + (high - low) / 2;
1157+
1158+
if (SysCacheSupportingRelOid[middle] == relid)
1159+
return true;
1160+
if (SysCacheSupportingRelOid[middle] < relid)
1161+
low = middle + 1;
1162+
else
1163+
high = middle - 1;
1164+
}
1165+
1166+
return false;
1167+
}
1168+
1169+
1170+
/*
1171+
* OID comparator for pg_qsort
1172+
*/
1173+
static int
1174+
oid_compare(const void *a, const void *b)
1175+
{
1176+
Oid oa = *((const Oid *) a);
1177+
Oid ob = *((const Oid *) b);
1178+
1179+
if (oa == ob)
1180+
return 0;
1181+
return (oa > ob) ? 1 : -1;
1182+
}

src/include/utils/relcache.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
105105
/*
106106
* Routines to help manage rebuilding of relcache init files
107107
*/
108-
extern bool RelationIdIsInInitFile(Oid relationId);
109108
extern void RelationCacheInitFilePreInvalidate(void);
110109
extern void RelationCacheInitFilePostInvalidate(void);
111110
extern void RelationCacheInitFileRemove(void);

src/include/utils/syscache.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "access/attnum.h"
2020
#include "access/htup.h"
21-
/* we purposedly do not include utils/catcache.h here */
21+
/* we intentionally do not include utils/catcache.h here */
2222

2323
/*
2424
* SysCache identifiers.
@@ -125,6 +125,8 @@ struct catclist;
125125
extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
126126
Datum key1, Datum key2, Datum key3, Datum key4);
127127

128+
extern bool RelationSupportsSysCache(Oid relid);
129+
128130
/*
129131
* The use of the macros below rather than direct calls to the corresponding
130132
* functions is encouraged, as it insulates the caller from changes in the

0 commit comments

Comments
 (0)