Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f3b5565

Browse files
committed
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we must also delete the relcache "init file" if the init file contains a copy of that rel's entry. The old way of doing this relied on a specially maintained list of the OIDs of relations present in the init file: we made the list either when reading the file in, or when writing the file out. The problem is that when writing the file out, we included only rels present in our local relcache, which might have already suffered some deletions due to relcache inval events. In such cases we correctly decided not to overwrite the real init file with incomplete data --- but we still used the incomplete initFileRelationIds list for the rest of the current session. This could result in wrong decisions about whether the session's own actions require deletion of the init file, potentially allowing an init file created by some other concurrent session to be left around even though it's been made stale. Since we don't support changing the schema of a system catalog at runtime, the only likely scenario in which this would cause a problem in the field involves a "vacuum full" on a catalog concurrently with other activity, and even then it's far from easy to provoke. Remarkably, this has been broken since 2002 (in commit 7863404), but we had never seen a reproducible test case until recently. If it did happen in the field, the symptoms would probably involve unexpected "cache lookup failed" errors to begin with, then "could not open file" failures after the next checkpoint, as all accesses to the affected catalog stopped working. Recovery would require manually removing the stale "pg_internal.init" file. To fix, get rid of the initFileRelationIds list, and instead consult syscache.c's list of relations used in catalog caches to decide whether a relation is included in the init file. This should be a tad more efficient anyway, since we're replacing linear search of a list with ~100 entries with a binary search. It's a bit ugly that the init file contents are now so directly tied to the catalog caches, but in practice that won't make much difference. Back-patch to all supported branches.
1 parent 1497369 commit f3b5565

File tree

5 files changed

+87
-57
lines changed

5 files changed

+87
-57
lines changed

src/backend/utils/cache/inval.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,13 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
507507
(void) GetCurrentCommandId(true);
508508

509509
/*
510-
* If the relation being invalidated is one of those cached in the
510+
* If the relation being invalidated is one of those cached in the local
511511
* relcache init file, mark that we need to zap that file at commit.
512+
* (Note: perhaps it would be better if this code were a bit more
513+
* decoupled from the knowledge that the init file contains exactly those
514+
* non-shared rels used in catalog caches.)
512515
*/
513-
if (RelationIdIsInInitFile(relId))
516+
if (OidIsValid(dbId) && RelationSupportsSysCache(relId))
514517
transInvalInfo->RelcacheInitFileInval = true;
515518
}
516519

src/backend/utils/cache/relcache.c

+17-40
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,6 @@ bool criticalSharedRelcachesBuilt = false;
133133
*/
134134
static long relcacheInvalsReceived = 0L;
135135

136-
/*
137-
* This list remembers the OIDs of the non-shared relations cached in the
138-
* database's local relcache init file. Note that there is no corresponding
139-
* list for the shared relcache init file, for reasons explained in the
140-
* comments for RelationCacheInitFileRemove.
141-
*/
142-
static List *initFileRelationIds = NIL;
143-
144136
/*
145137
* eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
146138
* cleanup work. This list intentionally has limited size; if it overflows,
@@ -3489,9 +3481,6 @@ RelationCacheInitializePhase3(void)
34893481
*/
34903482
InitCatalogCachePhase2();
34913483

3492-
/* reset initFileRelationIds list; we'll fill it during write */
3493-
initFileRelationIds = NIL;
3494-
34953484
/* now write the files */
34963485
write_relcache_init_file(true);
34973486
write_relcache_init_file(false);
@@ -4915,10 +4904,6 @@ load_relcache_init_file(bool shared)
49154904
for (relno = 0; relno < num_rels; relno++)
49164905
{
49174906
RelationCacheInsert(rels[relno], false);
4918-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
4919-
if (!shared)
4920-
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
4921-
initFileRelationIds);
49224907
}
49234908

49244909
pfree(rels);
@@ -4955,9 +4940,15 @@ write_relcache_init_file(bool shared)
49554940
int magic;
49564941
HASH_SEQ_STATUS status;
49574942
RelIdCacheEnt *idhentry;
4958-
MemoryContext oldcxt;
49594943
int i;
49604944

4945+
/*
4946+
* If we have already received any relcache inval events, there's no
4947+
* chance of succeeding so we may as well skip the whole thing.
4948+
*/
4949+
if (relcacheInvalsReceived != 0L)
4950+
return;
4951+
49614952
/*
49624953
* We must write a temporary file and rename it into place. Otherwise,
49634954
* another backend starting at about the same time might crash trying to
@@ -5017,6 +5008,16 @@ write_relcache_init_file(bool shared)
50175008
if (relform->relisshared != shared)
50185009
continue;
50195010

5011+
/*
5012+
* Ignore if not supposed to be in init file. We can allow any shared
5013+
* relation that's been loaded so far to be in the shared init file,
5014+
* but unshared relations must be used for catalog caches. (Note: if
5015+
* you want to change the criterion for rels to be kept in the init
5016+
* file, see also inval.c.)
5017+
*/
5018+
if (!shared && !RelationSupportsSysCache(RelationGetRelid(rel)))
5019+
continue;
5020+
50205021
/* first write the relcache entry proper */
50215022
write_item(rel, sizeof(RelationData), fp);
50225023

@@ -5073,15 +5074,6 @@ write_relcache_init_file(bool shared)
50735074
relform->relnatts * sizeof(int16),
50745075
fp);
50755076
}
5076-
5077-
/* also make a list of their OIDs, for RelationIdIsInInitFile */
5078-
if (!shared)
5079-
{
5080-
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
5081-
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
5082-
initFileRelationIds);
5083-
MemoryContextSwitchTo(oldcxt);
5084-
}
50855077
}
50865078

50875079
if (FreeFile(fp))
@@ -5140,21 +5132,6 @@ write_item(const void *data, Size len, FILE *fp)
51405132
elog(FATAL, "could not write init file");
51415133
}
51425134

5143-
/*
5144-
* Detect whether a given relation (identified by OID) is one of the ones
5145-
* we store in the local relcache init file.
5146-
*
5147-
* Note that we effectively assume that all backends running in a database
5148-
* would choose to store the same set of relations in the init file;
5149-
* otherwise there are cases where we'd fail to detect the need for an init
5150-
* file invalidation. This does not seem likely to be a problem in practice.
5151-
*/
5152-
bool
5153-
RelationIdIsInInitFile(Oid relationId)
5154-
{
5155-
return list_member_oid(initFileRelationIds, relationId);
5156-
}
5157-
51585135
/*
51595136
* Invalidate (remove) the init file during commit of a transaction that
51605137
* changed one or more of the relation cache entries that are kept in the

src/backend/utils/cache/syscache.c

+61-11
Original file line numberDiff line numberDiff line change
@@ -867,17 +867,23 @@ static const struct cachedesc cacheinfo[] = {
867867
}
868868
};
869869

870-
static CatCache *SysCache[
871-
lengthof(cacheinfo)];
872-
static int SysCacheSize = lengthof(cacheinfo);
870+
#define SysCacheSize ((int) lengthof(cacheinfo))
871+
872+
static CatCache *SysCache[SysCacheSize];
873+
873874
static bool CacheInitialized = false;
874875

875-
static Oid SysCacheRelationOid[
876-
lengthof(cacheinfo)];
876+
/* Sorted array of OIDs of tables that have caches on them */
877+
static Oid SysCacheRelationOid[SysCacheSize];
877878
static int SysCacheRelationOidSize;
878879

880+
/* Sorted array of OIDs of tables and indexes used by caches */
881+
static Oid SysCacheSupportingRelOid[SysCacheSize * 2];
882+
static int SysCacheSupportingRelOidSize;
883+
879884
static int oid_compare(const void *a, const void *b);
880885

886+
881887
/*
882888
* InitCatalogCache - initialize the caches
883889
*
@@ -891,11 +897,11 @@ InitCatalogCache(void)
891897
{
892898
int cacheId;
893899
int i,
894-
j = 0;
900+
j;
895901

896902
Assert(!CacheInitialized);
897903

898-
MemSet(SysCache, 0, sizeof(SysCache));
904+
SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0;
899905

900906
for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
901907
{
@@ -908,20 +914,39 @@ InitCatalogCache(void)
908914
if (!PointerIsValid(SysCache[cacheId]))
909915
elog(ERROR, "could not initialize cache %u (%d)",
910916
cacheinfo[cacheId].reloid, cacheId);
917+
/* Accumulate data for OID lists, too */
911918
SysCacheRelationOid[SysCacheRelationOidSize++] =
912919
cacheinfo[cacheId].reloid;
920+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
921+
cacheinfo[cacheId].reloid;
922+
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
923+
cacheinfo[cacheId].indoid;
913924
/* see comments for RelationInvalidatesSnapshotsOnly */
914925
Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid));
915926
}
916927

917-
/* Sort and dedup OIDs. */
928+
Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid));
929+
Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid));
930+
931+
/* Sort and de-dup OID arrays, so we can use binary search. */
918932
pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
919933
sizeof(Oid), oid_compare);
920-
for (i = 1; i < SysCacheRelationOidSize; ++i)
934+
for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
935+
{
921936
if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
922937
SysCacheRelationOid[++j] = SysCacheRelationOid[i];
938+
}
923939
SysCacheRelationOidSize = j + 1;
924940

941+
pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
942+
sizeof(Oid), oid_compare);
943+
for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
944+
{
945+
if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
946+
SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
947+
}
948+
SysCacheSupportingRelOidSize = j + 1;
949+
925950
CacheInitialized = true;
926951
}
927952

@@ -1264,15 +1289,40 @@ RelationHasSysCache(Oid relid)
12641289
return false;
12651290
}
12661291

1292+
/*
1293+
* Test whether a relation supports a system cache, ie it is either a
1294+
* cached table or the index used for a cache.
1295+
*/
1296+
bool
1297+
RelationSupportsSysCache(Oid relid)
1298+
{
1299+
int low = 0,
1300+
high = SysCacheSupportingRelOidSize - 1;
1301+
1302+
while (low <= high)
1303+
{
1304+
int middle = low + (high - low) / 2;
1305+
1306+
if (SysCacheSupportingRelOid[middle] == relid)
1307+
return true;
1308+
if (SysCacheSupportingRelOid[middle] < relid)
1309+
low = middle + 1;
1310+
else
1311+
high = middle - 1;
1312+
}
1313+
1314+
return false;
1315+
}
1316+
12671317

12681318
/*
12691319
* OID comparator for pg_qsort
12701320
*/
12711321
static int
12721322
oid_compare(const void *a, const void *b)
12731323
{
1274-
Oid oa = *((Oid *) a);
1275-
Oid ob = *((Oid *) b);
1324+
Oid oa = *((const Oid *) a);
1325+
Oid ob = *((const Oid *) b);
12761326

12771327
if (oa == ob)
12781328
return 0;

src/include/utils/relcache.h

-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
116116
/*
117117
* Routines to help manage rebuilding of relcache init files
118118
*/
119-
extern bool RelationIdIsInInitFile(Oid relationId);
120119
extern void RelationCacheInitFilePreInvalidate(void);
121120
extern void RelationCacheInitFilePostInvalidate(void);
122121
extern void RelationCacheInitFileRemove(void);

src/include/utils/syscache.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "access/attnum.h"
2020
#include "access/htup.h"
21-
/* we purposedly do not include utils/catcache.h here */
21+
/* we intentionally do not include utils/catcache.h here */
2222

2323
/*
2424
* SysCache identifiers.
@@ -131,8 +131,9 @@ struct catclist;
131131
extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
132132
Datum key1, Datum key2, Datum key3, Datum key4);
133133

134-
extern bool RelationInvalidatesSnapshotsOnly(Oid);
135-
extern bool RelationHasSysCache(Oid);
134+
extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
135+
extern bool RelationHasSysCache(Oid relid);
136+
extern bool RelationSupportsSysCache(Oid relid);
136137

137138
/*
138139
* The use of the macros below rather than direct calls to the corresponding

0 commit comments

Comments
 (0)