Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2ada677

Browse files
committed
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup.

Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed.

This has been wrong since it was put in (in 2002!), so back-patch to all supported releases.
1 parent 1bb6924 commit 2ada677

File tree

4 files changed

+57
-49
lines changed

4 files changed

+57
-49
lines changed

src/backend/access/transam/twophase.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
13561356
* after we send the SI messages. See AtEOXact_Inval()
13571357
*/
13581358
if (hdr->initfileinval)
1359-
RelationCacheInitFileInvalidate(true);
1359+
RelationCacheInitFilePreInvalidate();
13601360
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
13611361
if (hdr->initfileinval)
1362-
RelationCacheInitFileInvalidate(false);
1362+
RelationCacheInitFilePostInvalidate();
13631363

13641364
/* And now do the callbacks */
13651365
if (isCommit)

src/backend/utils/cache/inval.c

+17-16
Original file line numberDiff line numberDiff line change
@@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
854854
return numSharedInvalidMessagesArray;
855855
}
856856

857-
#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
858-
{ \
859-
DatabasePath = GetDatabasePath(dbo, tbo); \
860-
elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
861-
RelationCacheInitFileInvalidate(tf); \
862-
pfree(DatabasePath); \
863-
}
864-
865857
/*
866858
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
867859
* to process invalidation messages added to commit records.
868860
*
869861
* Relcache init file invalidation requires processing both
870862
* before and after we send the SI messages. See AtEOXact_Inval()
871-
*
872-
* We deliberately avoid SetDatabasePath() since it is intended to be used
873-
* only once by normal backends, so we set DatabasePath directly then
874-
* pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
875863
*/
876864
void
877865
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
@@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
885873
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
886874

887875
if (RelcacheInitFileInval)
888-
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true);
876+
{
877+
/*
878+
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
879+
* but we should not use SetDatabasePath during recovery, since it is
880+
* intended to be used only once by normal backends. Hence, a quick
881+
* hack: set DatabasePath directly then unset after use.
882+
*/
883+
DatabasePath = GetDatabasePath(dbid, tsid);
884+
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
885+
DatabasePath);
886+
RelationCacheInitFilePreInvalidate();
887+
pfree(DatabasePath);
888+
DatabasePath = NULL;
889+
}
889890

890891
SendSharedInvalidMessages(msgs, nmsgs);
891892

892893
if (RelcacheInitFileInval)
893-
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false);
894+
RelationCacheInitFilePostInvalidate();
894895
}
895896

896897
/*
@@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
931932
* unless we committed.
932933
*/
933934
if (transInvalInfo->RelcacheInitFileInval)
934-
RelationCacheInitFileInvalidate(true);
935+
RelationCacheInitFilePreInvalidate();
935936

936937
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
937938
&transInvalInfo->CurrentCmdInvalidMsgs);
@@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
940941
SendSharedInvalidMessages);
941942

942943
if (transInvalInfo->RelcacheInitFileInval)
943-
RelationCacheInitFileInvalidate(false);
944+
RelationCacheInitFilePostInvalidate();
944945
}
945946
else if (transInvalInfo != NULL)
946947
{

src/backend/utils/cache/relcache.c

+36-30
Original file line numberDiff line numberDiff line change
@@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
43774377
* updated by SI message processing, but we can't be sure whether what we
43784378
* wrote out was up-to-date.)
43794379
*
4380-
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so
4381-
* grab a serialization lock for the duration.
4380+
* This mustn't run concurrently with the code that unlinks an init file
4381+
* and sends SI messages, so grab a serialization lock for the duration.
43824382
*/
43834383
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
43844384

@@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
44424442
* changed one or more of the relation cache entries that are kept in the
44434443
* local init file.
44444444
*
4445-
* We actually need to remove the init file twice: once just before sending
4446-
* the SI messages that include relcache inval for such relations, and once
4447-
* just after sending them. The unlink before ensures that a backend that's
4448-
* currently starting cannot read the now-obsolete init file and then miss
4449-
* the SI messages that will force it to update its relcache entries. (This
4450-
* works because the backend startup sequence gets into the PGPROC array before
4451-
* trying to load the init file.) The unlink after is to synchronize with a
4452-
* backend that may currently be trying to write an init file based on data
4453-
* that we've just rendered invalid. Such a backend will see the SI messages,
4454-
* but we can't leave the init file sitting around to fool later backends.
4445+
* To be safe against concurrent inspection or rewriting of the init file,
4446+
* we must take RelCacheInitLock, then remove the old init file, then send
4447+
* the SI messages that include relcache inval for such relations, and then
4448+
* release RelCacheInitLock. This serializes the whole affair against
4449+
* write_relcache_init_file, so that we can be sure that any other process
4450+
* that's concurrently trying to create a new init file won't move an
4451+
* already-stale version into place after we unlink. Also, because we unlink
4452+
* before sending the SI messages, a backend that's currently starting cannot
4453+
* read the now-obsolete init file and then miss the SI messages that will
4454+
* force it to update its relcache entries. (This works because the backend
4455+
* startup sequence gets into the sinval array before trying to load the init
4456+
* file.)
44554457
*
4456-
* Ignore any failure to unlink the file, since it might not be there if
4457-
* no backend has been started since the last removal.
4458+
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
4459+
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
4460+
* send any pending SI messages between those calls.
44584461
*
44594462
* Notice this deals only with the local init file, not the shared init file.
44604463
* The reason is that there can never be a "significant" change to the
@@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
44644467
* be invalid enough to make it necessary to remove it.
44654468
*/
44664469
void
4467-
RelationCacheInitFileInvalidate(bool beforeSend)
4470+
RelationCacheInitFilePreInvalidate(void)
44684471
{
44694472
char initfilename[MAXPGPATH];
44704473

44714474
snprintf(initfilename, sizeof(initfilename), "%s/%s",
44724475
DatabasePath, RELCACHE_INIT_FILENAME);
44734476

4474-
if (beforeSend)
4475-
{
4476-
/* no interlock needed here */
4477-
unlink(initfilename);
4478-
}
4479-
else
4477+
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4478+
4479+
if (unlink(initfilename) < 0)
44804480
{
44814481
/*
4482-
* We need to interlock this against write_relcache_init_file, to
4483-
* guard against possibility that someone renames a new-but-
4484-
* already-obsolete init file into place just after we unlink. With
4485-
* the interlock, it's certain that write_relcache_init_file will
4486-
* notice our SI inval message before renaming into place, or else
4487-
* that we will execute second and successfully unlink the file.
4482+
* The file might not be there if no backend has been started since
4483+
* the last removal. But complain about failures other than ENOENT.
4484+
* Fortunately, it's not too late to abort the transaction if we
4485+
* can't get rid of the would-be-obsolete init file.
44884486
*/
4489-
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
4490-
unlink(initfilename);
4491-
LWLockRelease(RelCacheInitLock);
4487+
if (errno != ENOENT)
4488+
ereport(ERROR,
4489+
(errcode_for_file_access(),
4490+
errmsg("could not remove cache file \"%s\": %m",
4491+
initfilename)));
44924492
}
44934493
}
44944494

4495+
void
4496+
RelationCacheInitFilePostInvalidate(void)
4497+
{
4498+
LWLockRelease(RelCacheInitLock);
4499+
}
4500+
44954501
/*
44964502
* Remove the init files during postmaster startup.
44974503
*

src/include/utils/relcache.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
9898
* Routines to help manage rebuilding of relcache init files
9999
*/
100100
extern bool RelationIdIsInInitFile(Oid relationId);
101-
extern void RelationCacheInitFileInvalidate(bool beforeSend);
101+
extern void RelationCacheInitFilePreInvalidate(void);
102+
extern void RelationCacheInitFilePostInvalidate(void);
102103
extern void RelationCacheInitFileRemove(void);
103104

104105
/* should be used only by relcache.c and catcache.c */

0 commit comments

Comments
 (0)