Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fc29917

Browse files
committed
Separate the functions of relcache entry flush and smgr cache entry flush
so that we can get the size of a shared inval message back down to what it was in 7.4 (and simplify the logic too). Phase 2 of fixing the 'SMgrRelation hashtable corrupted' problem.
1 parent 0ce4d56 commit fc29917

File tree

2 files changed

+104
-76
lines changed

2 files changed

+104
-76
lines changed

src/backend/utils/cache/inval.c

Lines changed: 85 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -53,14 +53,14 @@
5353
*
5454
* Also, whenever we see an operation on a pg_class or pg_attribute tuple,
5555
* we register a relcache flush operation for the relation described by that
56-
* tuple.
57-
*
58-
* We keep the relcache flush requests in lists separate from the catcache
59-
* tuple flush requests. This allows us to issue all the pending catcache
60-
* flushes before we issue relcache flushes, which saves us from loading
61-
* a catcache tuple during relcache load only to flush it again right away.
62-
* Also, we avoid queuing multiple relcache flush requests for the same
63-
* relation, since a relcache flush is relatively expensive to do.
56+
* tuple. pg_class updates trigger an smgr flush operation as well.
57+
*
58+
* We keep the relcache and smgr flush requests in lists separate from the
59+
* catcache tuple flush requests. This allows us to issue all the pending
60+
* catcache flushes before we issue relcache flushes, which saves us from
61+
* loading a catcache tuple during relcache load only to flush it again
62+
* right away. Also, we avoid queuing multiple relcache flush requests for
63+
* the same relation, since a relcache flush is relatively expensive to do.
6464
* (XXX is it worth testing likewise for duplicate catcache flush entries?
6565
* Probably not.)
6666
*
@@ -80,7 +80,7 @@
8080
* Portions Copyright (c) 1994, Regents of the University of California
8181
*
8282
* IDENTIFICATION
83-
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.69 2005/01/10 20:02:23 tgl Exp $
83+
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.70 2005/01/10 21:57:17 tgl Exp $
8484
*
8585
*-------------------------------------------------------------------------
8686
*/
@@ -115,7 +115,7 @@ typedef struct InvalidationChunk
115115
typedef struct InvalidationListHeader
116116
{
117117
InvalidationChunk *cclist; /* list of chunks holding catcache msgs */
118-
InvalidationChunk *rclist; /* list of chunks holding relcache msgs */
118+
InvalidationChunk *rclist; /* list of chunks holding relcache/smgr msgs */
119119
} InvalidationListHeader;
120120

121121
/*----------------
@@ -164,7 +164,7 @@ static TransInvalidationInfo *transInvalInfo = NULL;
164164

165165
static struct CACHECALLBACK
166166
{
167-
int16 id; /* cache number or SHAREDINVALRELCACHE_ID */
167+
int16 id; /* cache number or message type id */
168168
CacheCallbackFunction function;
169169
Datum arg;
170170
} cache_callback_list[MAX_CACHE_CALLBACKS];
@@ -273,7 +273,7 @@ AppendInvalidationMessageList(InvalidationChunk **destHdr,
273273
* Invalidation set support functions
274274
*
275275
* These routines understand the division of a logical invalidation
276-
* list into separate physical lists for catcache and relcache entries.
276+
* list into separate physical lists for catcache and relcache/smgr entries.
277277
* ----------------------------------------------------------------
278278
*/
279279

@@ -299,22 +299,42 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
299299
*/
300300
static void
301301
AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
302-
Oid dbId, Oid relId, RelFileNode physId)
302+
Oid dbId, Oid relId)
303303
{
304304
SharedInvalidationMessage msg;
305305

306306
/* Don't add a duplicate item */
307307
/* We assume dbId need not be checked because it will never change */
308-
/* relfilenode fields must be checked to support reassignment */
309308
ProcessMessageList(hdr->rclist,
310-
if (msg->rc.relId == relId &&
311-
RelFileNodeEquals(msg->rc.physId, physId)) return);
309+
if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
310+
msg->rc.relId == relId)
311+
return);
312312

313313
/* OK, add the item */
314314
msg.rc.id = SHAREDINVALRELCACHE_ID;
315315
msg.rc.dbId = dbId;
316316
msg.rc.relId = relId;
317-
msg.rc.physId = physId;
317+
AddInvalidationMessage(&hdr->rclist, &msg);
318+
}
319+
320+
/*
321+
* Add an smgr inval entry
322+
*/
323+
static void
324+
AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
325+
RelFileNode rnode)
326+
{
327+
SharedInvalidationMessage msg;
328+
329+
/* Don't add a duplicate item */
330+
ProcessMessageList(hdr->rclist,
331+
if (msg->sm.id == SHAREDINVALSMGR_ID &&
332+
RelFileNodeEquals(msg->sm.rnode, rnode))
333+
return);
334+
335+
/* OK, add the item */
336+
msg.sm.id = SHAREDINVALSMGR_ID;
337+
msg.sm.rnode = rnode;
318338
AddInvalidationMessage(&hdr->rclist, &msg);
319339
}
320340

@@ -370,10 +390,10 @@ RegisterCatcacheInvalidation(int cacheId,
370390
* As above, but register a relcache invalidation event.
371391
*/
372392
static void
373-
RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
393+
RegisterRelcacheInvalidation(Oid dbId, Oid relId)
374394
{
375395
AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
376-
dbId, relId, physId);
396+
dbId, relId);
377397

378398
/*
379399
* If the relation being invalidated is one of those cached in the
@@ -383,10 +403,22 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
383403
transInvalInfo->RelcacheInitFileInval = true;
384404
}
385405

406+
/*
407+
* RegisterSmgrInvalidation
408+
*
409+
* As above, but register an smgr invalidation event.
410+
*/
411+
static void
412+
RegisterSmgrInvalidation(RelFileNode rnode)
413+
{
414+
AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
415+
rnode);
416+
}
417+
386418
/*
387419
* LocalExecuteInvalidationMessage
388420
*
389-
* Process a single invalidation message (which could be either type).
421+
* Process a single invalidation message (which could be of any type).
390422
* Only the local caches are flushed; this does not transmit the message
391423
* to other backends.
392424
*/
@@ -426,17 +458,14 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
426458
(*ccitem->function) (ccitem->arg, msg->rc.relId);
427459
}
428460
}
461+
}
462+
else if (msg->id == SHAREDINVALSMGR_ID)
463+
{
429464
/*
430-
* If the message includes a valid relfilenode, we must ensure
431-
* the smgr cache entry gets zapped. This might not have happened
432-
* above since the relcache entry might not have existed or might
433-
* have been associated with a different relfilenode.
434-
*
435-
* XXX there is no real good reason for rnode inval to be in the
436-
* same message at all. FIXME in 8.1.
465+
* We could have smgr entries for relations of other databases,
466+
* so no short-circuit test is possible here.
437467
*/
438-
if (OidIsValid(msg->rc.physId.relNode))
439-
smgrclosenode(msg->rc.physId);
468+
smgrclosenode(msg->sm.rnode);
440469
}
441470
else
442471
elog(FATAL, "unrecognized SI message id: %d", msg->id);
@@ -475,16 +504,11 @@ InvalidateSystemCaches(void)
475504
* of catalog/relation cache entries; if so, register inval events.
476505
*/
477506
static void
478-
PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
479-
void (*CacheIdRegisterFunc) (int, uint32,
480-
ItemPointer, Oid),
481-
void (*RelationIdRegisterFunc) (Oid, Oid,
482-
RelFileNode))
507+
PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
483508
{
484509
Oid tupleRelId;
485510
Oid databaseId;
486511
Oid relationId;
487-
RelFileNode rnode;
488512

489513
/* Do nothing during bootstrap */
490514
if (IsBootstrapProcessingMode())
@@ -510,7 +534,7 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
510534
* First let the catcache do its thing
511535
*/
512536
PrepareToInvalidateCacheTuple(relation, tuple,
513-
CacheIdRegisterFunc);
537+
RegisterCatcacheInvalidation);
514538

515539
/*
516540
* Now, is this tuple one of the primary definers of a relcache entry?
@@ -520,27 +544,36 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
520544
if (tupleRelId == RelOid_pg_class)
521545
{
522546
Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
547+
RelFileNode rnode;
523548

524549
relationId = HeapTupleGetOid(tuple);
525550
if (classtup->relisshared)
526551
databaseId = InvalidOid;
527552
else
528553
databaseId = MyDatabaseId;
529-
if (classtup->reltablespace)
530-
rnode.spcNode = classtup->reltablespace;
531-
else
532-
rnode.spcNode = MyDatabaseTableSpace;
533-
rnode.dbNode = databaseId;
534-
rnode.relNode = classtup->relfilenode;
535554

536555
/*
556+
* We need to send out an smgr inval as well as a relcache inval.
557+
* This is needed because other backends might possess
558+
* smgr cache but not relcache entries for the target relation.
559+
*
537560
* Note: during a pg_class row update that assigns a new
538561
* relfilenode or reltablespace value, we will be called on both
539562
* the old and new tuples, and thus will broadcast invalidation
540563
* messages showing both the old and new RelFileNode values. This
541564
* ensures that other backends will close smgr references to the
542565
* old file.
566+
*
567+
* XXX possible future cleanup: it might be better to trigger smgr
568+
* flushes explicitly, rather than indirectly from pg_class updates.
543569
*/
570+
if (classtup->reltablespace)
571+
rnode.spcNode = classtup->reltablespace;
572+
else
573+
rnode.spcNode = MyDatabaseTableSpace;
574+
rnode.dbNode = databaseId;
575+
rnode.relNode = classtup->relfilenode;
576+
RegisterSmgrInvalidation(rnode);
544577
}
545578
else if (tupleRelId == RelOid_pg_attribute)
546579
{
@@ -558,18 +591,14 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
558591
* though.
559592
*/
560593
databaseId = MyDatabaseId;
561-
/* We assume no smgr cache flush is needed, either */
562-
rnode.spcNode = InvalidOid;
563-
rnode.dbNode = InvalidOid;
564-
rnode.relNode = InvalidOid;
565594
}
566595
else
567596
return;
568597

569598
/*
570599
* Yes. We need to register a relcache invalidation event.
571600
*/
572-
(*RelationIdRegisterFunc) (databaseId, relationId, rnode);
601+
RegisterRelcacheInvalidation(databaseId, relationId);
573602
}
574603

575604

@@ -790,9 +819,7 @@ CommandEndInvalidationMessages(void)
790819
void
791820
CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
792821
{
793-
PrepareForTupleInvalidation(relation, tuple,
794-
RegisterCatcacheInvalidation,
795-
RegisterRelcacheInvalidation);
822+
PrepareForTupleInvalidation(relation, tuple);
796823
}
797824

798825
/*
@@ -803,7 +830,10 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
803830
* This is used in places that need to force relcache rebuild but aren't
804831
* changing any of the tuples recognized as contributors to the relcache
805832
* entry by PrepareForTupleInvalidation. (An example is dropping an index.)
806-
* We assume in particular that relfilenode isn't changing.
833+
* We assume in particular that relfilenode/reltablespace aren't changing
834+
* (so the rd_node value is still good).
835+
*
836+
* XXX most callers of this probably don't need to force an smgr flush.
807837
*/
808838
void
809839
CacheInvalidateRelcache(Relation relation)
@@ -817,7 +847,8 @@ CacheInvalidateRelcache(Relation relation)
817847
else
818848
databaseId = MyDatabaseId;
819849

820-
RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node);
850+
RegisterRelcacheInvalidation(databaseId, relationId);
851+
RegisterSmgrInvalidation(relation->rd_node);
821852
}
822853

823854
/*
@@ -844,7 +875,8 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
844875
rnode.dbNode = databaseId;
845876
rnode.relNode = classtup->relfilenode;
846877

847-
RegisterRelcacheInvalidation(databaseId, relationId, rnode);
878+
RegisterRelcacheInvalidation(databaseId, relationId);
879+
RegisterSmgrInvalidation(rnode);
848880
}
849881

850882
/*

src/include/storage/sinval.h

Lines changed: 19 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.39 2004/12/31 22:03:42 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.40 2005/01/10 21:57:19 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -20,22 +20,16 @@
2020

2121

2222
/*
23-
* We currently support two types of shared-invalidation messages: one that
24-
* invalidates an entry in a catcache, and one that invalidates a relcache
25-
* entry. More types could be added if needed. The message type is
26-
* identified by the first "int16" field of the message struct. Zero or
27-
* positive means a catcache inval message (and also serves as the catcache
28-
* ID field). -1 means a relcache inval message. Other negative values
29-
* are available to identify other inval message types.
23+
* We currently support three types of shared-invalidation messages: one that
24+
* invalidates an entry in a catcache, one that invalidates a relcache entry,
25+
* and one that invalidates an smgr cache entry. More types could be added
26+
* if needed. The message type is identified by the first "int16" field of
27+
* the message struct. Zero or positive means a catcache inval message (and
28+
* also serves as the catcache ID field). -1 means a relcache inval message.
29+
* -2 means an smgr inval message. Other negative values are available to
30+
* identify other inval message types.
3031
*
31-
* Relcache invalidation messages usually also cause invalidation of entries
32-
* in the smgr's relation cache. This means they must carry both logical
33-
* and physical relation ID info (ie, both dbOID/relOID and RelFileNode).
34-
* In some cases RelFileNode information is not available so the sender fills
35-
* those fields with zeroes --- this is okay so long as no smgr cache flush
36-
* is required.
37-
*
38-
* Shared-inval events are initially driven by detecting tuple inserts,
32+
* Catcache inval events are initially driven by detecting tuple inserts,
3933
* updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
4034
* An update generates two inval events, one for the old tuple and one for
4135
* the new --- this is needed to get rid of both positive entries for the
@@ -71,20 +65,22 @@ typedef struct
7165
int16 id; /* type field --- must be first */
7266
Oid dbId; /* database ID, or 0 if a shared relation */
7367
Oid relId; /* relation ID */
74-
RelFileNode physId; /* physical file ID */
75-
76-
/*
77-
* Note: it is likely that RelFileNode will someday be changed to
78-
* include database ID. In that case the dbId field will be redundant
79-
* and should be removed to save space.
80-
*/
8168
} SharedInvalRelcacheMsg;
8269

70+
#define SHAREDINVALSMGR_ID (-2)
71+
72+
typedef struct
73+
{
74+
int16 id; /* type field --- must be first */
75+
RelFileNode rnode; /* physical file ID */
76+
} SharedInvalSmgrMsg;
77+
8378
typedef union
8479
{
8580
int16 id; /* type field --- must be first */
8681
SharedInvalCatcacheMsg cc;
8782
SharedInvalRelcacheMsg rc;
83+
SharedInvalSmgrMsg sm;
8884
} SharedInvalidationMessage;
8985

9086

0 commit comments

Comments
 (0)