Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0ce4d56

Browse files
committed
Phase 1 of fix for 'SMgrRelation hashtable corrupted' problem. This
is the minimum required fix. I want to look next at taking advantage of it by simplifying the message semantics in the shared inval message queue, but that part can be held over for 8.1 if it turns out too ugly.
1 parent cc7cd87 commit 0ce4d56

File tree

14 files changed

+121
-107
lines changed

14 files changed

+121
-107
lines changed

src/backend/access/transam/xlogutils.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
14-
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.35 2004/12/31 21:59:30 pgsql Exp $
14+
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.36 2005/01/10 20:02:19 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -125,8 +125,7 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc)
125125
if (hentry == NULL)
126126
elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
127127

128-
if (rdesc->reldata.rd_smgr != NULL)
129-
smgrclose(rdesc->reldata.rd_smgr);
128+
RelationCloseSmgr(&(rdesc->reldata));
130129

131130
memset(rdesc, 0, sizeof(XLogRelDesc));
132131
memset(tpgc, 0, sizeof(FormData_pg_class));
@@ -233,7 +232,8 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
233232
hentry->rdesc = res;
234233

235234
res->reldata.rd_targblock = InvalidBlockNumber;
236-
res->reldata.rd_smgr = smgropen(res->reldata.rd_node);
235+
res->reldata.rd_smgr = NULL;
236+
RelationOpenSmgr(&(res->reldata));
237237

238238
/*
239239
* Create the target file if it doesn't already exist. This lets
@@ -278,7 +278,5 @@ XLogCloseRelation(RelFileNode rnode)
278278

279279
rdesc = hentry->rdesc;
280280

281-
if (rdesc->reldata.rd_smgr != NULL)
282-
smgrclose(rdesc->reldata.rd_smgr);
283-
rdesc->reldata.rd_smgr = NULL;
281+
RelationCloseSmgr(&(rdesc->reldata));
284282
}

src/backend/catalog/heap.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.278 2004/12/31 21:59:38 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.279 2005/01/10 20:02:19 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -322,7 +322,7 @@ heap_create(const char *relname,
322322
if (create_storage)
323323
{
324324
Assert(rel->rd_smgr == NULL);
325-
rel->rd_smgr = smgropen(rel->rd_node);
325+
RelationOpenSmgr(rel);
326326
smgrcreate(rel->rd_smgr, rel->rd_istemp, false);
327327
}
328328

@@ -1186,10 +1186,8 @@ heap_drop_with_catalog(Oid relid)
11861186
if (rel->rd_rel->relkind != RELKIND_VIEW &&
11871187
rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
11881188
{
1189-
if (rel->rd_smgr == NULL)
1190-
rel->rd_smgr = smgropen(rel->rd_node);
1189+
RelationOpenSmgr(rel);
11911190
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
1192-
rel->rd_smgr = NULL;
11931191
}
11941192

11951193
/*

src/backend/catalog/index.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.243 2004/12/31 21:59:38 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.244 2005/01/10 20:02:19 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -780,11 +780,9 @@ index_drop(Oid indexId)
780780
*/
781781
FlushRelationBuffers(userIndexRelation, (BlockNumber) 0);
782782

783-
if (userIndexRelation->rd_smgr == NULL)
784-
userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node);
783+
RelationOpenSmgr(userIndexRelation);
785784
smgrscheduleunlink(userIndexRelation->rd_smgr,
786785
userIndexRelation->rd_istemp);
787-
userIndexRelation->rd_smgr = NULL;
788786

789787
/*
790788
* Close and flush the index's relcache entry, to ensure relcache
@@ -1133,10 +1131,8 @@ setNewRelfilenode(Relation relation)
11331131
smgrclose(srel);
11341132

11351133
/* schedule unlinking old relfilenode */
1136-
if (relation->rd_smgr == NULL)
1137-
relation->rd_smgr = smgropen(relation->rd_node);
1134+
RelationOpenSmgr(relation);
11381135
smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
1139-
relation->rd_smgr = NULL;
11401136

11411137
/* update the pg_class row */
11421138
rd_rel->relfilenode = newrelfilenode;

src/backend/commands/tablecmds.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.141 2004/12/31 21:59:41 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.142 2005/01/10 20:02:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -5521,10 +5521,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
55215521
copy_relation_data(rel, dstrel);
55225522

55235523
/* schedule unlinking old physical file */
5524-
if (rel->rd_smgr == NULL)
5525-
rel->rd_smgr = smgropen(rel->rd_node);
5524+
RelationOpenSmgr(rel);
55265525
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
5527-
rel->rd_smgr = NULL;
55285526

55295527
/*
55305528
* Now drop smgr references. The source was already dropped by

src/backend/postmaster/bgwriter.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.12 2004/12/31 22:00:40 pgsql Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.13 2005/01/10 20:02:20 tgl Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -349,9 +349,7 @@ BackgroundWriterMain(void)
349349
/*
350350
* After any checkpoint, close all smgr files. This is so we
351351
* won't hang onto smgr references to deleted files
352-
* indefinitely. (It is safe to do this because this process
353-
* does not have a relcache, and so no dangling references
354-
* could remain.)
352+
* indefinitely.
355353
*/
356354
smgrcloseall();
357355

src/backend/rewrite/rewriteDefine.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.99 2004/12/31 22:00:45 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.100 2005/01/10 20:02:21 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -484,10 +484,8 @@ DefineQueryRewrite(RuleStmt *stmt)
484484
*/
485485
if (RelisBecomingView)
486486
{
487-
if (event_relation->rd_smgr == NULL)
488-
event_relation->rd_smgr = smgropen(event_relation->rd_node);
487+
RelationOpenSmgr(event_relation);
489488
smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp);
490-
event_relation->rd_smgr = NULL;
491489
}
492490

493491
/* Close rel, but keep lock till commit... */

src/backend/storage/buffer/bufmgr.c

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.184 2005/01/03 18:49:41 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.185 2005/01/10 20:02:21 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -131,8 +131,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
131131
isLocalBuf = reln->rd_istemp;
132132

133133
/* Open it at the smgr level if not already done */
134-
if (reln->rd_smgr == NULL)
135-
reln->rd_smgr = smgropen(reln->rd_node);
134+
RelationOpenSmgr(reln);
136135

137136
/* Substitute proper block number if caller asked for P_NEW */
138137
if (isExtend)
@@ -1130,8 +1129,7 @@ BlockNumber
11301129
RelationGetNumberOfBlocks(Relation relation)
11311130
{
11321131
/* Open it at the smgr level if not already done */
1133-
if (relation->rd_smgr == NULL)
1134-
relation->rd_smgr = smgropen(relation->rd_node);
1132+
RelationOpenSmgr(relation);
11351133

11361134
return smgrnblocks(relation->rd_smgr);
11371135
}
@@ -1147,8 +1145,7 @@ void
11471145
RelationTruncate(Relation rel, BlockNumber nblocks)
11481146
{
11491147
/* Open it at the smgr level if not already done */
1150-
if (rel->rd_smgr == NULL)
1151-
rel->rd_smgr = smgropen(rel->rd_node);
1148+
RelationOpenSmgr(rel);
11521149

11531150
/* Make sure rd_targblock isn't pointing somewhere past end */
11541151
rel->rd_targblock = InvalidBlockNumber;
@@ -1445,8 +1442,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
14451442
BufferDesc *bufHdr;
14461443

14471444
/* Open rel at the smgr level if not already done */
1448-
if (rel->rd_smgr == NULL)
1449-
rel->rd_smgr = smgropen(rel->rd_node);
1445+
RelationOpenSmgr(rel);
14501446

14511447
if (rel->rd_istemp)
14521448
{

src/backend/storage/buffer/localbuf.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.61 2004/12/31 22:00:49 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.62 2005/01/10 20:02:21 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -108,13 +108,13 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
108108
*/
109109
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
110110
{
111-
SMgrRelation reln;
111+
SMgrRelation oreln;
112112

113113
/* Find smgr relation for buffer */
114-
reln = smgropen(bufHdr->tag.rnode);
114+
oreln = smgropen(bufHdr->tag.rnode);
115115

116116
/* And write... */
117-
smgrwrite(reln,
117+
smgrwrite(oreln,
118118
bufHdr->tag.blockNum,
119119
(char *) MAKE_PTR(bufHdr->data),
120120
true);

src/backend/storage/smgr/smgr.c

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.84 2004/12/31 22:01:13 pgsql Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.85 2005/01/10 20:02:22 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -216,6 +216,7 @@ smgropen(RelFileNode rnode)
216216
if (!found)
217217
{
218218
/* hash_search already filled in the lookup key */
219+
reln->smgr_owner = NULL;
219220
reln->smgr_which = 0; /* we only have md.c at present */
220221
reln->md_fd = NULL; /* mark it not open */
221222
}
@@ -224,15 +225,36 @@ smgropen(RelFileNode rnode)
224225
}
225226

226227
/*
227-
* smgrclose() -- Close and delete an SMgrRelation object.
228+
* smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
228229
*
229-
* It is the caller's responsibility not to leave any dangling references
230-
* to the object. (Pointers should be cleared after successful return;
231-
* on the off chance of failure, the SMgrRelation object will still exist.)
230+
* There can be only one owner at a time; this is sufficient since currently
231+
* the only such owners exist in the relcache.
232+
*/
233+
void
234+
smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
235+
{
236+
/*
237+
* First, unhook any old owner. (Normally there shouldn't be any, but
238+
* it seems possible that this can happen during swap_relation_files()
239+
* depending on the order of processing. It's ok to close the old
240+
* relcache entry early in that case.)
241+
*/
242+
if (reln->smgr_owner)
243+
*(reln->smgr_owner) = NULL;
244+
245+
/* Now establish the ownership relationship. */
246+
reln->smgr_owner = owner;
247+
*owner = reln;
248+
}
249+
250+
/*
251+
* smgrclose() -- Close and delete an SMgrRelation object.
232252
*/
233253
void
234254
smgrclose(SMgrRelation reln)
235255
{
256+
SMgrRelation *owner;
257+
236258
if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln))
237259
ereport(ERROR,
238260
(errcode_for_file_access(),
@@ -241,16 +263,24 @@ smgrclose(SMgrRelation reln)
241263
reln->smgr_rnode.dbNode,
242264
reln->smgr_rnode.relNode)));
243265

266+
owner = reln->smgr_owner;
267+
244268
if (hash_search(SMgrRelationHash,
245269
(void *) &(reln->smgr_rnode),
246270
HASH_REMOVE, NULL) == NULL)
247271
elog(ERROR, "SMgrRelation hashtable corrupted");
272+
273+
/*
274+
* Unhook the owner pointer, if any. We do this last since in the
275+
* remote possibility of failure above, the SMgrRelation object will still
276+
* exist.
277+
*/
278+
if (owner)
279+
*owner = NULL;
248280
}
249281

250282
/*
251283
* smgrcloseall() -- Close all existing SMgrRelation objects.
252-
*
253-
* It is the caller's responsibility not to leave any dangling references.
254284
*/
255285
void
256286
smgrcloseall(void)
@@ -275,8 +305,6 @@ smgrcloseall(void)
275305
* This has the same effects as smgrclose(smgropen(rnode)), but it avoids
276306
* uselessly creating a hashtable entry only to drop it again when no
277307
* such entry exists already.
278-
*
279-
* It is the caller's responsibility not to leave any dangling references.
280308
*/
281309
void
282310
smgrclosenode(RelFileNode rnode)

src/backend/utils/cache/inval.c

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
* Portions Copyright (c) 1994, Regents of the University of California
8181
*
8282
* IDENTIFICATION
83-
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.68 2004/12/31 22:01:25 pgsql Exp $
83+
* $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.69 2005/01/10 20:02:23 tgl Exp $
8484
*
8585
*-------------------------------------------------------------------------
8686
*/
@@ -414,17 +414,9 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
414414
}
415415
else if (msg->id == SHAREDINVALRELCACHE_ID)
416416
{
417-
/*
418-
* If the message includes a valid relfilenode, we must ensure
419-
* that smgr cache entry gets zapped. The relcache will handle
420-
* this if called, otherwise we must do it directly.
421-
*/
422417
if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
423418
{
424-
if (OidIsValid(msg->rc.physId.relNode))
425-
RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId);
426-
else
427-
RelationCacheInvalidateEntry(msg->rc.relId, NULL);
419+
RelationCacheInvalidateEntry(msg->rc.relId);
428420

429421
for (i = 0; i < cache_callback_count; i++)
430422
{
@@ -434,12 +426,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
434426
(*ccitem->function) (ccitem->arg, msg->rc.relId);
435427
}
436428
}
437-
else
438-
{
439-
/* might have smgr entry even if not in our database */
440-
if (OidIsValid(msg->rc.physId.relNode))
441-
smgrclosenode(msg->rc.physId);
442-
}
429+
/*
430+
* If the message includes a valid relfilenode, we must ensure
431+
* the smgr cache entry gets zapped. This might not have happened
432+
* above since the relcache entry might not have existed or might
433+
* have been associated with a different relfilenode.
434+
*
435+
* XXX there is no real good reason for rnode inval to be in the
436+
* same message at all. FIXME in 8.1.
437+
*/
438+
if (OidIsValid(msg->rc.physId.relNode))
439+
smgrclosenode(msg->rc.physId);
443440
}
444441
else
445442
elog(FATAL, "unrecognized SI message id: %d", msg->id);

0 commit comments

Comments
 (0)