
Commit 243e9b4

For inplace update, send nontransactional invalidations.
The inplace update survives ROLLBACK. The inval didn't, so another backend's DDL could then update the row without incorporating the inplace update. In the test this fixes, a mix of CREATE INDEX and ALTER TABLE resulted in a table with an index, yet relhasindex=f. That is a source of index corruption.

Back-patch to v12 (all supported versions). The back branch versions don't change WAL, because those branches just added end-of-recovery SIResetAll(). All branches change the ABI of extern function PrepareToInvalidateCacheTuple(). No PGXN extension calls that, and there's no apparent use case in extensions.

Reviewed by Nitin Motiani and (in earlier versions) Andres Freund.

Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com
1 parent 0fe1736 commit 243e9b4

File tree

19 files changed: +401 -145 lines changed

src/backend/access/heap/heapam.c

+52 -8

@@ -6326,13 +6326,39 @@ heap_inplace_update_and_unlock(Relation relation,
 	HeapTupleHeader htup = oldtup->t_data;
 	uint32		oldlen;
 	uint32		newlen;
+	int			nmsgs = 0;
+	SharedInvalidationMessage *invalMessages = NULL;
+	bool		RelcacheInitFileInval = false;
 
 	Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
 	oldlen = oldtup->t_len - htup->t_hoff;
 	newlen = tuple->t_len - tuple->t_data->t_hoff;
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
 
+	/*
+	 * Construct shared cache inval if necessary.  Note that because we only
+	 * pass the new version of the tuple, this mustn't be used for any
+	 * operations that could change catcache lookup keys.  But we aren't
+	 * bothering with index updates either, so that's true a fortiori.
+	 */
+	CacheInvalidateHeapTupleInplace(relation, tuple, NULL);
+
+	/* Like RecordTransactionCommit(), log only if needed */
+	if (XLogStandbyInfoActive())
+		nmsgs = inplaceGetInvalidationMessages(&invalMessages,
+											   &RelcacheInitFileInval);
+
+	/*
+	 * Unlink relcache init files as needed.  If unlinking, acquire
+	 * RelCacheInitLock until after associated invalidations.  By doing this
+	 * in advance, if we checkpoint and then crash between inplace
+	 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+	 * RelationCacheInitFileRemove().  That uses elevel==LOG, so replay would
+	 * neglect to PANIC on EIO.
+	 */
+	PreInplace_Inval();
+
 	/* NO EREPORT(ERROR) from here till changes are logged */
 	START_CRIT_SECTION();
 
@@ -6362,9 +6388,16 @@ heap_inplace_update_and_unlock(Relation relation,
 		XLogRecPtr	recptr;
 
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+		xlrec.dbId = MyDatabaseId;
+		xlrec.tsId = MyDatabaseTableSpace;
+		xlrec.relcacheInitFileInval = RelcacheInitFileInval;
+		xlrec.nmsgs = nmsgs;
 
 		XLogBeginInsert();
-		XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
+		XLogRegisterData((char *) &xlrec, MinSizeOfHeapInplace);
+		if (nmsgs != 0)
+			XLogRegisterData((char *) invalMessages,
+							 nmsgs * sizeof(SharedInvalidationMessage));
 
 		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 		XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
@@ -6376,17 +6409,28 @@ heap_inplace_update_and_unlock(Relation relation,
 		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * Send invalidations to shared queue.  SearchSysCacheLocked1() assumes we
+	 * do this before UnlockTuple().
+	 *
+	 * If we're mutating a tuple visible only to this transaction, there's an
+	 * equivalent transactional inval from the action that created the tuple,
+	 * and this inval is superfluous.
+	 */
+	AtInplace_Inval();
+
 	END_CRIT_SECTION();
+	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
-	heap_inplace_unlock(relation, oldtup, buffer);
+	AcceptInvalidationMessages();	/* local processing of just-sent inval */
 
 	/*
-	 * Send out shared cache inval if necessary.  Note that because we only
-	 * pass the new version of the tuple, this mustn't be used for any
-	 * operations that could change catcache lookup keys.  But we aren't
-	 * bothering with index updates either, so that's true a fortiori.
-	 *
-	 * XXX ROLLBACK discards the invalidation.  See test inplace-inval.spec.
+	 * Queue a transactional inval.  The immediate invalidation we just sent
+	 * is the only one known to be necessary.  To reduce risk from the
+	 * transition to immediate invalidation, continue sending a transactional
+	 * invalidation like we've long done.  Third-party code might rely on it.
 	 */
 	if (!IsBootstrapProcessingMode())
 		CacheInvalidateHeapTuple(relation, tuple, NULL);
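Taken together, these hunks move invalidation so that it straddles the critical section instead of running after buffer release. For orientation, here is a condensed sketch of the resulting order of operations in heap_inplace_update_and_unlock(), paraphrased from the hunks above (declarations and error handling omitted; not the literal function body):

/* Condensed flow after this commit; every call appears in the diff above. */
CacheInvalidateHeapTupleInplace(relation, tuple, NULL);	/* build invals */
if (XLogStandbyInfoActive())
	nmsgs = inplaceGetInvalidationMessages(&invalMessages,
										   &RelcacheInitFileInval);
PreInplace_Inval();				/* unlink relcache init files before WAL */

START_CRIT_SECTION();
/* ... overwrite the tuple, MarkBufferDirty(), XLogInsert(), PageSetLSN() ... */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
AtInplace_Inval();				/* nontransactional: hits shared queue now */
END_CRIT_SECTION();

UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
AcceptInvalidationMessages();	/* local processing of just-sent inval */
if (!IsBootstrapProcessingMode())
	CacheInvalidateHeapTuple(relation, tuple, NULL);	/* legacy transactional inval */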

src/backend/access/heap/heapam_xlog.c

+6
@@ -1170,6 +1170,12 @@ heap_xlog_inplace(XLogReaderState *record)
 	}
 	if (BufferIsValid(buffer))
 		UnlockReleaseBuffer(buffer);
+
+	ProcessCommittedInvalidationMessages(xlrec->msgs,
+										 xlrec->nmsgs,
+										 xlrec->relcacheInitFileInval,
+										 xlrec->dbId,
+										 xlrec->tsId);
 }
 
 void
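The replay call above reads several new fields from the record. A plausible reconstruction of the expanded xl_heap_inplace, inferred from the fields and the MinSizeOfHeapInplace macro used in these hunks (the authoritative definition lives in src/include/access/heapam_xlog.h, presumably among the changed files not shown on this page):

typedef struct xl_heap_inplace
{
	OffsetNumber offnum;		/* updated tuple's offset on page */
	Oid			dbId;			/* MyDatabaseId */
	Oid			tsId;			/* MyDatabaseTableSpace */
	bool		relcacheInitFileInval;	/* invalidate relcache init files? */
	int			nmsgs;			/* number of shared inval msgs */
	SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_inplace;

/* fixed-size prefix; trailing msgs[] is registered separately when nmsgs > 0 */
#define MinSizeOfHeapInplace	(offsetof(xl_heap_inplace, nmsgs) + sizeof(int))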

src/backend/access/rmgrdesc/heapdesc.c

+4
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "storage/standbydefs.h"
 
 /*
  * NOTE: "keyname" argument cannot have trailing spaces or punctuation
@@ -253,6 +254,9 @@ heap_desc(StringInfo buf, XLogReaderState *record)
 		xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
 
 		appendStringInfo(buf, "off: %u", xlrec->offnum);
+		standby_desc_invalidations(buf, xlrec->nmsgs, xlrec->msgs,
+								   xlrec->dbId, xlrec->tsId,
+								   xlrec->relcacheInitFileInval);
 	}
 }
 

src/backend/access/rmgrdesc/standbydesc.c

+1 -5

@@ -96,11 +96,7 @@ standby_identify(uint8 info)
 	return id;
 }
 
-/*
- * This routine is used by both standby_desc and xact_desc, because
- * transaction commits and XLOG_INVALIDATIONS messages contain invalidations;
- * it seems pointless to duplicate the code.
- */
+/* also used by non-standby records having analogous invalidation fields */
 void
 standby_desc_invalidations(StringInfo buf,
 						   int nmsgs, SharedInvalidationMessage *msgs,

src/backend/access/transam/xact.c

+18 -8

@@ -1369,14 +1369,24 @@ RecordTransactionCommit(void)
 
 		/*
 		 * Transactions without an assigned xid can contain invalidation
-		 * messages (e.g. explicit relcache invalidations or catcache
-		 * invalidations for inplace updates); standbys need to process those.
-		 * We can't emit a commit record without an xid, and we don't want to
-		 * force assigning an xid, because that'd be problematic for e.g.
-		 * vacuum.  Hence we emit a bespoke record for the invalidations.  We
-		 * don't want to use that in case a commit record is emitted, so they
-		 * happen synchronously with commits (besides not wanting to emit more
-		 * WAL records).
+		 * messages.  While inplace updates do this, this is not known to be
+		 * necessary; see comment at inplace CacheInvalidateHeapTuple().
+		 * Extensions might still rely on this capability, and standbys may
+		 * need to process those invals.  We can't emit a commit record
+		 * without an xid, and we don't want to force assigning an xid,
+		 * because that'd be problematic for e.g. vacuum.  Hence we emit a
+		 * bespoke record for the invalidations.  We don't want to use that in
+		 * case a commit record is emitted, so they happen synchronously with
+		 * commits (besides not wanting to emit more WAL records).
+		 *
+		 * XXX Every known use of this capability is a defect.  Since an XID
+		 * isn't controlling visibility of the change that prompted invals,
+		 * other sessions need the inval even if this transaction aborts.
+		 *
+		 * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+		 * queues a relcache inval, including in transactions without an xid
+		 * that had read the (empty) table.  Standbys don't need any ON COMMIT
+		 * DELETE ROWS invals, but we've not done the work to withhold them.
 		 */
 		if (nmsgs != 0)
 		{
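For reference, the "bespoke record" the revised comment refers to is the XLOG_INVALIDATIONS record written just below it. A minimal sketch of that path, assuming the code surrounding this comment is unchanged by the commit:

/* xid-less branch of RecordTransactionCommit(), simplified sketch */
if (nmsgs != 0)
{
	LogStandbyInvalidations(nmsgs, invalMessages,
							RelcacheInitFileInval);
	wrote_xlog = true;
}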

src/backend/catalog/index.c

+9 -2

@@ -2918,12 +2918,19 @@ index_update_stats(Relation rel,
 	if (dirty)
 	{
 		systable_inplace_update_finish(state, tuple);
-		/* the above sends a cache inval message */
+		/* the above sends transactional and immediate cache inval messages */
 	}
 	else
 	{
 		systable_inplace_update_cancel(state);
-		/* no need to change tuple, but force relcache inval anyway */
+
+		/*
+		 * While we didn't change relhasindex, CREATE INDEX needs a
+		 * transactional inval for when the new index's catalog rows become
+		 * visible.  Other CREATE INDEX and REINDEX code happens to also queue
+		 * this inval, but keep this in case rare callers rely on this part of
+		 * our API contract.
+		 */
 		CacheInvalidateRelcacheByTuple(tuple);
 	}
 

src/backend/commands/event_trigger.c

-5
@@ -975,11 +975,6 @@ EventTriggerOnLogin(void)
 					 * this instead of regular updates serves two purposes. First,
 					 * that avoids possible waiting on the row-level lock. Second,
 					 * that avoids dealing with TOAST.
-					 *
-					 * Changes made by inplace update may be lost due to
-					 * concurrent normal updates; see inplace-inval.spec. However,
-					 * we are OK with that. The subsequent connections will still
-					 * have a chance to set "dathasloginevt" to false.
 					 */
 					systable_inplace_update_finish(state, tuple);
 				}

src/backend/replication/logical/decode.c

+11 -15

@@ -509,23 +509,19 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 			/*
 			 * Inplace updates are only ever performed on catalog tuples and
-			 * can, per definition, not change tuple visibility.  Since we
-			 * don't decode catalog tuples, we're not interested in the
-			 * record's contents.
+			 * can, per definition, not change tuple visibility.  Inplace
+			 * updates don't affect storage or interpretation of table rows,
+			 * so they don't affect logicalrep_write_tuple() outcomes.  Hence,
+			 * we don't process invalidations from the original operation.  If
+			 * inplace updates did affect those things, invalidations wouldn't
+			 * make it work, since there are no snapshot-specific versions of
+			 * inplace-updated values.  Since we also don't decode catalog
+			 * tuples, we're not interested in the record's contents.
 			 *
-			 * In-place updates can be used either by XID-bearing transactions
-			 * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
-			 * transactions (e.g. VACUUM).  In the former case, the commit
-			 * record will include cache invalidations, so we mark the
-			 * transaction as catalog modifying here.  Currently that's
-			 * redundant because the commit will do that as well, but once we
-			 * support decoding in-progress relations, this will be important.
+			 * WAL contains likely-unnecessary commit-time invals from the
+			 * CacheInvalidateHeapTuple() call in heap_inplace_update().
+			 * Excess invalidation is safe.
 			 */
-			if (!TransactionIdIsValid(xid))
-				break;
-
-			(void) SnapBuildProcessChange(builder, xid, buf->origptr);
-			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
 			break;
 
 		case XLOG_HEAP_CONFIRM:
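Net effect: with the snapshot-builder and reorder-buffer calls deleted, the XLOG_HEAP_INPLACE arm reduces to the comment plus a bare break, so logical decoding now ignores inplace records entirely:

		case XLOG_HEAP_INPLACE:
			/* ... comment above ... */
			break;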

src/backend/utils/cache/catcache.c

+4 -3

@@ -2286,7 +2286,8 @@ void
 PrepareToInvalidateCacheTuple(Relation relation,
 							  HeapTuple tuple,
 							  HeapTuple newtuple,
-							  void (*function) (int, uint32, Oid))
+							  void (*function) (int, uint32, Oid, void *),
+							  void *context)
 {
 	slist_iter	iter;
 	Oid			reloid;
@@ -2327,7 +2328,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 		hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
 		dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
 
-		(*function) (ccp->id, hashvalue, dbid);
+		(*function) (ccp->id, hashvalue, dbid, context);
 
 		if (newtuple)
 		{
@@ -2336,7 +2337,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
 			newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple);
 
 			if (newhashvalue != hashvalue)
-				(*function) (ccp->id, newhashvalue, dbid);
+				(*function) (ccp->id, newhashvalue, dbid, context);
 		}
 	}
 }
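This is the ABI change the commit message warns about: the added void *context parameter lets callers thread state to the callback without globals, which is what allows the inplace-update path to collect messages rather than queue them transactionally. A hypothetical caller under the new signature (the callback and counter here are illustrative, not from PostgreSQL):

/* Hypothetical callback matching the new signature; counts how many
 * catcache inval messages a tuple would generate.  Illustrative only. */
static void
count_catcache_inval(int cacheId, uint32 hashValue, Oid dbId, void *context)
{
	int		   *count = (int *) context;

	(*count)++;
}

/* usage sketch, from backend code holding a valid relation and tuple */
int			n = 0;

PrepareToInvalidateCacheTuple(relation, tuple, NULL,
							  count_catcache_inval, &n);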
