Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 87985cc

Browse files
committed
Allow locking updated tuples in tuple_update() and tuple_delete()
Currently, in read committed transaction isolation mode (default), we have the following sequence of actions when tuple_update()/tuple_delete() finds the tuple updated by a concurrent transaction: 1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which returns TM_Updated. 2. Lock tuple with tuple_lock(). 3. Re-evaluate plan qual (recheck if we still need to update/delete and calculate the new tuple for update). 4. Second attempt to update/delete tuple with tuple_update()/tuple_delete(). This attempt should be successful, since the tuple was previously locked. This commit eliminates step 2 by taking the lock during the first tuple_update()/tuple_delete() call. The heap table access method saves some effort by checking the updated tuple once instead of twice. Future undo-based table access methods, which will start from the latest row version, can immediately place a lock there. Also, this commit makes tuple_update()/tuple_delete() optionally save the old tuple into the dedicated slot. That saves the effort of re-fetching tuples in certain cases. The code in nodeModifyTable.c is simplified by removing the nested switch/case. Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp Reviewed-by: Andres Freund, Chris Travers
1 parent c7076ba commit 87985cc

File tree

9 files changed

+502
-346
lines changed

9 files changed

+502
-346
lines changed

src/backend/access/heap/heapam.c

+155-50
Large diffs are not rendered by default.

src/backend/access/heap/heapam_handler.c

+76-18
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@
4545
#include "utils/builtins.h"
4646
#include "utils/rel.h"
4747

48+
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
49+
Snapshot snapshot, TupleTableSlot *slot,
50+
CommandId cid, LockTupleMode mode,
51+
LockWaitPolicy wait_policy, uint8 flags,
52+
TM_FailureData *tmfd);
53+
4854
static void reform_and_rewrite_tuple(HeapTuple tuple,
4955
Relation OldHeap, Relation NewHeap,
5056
Datum *values, bool *isnull, RewriteState rwstate);
@@ -300,23 +306,55 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
300306

301307
static TM_Result
302308
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
303-
Snapshot snapshot, Snapshot crosscheck, bool wait,
304-
TM_FailureData *tmfd, bool changingPart)
309+
Snapshot snapshot, Snapshot crosscheck, int options,
310+
TM_FailureData *tmfd, bool changingPart,
311+
TupleTableSlot *oldSlot)
305312
{
313+
TM_Result result;
314+
306315
/*
307316
* Currently, deletion of index tuples is handled at vacuum.  If the
308317
* storage itself cleans up dead tuples on its own, that is also the
309318
* time to call index tuple deletion.
310319
*/
311-
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
320+
result = heap_delete(relation, tid, cid, crosscheck, options,
321+
tmfd, changingPart, oldSlot);
322+
323+
/*
324+
* If the tuple has been concurrently updated, then get the lock on it.
325+
* (Do this only if the caller asked for it by setting the
326+
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
327+
* delete should succeed even if there are more concurrent update
328+
* attempts.
329+
*/
330+
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
331+
{
332+
/*
333+
* heapam_tuple_lock() will take advantage of tuple loaded into
334+
* oldSlot by heap_delete().
335+
*/
336+
result = heapam_tuple_lock(relation, tid, snapshot,
337+
oldSlot, cid, LockTupleExclusive,
338+
(options & TABLE_MODIFY_WAIT) ?
339+
LockWaitBlock :
340+
LockWaitSkip,
341+
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
342+
tmfd);
343+
344+
if (result == TM_Ok)
345+
return TM_Updated;
346+
}
347+
348+
return result;
312349
}
313350

314351

315352
static TM_Result
316353
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
317354
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
318-
bool wait, TM_FailureData *tmfd,
319-
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
355+
int options, TM_FailureData *tmfd,
356+
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
357+
TupleTableSlot *oldSlot)
320358
{
321359
bool shouldFree = true;
322360
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@@ -326,8 +364,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
326364
slot->tts_tableOid = RelationGetRelid(relation);
327365
tuple->t_tableOid = slot->tts_tableOid;
328366

329-
result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
330-
tmfd, lockmode, update_indexes);
367+
result = heap_update(relation, otid, tuple, cid, crosscheck, options,
368+
tmfd, lockmode, update_indexes, oldSlot);
331369
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
332370

333371
/*
@@ -354,6 +392,31 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
354392
if (shouldFree)
355393
pfree(tuple);
356394

395+
/*
396+
* If the tuple has been concurrently updated, then get the lock on it.
397+
* (Do this only if the caller asked for it by setting the
398+
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
399+
* update should succeed even if there are more concurrent update
400+
* attempts.
401+
*/
402+
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
403+
{
404+
/*
405+
* heapam_tuple_lock() will take advantage of tuple loaded into
406+
* oldSlot by heap_update().
407+
*/
408+
result = heapam_tuple_lock(relation, otid, snapshot,
409+
oldSlot, cid, *lockmode,
410+
(options & TABLE_MODIFY_WAIT) ?
411+
LockWaitBlock :
412+
LockWaitSkip,
413+
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
414+
tmfd);
415+
416+
if (result == TM_Ok)
417+
return TM_Updated;
418+
}
419+
357420
return result;
358421
}
359422

@@ -365,7 +428,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
365428
{
366429
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
367430
TM_Result result;
368-
Buffer buffer;
369431
HeapTuple tuple = &bslot->base.tupdata;
370432
bool follow_updates;
371433

@@ -375,18 +437,15 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
375437
Assert(TTS_IS_BUFFERTUPLE(slot));
376438

377439
tuple_lock_retry:
378-
tuple->t_self = *tid;
379-
result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
380-
follow_updates, &buffer, tmfd);
440+
result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
441+
follow_updates, tmfd);
381442

382443
if (result == TM_Updated &&
383444
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
384445
{
385446
/* Should not encounter speculative tuple on recheck */
386447
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
387448

388-
ReleaseBuffer(buffer);
389-
390449
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
391450
{
392451
SnapshotData SnapshotDirty;
@@ -408,6 +467,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
408467
InitDirtySnapshot(SnapshotDirty);
409468
for (;;)
410469
{
470+
Buffer buffer = InvalidBuffer;
471+
411472
if (ItemPointerIndicatesMovedPartitions(tid))
412473
ereport(ERROR,
413474
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -502,7 +563,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
502563
/*
503564
* This is a live tuple, so try to lock it again.
504565
*/
505-
ReleaseBuffer(buffer);
566+
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
506567
goto tuple_lock_retry;
507568
}
508569

@@ -513,7 +574,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
513574
*/
514575
if (tuple->t_data == NULL)
515576
{
516-
Assert(!BufferIsValid(buffer));
577+
ReleaseBuffer(buffer);
517578
return TM_Deleted;
518579
}
519580

@@ -566,9 +627,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
566627
slot->tts_tableOid = RelationGetRelid(relation);
567628
tuple->t_tableOid = slot->tts_tableOid;
568629

569-
/* store in slot, transferring existing pin */
570-
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
571-
572630
return result;
573631
}
574632

src/backend/access/table/tableam.c

+20-6
Original file line numberDiff line numberDiff line change
@@ -287,16 +287,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
287287
* via ereport().
288288
*/
289289
void
290-
simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
290+
simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot,
291+
TupleTableSlot *oldSlot)
291292
{
292293
TM_Result result;
293294
TM_FailureData tmfd;
295+
int options = TABLE_MODIFY_WAIT; /* wait for commit */
296+
297+
/* Fetch old tuple if the relevant slot is provided */
298+
if (oldSlot)
299+
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
294300

295301
result = table_tuple_delete(rel, tid,
296302
GetCurrentCommandId(true),
297303
snapshot, InvalidSnapshot,
298-
true /* wait for commit */ ,
299-
&tmfd, false /* changingPart */ );
304+
options,
305+
&tmfd, false /* changingPart */ ,
306+
oldSlot);
300307

301308
switch (result)
302309
{
@@ -335,17 +342,24 @@ void
335342
simple_table_tuple_update(Relation rel, ItemPointer otid,
336343
TupleTableSlot *slot,
337344
Snapshot snapshot,
338-
TU_UpdateIndexes *update_indexes)
345+
TU_UpdateIndexes *update_indexes,
346+
TupleTableSlot *oldSlot)
339347
{
340348
TM_Result result;
341349
TM_FailureData tmfd;
342350
LockTupleMode lockmode;
351+
int options = TABLE_MODIFY_WAIT; /* wait for commit */
352+
353+
/* Fetch old tuple if the relevant slot is provided */
354+
if (oldSlot)
355+
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
343356

344357
result = table_tuple_update(rel, otid, slot,
345358
GetCurrentCommandId(true),
346359
snapshot, InvalidSnapshot,
347-
true /* wait for commit */ ,
348-
&tmfd, &lockmode, update_indexes);
360+
options,
361+
&tmfd, &lockmode, update_indexes,
362+
oldSlot);
349363

350364
switch (result)
351365
{

src/backend/commands/trigger.c

+15-40
Original file line numberDiff line numberDiff line change
@@ -2773,8 +2773,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
27732773
void
27742774
ExecARDeleteTriggers(EState *estate,
27752775
ResultRelInfo *relinfo,
2776-
ItemPointer tupleid,
27772776
HeapTuple fdw_trigtuple,
2777+
TupleTableSlot *slot,
27782778
TransitionCaptureState *transition_capture,
27792779
bool is_crosspart_update)
27802780
{
@@ -2783,20 +2783,11 @@ ExecARDeleteTriggers(EState *estate,
27832783
if ((trigdesc && trigdesc->trig_delete_after_row) ||
27842784
(transition_capture && transition_capture->tcs_delete_old_table))
27852785
{
2786-
TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
2787-
2788-
Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
2789-
if (fdw_trigtuple == NULL)
2790-
GetTupleForTrigger(estate,
2791-
NULL,
2792-
relinfo,
2793-
tupleid,
2794-
LockTupleExclusive,
2795-
slot,
2796-
NULL,
2797-
NULL,
2798-
NULL);
2799-
else
2786+
/*
2787+
* Put the FDW old tuple into the slot.  Otherwise, the caller is expected
2788+
* to have the old tuple already fetched into the slot.
2789+
*/
2790+
if (fdw_trigtuple != NULL)
28002791
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
28012792

28022793
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -3087,18 +3078,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
30873078
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
30883079
* and destination partitions, respectively, of a cross-partition update of
30893080
* the root partitioned table mentioned in the query, given by 'relinfo'.
3090-
* 'tupleid' in that case refers to the ctid of the "old" tuple in the source
3091-
* partition, and 'newslot' contains the "new" tuple in the destination
3092-
* partition. This interface allows to support the requirements of
3093-
* ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
3094-
* that case.
3081+
* 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
3082+
* contains the "new" tuple in the destination partition. This interface
3083+
* supports the requirements of ExecCrossPartitionUpdateForeignKey();
3084+
* is_crosspart_update must be true in that case.
30953085
*/
30963086
void
30973087
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
30983088
ResultRelInfo *src_partinfo,
30993089
ResultRelInfo *dst_partinfo,
3100-
ItemPointer tupleid,
31013090
HeapTuple fdw_trigtuple,
3091+
TupleTableSlot *oldslot,
31023092
TupleTableSlot *newslot,
31033093
List *recheckIndexes,
31043094
TransitionCaptureState *transition_capture,
@@ -3117,29 +3107,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
31173107
* separately for DELETE and INSERT to capture transition table rows.
31183108
* In such case, either old tuple or new tuple can be NULL.
31193109
*/
3120-
TupleTableSlot *oldslot;
3121-
ResultRelInfo *tupsrc;
3122-
31233110
Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
31243111
!is_crosspart_update);
31253112

3126-
tupsrc = src_partinfo ? src_partinfo : relinfo;
3127-
oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
3128-
3129-
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
3130-
GetTupleForTrigger(estate,
3131-
NULL,
3132-
tupsrc,
3133-
tupleid,
3134-
LockTupleExclusive,
3135-
oldslot,
3136-
NULL,
3137-
NULL,
3138-
NULL);
3139-
else if (fdw_trigtuple != NULL)
3113+
if (fdw_trigtuple != NULL)
3114+
{
3115+
Assert(oldslot);
31403116
ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
3141-
else
3142-
ExecClearTuple(oldslot);
3117+
}
31433118

31443119
AfterTriggerSaveEvent(estate, relinfo,
31453120
src_partinfo, dst_partinfo,

src/backend/executor/execReplication.c

+15-4
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
577577
{
578578
List *recheckIndexes = NIL;
579579
TU_UpdateIndexes update_indexes;
580+
TupleTableSlot *oldSlot = NULL;
580581

581582
/* Compute stored generated columns */
582583
if (rel->rd_att->constr &&
@@ -590,8 +591,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
590591
if (rel->rd_rel->relispartition)
591592
ExecPartitionCheck(resultRelInfo, slot, estate, true);
592593

594+
if (resultRelInfo->ri_TrigDesc &&
595+
resultRelInfo->ri_TrigDesc->trig_update_after_row)
596+
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
597+
593598
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
594-
&update_indexes);
599+
&update_indexes, oldSlot);
595600

596601
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
597602
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
@@ -602,7 +607,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
602607
/* AFTER ROW UPDATE Triggers */
603608
ExecARUpdateTriggers(estate, resultRelInfo,
604609
NULL, NULL,
605-
tid, NULL, slot,
610+
NULL, oldSlot, slot,
606611
recheckIndexes, NULL, false);
607612

608613
list_free(recheckIndexes);
@@ -636,12 +641,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
636641

637642
if (!skip_tuple)
638643
{
644+
TupleTableSlot *oldSlot = NULL;
645+
646+
if (resultRelInfo->ri_TrigDesc &&
647+
resultRelInfo->ri_TrigDesc->trig_delete_after_row)
648+
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
649+
639650
/* OK, delete the tuple */
640-
simple_table_tuple_delete(rel, tid, estate->es_snapshot);
651+
simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot);
641652

642653
/* AFTER ROW DELETE Triggers */
643654
ExecARDeleteTriggers(estate, resultRelInfo,
644-
tid, NULL, NULL, false);
655+
NULL, oldSlot, NULL, false);
645656
}
646657
}
647658

0 commit comments

Comments
 (0)