postgrespro
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
Lines changed: 155 additions & 50 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
Lines changed: 76 additions & 18 deletions
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
Lines changed: 20 additions & 6 deletions
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
Lines changed: 15 additions & 40 deletions
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
Lines changed: 15 additions & 4 deletions
@@ -45,6 +45,12 @@
 #include "utils/builtins.h"
 #include "utils/rel.h"
 
+static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
+								   Snapshot snapshot, TupleTableSlot *slot,
+								   CommandId cid, LockTupleMode mode,
+								   LockWaitPolicy wait_policy, uint8 flags,
+								   TM_FailureData *tmfd);
+
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
 									 Datum *values, bool *isnull, RewriteState rwstate);
@@ -300,23 +306,55 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
 
 static TM_Result
 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
-					Snapshot snapshot, Snapshot crosscheck, bool wait,
-					TM_FailureData *tmfd, bool changingPart)
+					Snapshot snapshot, Snapshot crosscheck, int options,
+					TM_FailureData *tmfd, bool changingPart,
+					TupleTableSlot *oldSlot)
 {
+	TM_Result	result;
+
 	/*
 	 * Currently Deleting of index tuples are handled at vacuum, in case if
 	 * the storage itself is cleaning the dead tuples by itself, it is the
 	 * time to call the index tuple deletion also.
 	 */
-	return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+	result = heap_delete(relation, tid, cid, crosscheck, options,
+						 tmfd, changingPart, oldSlot);
+
+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for it by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
+	 * delete should succeed even if there are more concurrent update
+	 * attempts.
+	 */
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+	{
+		/*
+		 * heapam_tuple_lock() will take advantage of the tuple loaded into
+		 * oldSlot by heap_delete().
+		 */
+		result = heapam_tuple_lock(relation, tid, snapshot,
+								   oldSlot, cid, LockTupleExclusive,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
+
+		if (result == TM_Ok)
+			return TM_Updated;
+	}
+
+	return result;
 }
 
 
 static TM_Result
 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 					CommandId cid, Snapshot snapshot, Snapshot crosscheck,
-					bool wait, TM_FailureData *tmfd,
-					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+					int options, TM_FailureData *tmfd,
+					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+					TupleTableSlot *oldSlot)
 {
 	bool		shouldFree = true;
 	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@@ -326,8 +364,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
-						 tmfd, lockmode, update_indexes);
+	result = heap_update(relation, otid, tuple, cid, crosscheck, options,
+						 tmfd, lockmode, update_indexes, oldSlot);
 	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
 
 	/*
@@ -354,6 +392,31 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	if (shouldFree)
 		pfree(tuple);
 
+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for it by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
+	 * update should succeed even if there are more concurrent update
+	 * attempts.
+	 */
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+	{
+		/*
+		 * heapam_tuple_lock() will take advantage of the tuple loaded into
+		 * oldSlot by heap_update().
+		 */
+		result = heapam_tuple_lock(relation, otid, snapshot,
+								   oldSlot, cid, *lockmode,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
+
+		if (result == TM_Ok)
+			return TM_Updated;
+	}
+
 	return result;
 }
 
@@ -365,7 +428,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
-	Buffer		buffer;
 	HeapTuple	tuple = &bslot->base.tupdata;
 	bool		follow_updates;
 
@@ -375,18 +437,15 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 tuple_lock_retry:
-	tuple->t_self = *tid;
-	result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
-							 follow_updates, &buffer, tmfd);
+	result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
+							 follow_updates, tmfd);
 
 	if (result == TM_Updated &&
 		(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
 	{
 		/* Should not encounter speculative tuple on recheck */
 		Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
 
-		ReleaseBuffer(buffer);
-
 		if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
 		{
 			SnapshotData SnapshotDirty;
@@ -408,6 +467,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 			InitDirtySnapshot(SnapshotDirty);
 			for (;;)
 			{
+				Buffer		buffer = InvalidBuffer;
+
 				if (ItemPointerIndicatesMovedPartitions(tid))
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -502,7 +563,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 					/*
 					 * This is a live tuple, so try to lock it again.
 					 */
-					ReleaseBuffer(buffer);
+					ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
 					goto tuple_lock_retry;
 				}
 
@@ -513,7 +574,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				 */
 				if (tuple->t_data == NULL)
 				{
-					Assert(!BufferIsValid(buffer));
+					ReleaseBuffer(buffer);
 					return TM_Deleted;
 				}
 
@@ -566,9 +627,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	/* store in slot, transferring existing pin */
-	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
-
 	return result;
 }
 
 
@@ -287,16 +287,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
  * via ereport().
  */
 void
-simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
+simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
+
+	/* Fetch old tuple if the relevant slot is provided */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
 
 	result = table_tuple_delete(rel, tid,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
-								&tmfd, false /* changingPart */ );
+								options,
+								&tmfd, false /* changingPart */ ,
+								oldSlot);
 
 	switch (result)
 	{
@@ -335,17 +342,24 @@ void
 simple_table_tuple_update(Relation rel, ItemPointer otid,
 						  TupleTableSlot *slot,
 						  Snapshot snapshot,
-						  TU_UpdateIndexes *update_indexes)
+						  TU_UpdateIndexes *update_indexes,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
 	LockTupleMode lockmode;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
+
+	/* Fetch old tuple if the relevant slot is provided */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
 
 	result = table_tuple_update(rel, otid, slot,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
-								&tmfd, &lockmode, update_indexes);
+								options,
+								&tmfd, &lockmode, update_indexes,
+								oldSlot);
 
 	switch (result)
 	{
 
@@ -2773,8 +2773,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 void
 ExecARDeleteTriggers(EState *estate,
 					 ResultRelInfo *relinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *slot,
 					 TransitionCaptureState *transition_capture,
 					 bool is_crosspart_update)
 {
@@ -2783,20 +2783,11 @@ ExecARDeleteTriggers(EState *estate,
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
-		TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
-
-		Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
-		if (fdw_trigtuple == NULL)
-			GetTupleForTrigger(estate,
-							   NULL,
-							   relinfo,
-							   tupleid,
-							   LockTupleExclusive,
-							   slot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else
+		/*
+		 * Store the FDW old tuple in the slot.  Otherwise, the caller is
+		 * expected to have already fetched the old tuple into the slot.
+		 */
+		if (fdw_trigtuple != NULL)
 			ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
 
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -3087,18 +3078,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
  * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
  * and destination partitions, respectively, of a cross-partition update of
  * the root partitioned table mentioned in the query, given by 'relinfo'.
- * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
- * partition, and 'newslot' contains the "new" tuple in the destination
- * partition.  This interface allows to support the requirements of
- * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
- * that case.
+ * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
+ * contains the "new" tuple in the destination partition.  This interface
+ * exists to support the requirements of ExecCrossPartitionUpdateForeignKey();
+ * is_crosspart_update must be true in that case.
  */
 void
 ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 					 ResultRelInfo *src_partinfo,
 					 ResultRelInfo *dst_partinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *oldslot,
 					 TupleTableSlot *newslot,
 					 List *recheckIndexes,
 					 TransitionCaptureState *transition_capture,
@@ -3117,29 +3107,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 		 * separately for DELETE and INSERT to capture transition table rows.
 		 * In such case, either old tuple or new tuple can be NULL.
 		 */
-		TupleTableSlot *oldslot;
-		ResultRelInfo *tupsrc;
-
 		Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
 			   !is_crosspart_update);
 
-		tupsrc = src_partinfo ? src_partinfo : relinfo;
-		oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
-
-		if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
-			GetTupleForTrigger(estate,
-							   NULL,
-							   tupsrc,
-							   tupleid,
-							   LockTupleExclusive,
-							   oldslot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else if (fdw_trigtuple != NULL)
+		if (fdw_trigtuple != NULL)
+		{
+			Assert(oldslot);
 			ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
-		else
-			ExecClearTuple(oldslot);
+		}
 
 		AfterTriggerSaveEvent(estate, relinfo,
 							  src_partinfo, dst_partinfo,
 
@@ -577,6 +577,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 	{
 		List	   *recheckIndexes = NIL;
 		TU_UpdateIndexes update_indexes;
+		TupleTableSlot *oldSlot = NULL;
 
 		/* Compute stored generated columns */
 		if (rel->rd_att->constr &&
@@ -590,8 +591,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		if (rel->rd_rel->relispartition)
 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
+		if (resultRelInfo->ri_TrigDesc &&
+			resultRelInfo->ri_TrigDesc->trig_update_after_row)
+			oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
 		simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
-								  &update_indexes);
+								  &update_indexes, oldSlot);
 
 		if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
@@ -602,7 +607,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		/* AFTER ROW UPDATE Triggers */
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
-							 tid, NULL, slot,
+							 NULL, oldSlot, slot,
 							 recheckIndexes, NULL, false);
 
 		list_free(recheckIndexes);
@@ -636,12 +641,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
 
 	if (!skip_tuple)
 	{
+		TupleTableSlot *oldSlot = NULL;
+
+		if (resultRelInfo->ri_TrigDesc &&
+			resultRelInfo->ri_TrigDesc->trig_delete_after_row)
+			oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
 		/* OK, delete the tuple */
-		simple_table_tuple_delete(rel, tid, estate->es_snapshot);
+		simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot);
 
 		/* AFTER ROW DELETE Triggers */
 		ExecARDeleteTriggers(estate, resultRelInfo,
-							 tid, NULL, NULL, false);
+							 NULL, oldSlot, NULL, false);
 	}
 }