Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c5b097f

Browse files
committed
Refactor code for cross-partition updates to a separate function.
ExecUpdate() is very long, so extract the part of it that deals with cross-partition updates to a separate function to make it more readable. Per Andres Freund's suggestion.

Author: Amit Langote
Discussion: https://www.postgresql.org/message-id/CA%2BHiwqEUgb5RdUgxR7Sqco4S09jzJstHiaT2vnCRPGR4JCAPqA%40mail.gmail.com
1 parent 7f47088 commit c5b097f

File tree

1 file changed

+158
-107
lines changed

1 file changed

+158
-107
lines changed

src/backend/executor/nodeModifyTable.c

+158-107
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,148 @@ ldelete:;
10591059
return NULL;
10601060
}
10611061

1062+
/*
1063+
* ExecCrossPartitionUpdate --- Move an updated tuple to another partition.
1064+
*
1065+
* This works by first deleting the old tuple from the current partition,
1066+
* followed by inserting the new tuple into the root parent table, that is,
1067+
* mtstate->rootResultRelInfo. It will be re-routed from there to the
1068+
* correct partition.
1069+
*
1070+
* Returns true if the tuple has been successfully moved, or if it's found
1071+
* that the tuple was concurrently deleted so there's nothing more to do
1072+
* for the caller.
1073+
*
1074+
* False is returned if the tuple we're trying to move is found to have been
1075+
* concurrently updated. In that case, the caller must check whether the
1076+
* updated tuple that's returned in *retry_slot still needs to be re-routed,
1077+
* and call this function again or perform a regular update accordingly.
*
* Parameters:
*   mtstate         ModifyTable node state; supplies the executor state, the
*                   per-subplan result relations and rootResultRelInfo.
*   resultRelInfo   result relation the old tuple is deleted from; asserted
*                   to be an element of mtstate->resultRelInfo[].
*   tupleid,
*   oldtuple        identify the old tuple; passed through to ExecDelete().
*   slot            the new tuple; converted to the root's tuple descriptor
*                   when a conversion map exists, then given to ExecInsert().
*   planSlot        passed through to both ExecDelete() and ExecInsert().
*   epqstate        passed through to ExecDelete().
*   canSetTag       passed to ExecInsert() only; the ExecDelete() step
*                   always passes false.
*   retry_slot      output; reset to NULL on entry, and set to the
*                   junk-filtered re-fetched tuple when false is returned.
*   inserted_tuple  output; reset to NULL on entry, and set to the result of
*                   ExecInsert() when the tuple has been moved.
*
* Raises ERROR (ERRCODE_FEATURE_NOT_SUPPORTED) if the plan's onConflictAction
* is ONCONFLICT_UPDATE.
1078+
*/
1079+
static bool
1080+
ExecCrossPartitionUpdate(ModifyTableState *mtstate,
1081+
ResultRelInfo *resultRelInfo,
1082+
ItemPointer tupleid, HeapTuple oldtuple,
1083+
TupleTableSlot *slot, TupleTableSlot *planSlot,
1084+
EPQState *epqstate, bool canSetTag,
1085+
TupleTableSlot **retry_slot,
1086+
TupleTableSlot **inserted_tuple)
1087+
{
1088+
EState *estate = mtstate->ps.state;
1089+
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1090+
int map_index;
1091+
TupleConversionMap *tupconv_map;
1092+
TupleConversionMap *saved_tcs_map = NULL;
1093+
bool tuple_deleted;
1094+
TupleTableSlot *epqslot = NULL;
1095+
1096+
/* Reset both outputs so that they are defined on every return path. */
*inserted_tuple = NULL;
1097+
*retry_slot = NULL;
1098+
1099+
/*
1100+
* Disallow an INSERT ON CONFLICT DO UPDATE that causes the original row
1101+
* to migrate to a different partition. Maybe this can be implemented
1102+
* some day, but it seems a fringe feature with little redeeming value.
1103+
*/
1104+
if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1105+
ereport(ERROR,
1106+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1107+
errmsg("invalid ON UPDATE specification"),
1108+
errdetail("The result tuple would appear in a different partition than the original tuple.")));
1109+
1110+
/*
1111+
* When an UPDATE is run on a leaf partition, we will not have partition
1112+
* tuple routing set up. In that case, fail with partition constraint
1113+
* violation error.
*
* NOTE(review): presumably ExecPartitionCheckEmitError() does not return;
* confirm, since nothing below re-checks proute.
1114+
*/
1115+
if (proute == NULL)
1116+
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1117+
1118+
/*
1119+
* Row movement, part 1. Delete the tuple, but skip RETURNING processing.
1120+
* We want to return rows from INSERT.
1121+
*/
1122+
ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, planSlot,
1123+
epqstate, estate,
1124+
false, /* processReturning */
1125+
false, /* canSetTag */
1126+
true, /* changingPart */
1127+
&tuple_deleted, &epqslot);
1128+
1129+
/*
1130+
* If for some reason DELETE didn't happen (e.g. trigger prevented it, or
1131+
* it was already deleted by self, or it was concurrently deleted by
1132+
* another transaction), then we should skip the insert as well;
1133+
* otherwise, an UPDATE could cause an increase in the total number of
1134+
* rows across all partitions, which is clearly wrong.
1135+
*
1136+
* For a normal UPDATE, the case where the tuple has been the subject of a
1137+
* concurrent UPDATE or DELETE would be handled by the EvalPlanQual
1138+
* machinery, but for an UPDATE that we've translated into a DELETE from
1139+
* this partition and an INSERT into some other partition, that's not
1140+
* available, because CTID chains can't span relation boundaries. We
1141+
* mimic the semantics to a limited extent by skipping the INSERT if the
1142+
* DELETE fails to find a tuple. This ensures that two concurrent
1143+
* attempts to UPDATE the same tuple at the same time can't turn one tuple
1144+
* into two, and that an UPDATE of a just-deleted tuple can't resurrect
1145+
* it.
1146+
*/
1147+
if (!tuple_deleted)
1148+
{
1149+
/*
1150+
* epqslot will typically be NULL. But when ExecDelete() finds that
1151+
* another transaction has concurrently updated the same row, it
1152+
* re-fetches the row, skips the delete, and epqslot is set to the
1153+
* re-fetched tuple slot. In that case, we need to do all the checks
1154+
* again.
1155+
*/
1156+
if (TupIsNull(epqslot))
1157+
return true;
1158+
else
1159+
{
1160+
*retry_slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1161+
return false;
1162+
}
1163+
}
1164+
1165+
/*
1166+
* resultRelInfo is one of the per-subplan resultRelInfos. So we should
1167+
* convert the tuple into root's tuple descriptor, since ExecInsert()
1168+
* starts the search from root. The tuple conversion map list is in the
1169+
* order of mtstate->resultRelInfo[], so to retrieve the one for this
1170+
* resultRel, we need to know the position of the resultRel in
1171+
* mtstate->resultRelInfo[].
1172+
*/
1173+
map_index = resultRelInfo - mtstate->resultRelInfo;
1174+
Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
1175+
tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
1176+
if (tupconv_map != NULL)
1177+
slot = execute_attr_map_slot(tupconv_map->attrMap,
1178+
slot,
1179+
mtstate->mt_root_tuple_slot);
1180+
1181+
/*
1182+
* ExecInsert() may scribble on mtstate->mt_transition_capture, so save
1183+
* the currently active map.
1184+
*/
1185+
if (mtstate->mt_transition_capture)
1186+
saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
1187+
1188+
/* Tuple routing starts from the root table. */
1189+
Assert(mtstate->rootResultRelInfo != NULL);
1190+
*inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
1191+
planSlot, estate, canSetTag);
1192+
1193+
/* Clear the INSERT's tuple and restore the saved map. */
1194+
if (mtstate->mt_transition_capture)
1195+
{
1196+
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1197+
mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
1198+
}
1199+
1200+
/* We're done moving. */
1201+
return true;
1202+
}
1203+
10621204
/* ----------------------------------------------------------------
10631205
* ExecUpdate
10641206
*
@@ -1212,119 +1354,28 @@ lreplace:;
12121354
*/
12131355
if (partition_constraint_failed)
12141356
{
1215-
bool tuple_deleted;
1216-
TupleTableSlot *ret_slot;
1217-
TupleTableSlot *epqslot = NULL;
1218-
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1219-
int map_index;
1220-
TupleConversionMap *tupconv_map;
1221-
TupleConversionMap *saved_tcs_map = NULL;
1222-
1223-
/*
1224-
* Disallow an INSERT ON CONFLICT DO UPDATE that causes the
1225-
* original row to migrate to a different partition. Maybe this
1226-
* can be implemented some day, but it seems a fringe feature with
1227-
* little redeeming value.
1228-
*/
1229-
if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1230-
ereport(ERROR,
1231-
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1232-
errmsg("invalid ON UPDATE specification"),
1233-
errdetail("The result tuple would appear in a different partition than the original tuple.")));
1234-
1235-
/*
1236-
* When an UPDATE is run on a leaf partition, we will not have
1237-
* partition tuple routing set up. In that case, fail with
1238-
* partition constraint violation error.
1239-
*/
1240-
if (proute == NULL)
1241-
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1242-
1243-
/*
1244-
* Row movement, part 1. Delete the tuple, but skip RETURNING
1245-
* processing. We want to return rows from INSERT.
1246-
*/
1247-
ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, planSlot,
1248-
epqstate, estate,
1249-
false, /* processReturning */
1250-
false, /* canSetTag */
1251-
true, /* changingPart */
1252-
&tuple_deleted, &epqslot);
1253-
1254-
/*
1255-
* For some reason if DELETE didn't happen (e.g. trigger prevented
1256-
* it, or it was already deleted by self, or it was concurrently
1257-
* deleted by another transaction), then we should skip the insert
1258-
* as well; otherwise, an UPDATE could cause an increase in the
1259-
* total number of rows across all partitions, which is clearly
1260-
* wrong.
1261-
*
1262-
* For a normal UPDATE, the case where the tuple has been the
1263-
* subject of a concurrent UPDATE or DELETE would be handled by
1264-
* the EvalPlanQual machinery, but for an UPDATE that we've
1265-
* translated into a DELETE from this partition and an INSERT into
1266-
* some other partition, that's not available, because CTID chains
1267-
* can't span relation boundaries. We mimic the semantics to a
1268-
* limited extent by skipping the INSERT if the DELETE fails to
1269-
* find a tuple. This ensures that two concurrent attempts to
1270-
* UPDATE the same tuple at the same time can't turn one tuple
1271-
* into two, and that an UPDATE of a just-deleted tuple can't
1272-
* resurrect it.
1273-
*/
1274-
if (!tuple_deleted)
1275-
{
1276-
/*
1277-
* epqslot will be typically NULL. But when ExecDelete()
1278-
* finds that another transaction has concurrently updated the
1279-
* same row, it re-fetches the row, skips the delete, and
1280-
* epqslot is set to the re-fetched tuple slot. In that case,
1281-
* we need to do all the checks again.
1282-
*/
1283-
if (TupIsNull(epqslot))
1284-
return NULL;
1285-
else
1286-
{
1287-
slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1288-
goto lreplace;
1289-
}
1290-
}
1357+
TupleTableSlot *inserted_tuple,
1358+
*retry_slot;
1359+
bool retry;
12911360

12921361
/*
1293-
* resultRelInfo is one of the per-subplan resultRelInfos. So we
1294-
* should convert the tuple into root's tuple descriptor, since
1295-
* ExecInsert() starts the search from root. The tuple conversion
1296-
* map list is in the order of mtstate->resultRelInfo[], so to
1297-
* retrieve the one for this resultRel, we need to know the
1298-
* position of the resultRel in mtstate->resultRelInfo[].
1362+
* ExecCrossPartitionUpdate will first DELETE the row from the
1363+
* partition it's currently in and then insert it back into the
1364+
* root table, which will re-route it to the correct partition.
1365+
* The first part may have to be repeated if it is detected that
1366+
* the tuple we're trying to move has been concurrently updated.
12991367
*/
1300-
map_index = resultRelInfo - mtstate->resultRelInfo;
1301-
Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
1302-
tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
1303-
if (tupconv_map != NULL)
1304-
slot = execute_attr_map_slot(tupconv_map->attrMap,
1305-
slot,
1306-
mtstate->mt_root_tuple_slot);
1307-
1308-
/*
1309-
* ExecInsert() may scribble on mtstate->mt_transition_capture, so
1310-
* save the currently active map.
1311-
*/
1312-
if (mtstate->mt_transition_capture)
1313-
saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
1314-
1315-
/* Tuple routing starts from the root table. */
1316-
Assert(mtstate->rootResultRelInfo != NULL);
1317-
ret_slot = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
1318-
planSlot, estate, canSetTag);
1319-
1320-
/* Clear the INSERT's tuple and restore the saved map. */
1321-
if (mtstate->mt_transition_capture)
1368+
retry = !ExecCrossPartitionUpdate(mtstate, resultRelInfo, tupleid,
1369+
oldtuple, slot, planSlot,
1370+
epqstate, canSetTag,
1371+
&retry_slot, &inserted_tuple);
1372+
if (retry)
13221373
{
1323-
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1324-
mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
1374+
slot = retry_slot;
1375+
goto lreplace;
13251376
}
13261377

1327-
return ret_slot;
1378+
return inserted_tuple;
13281379
}
13291380

13301381
/*

0 commit comments

Comments
 (0)