
Commit 9f2ee8f

Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases.  We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries.  If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row.  The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and, what's
much worse, could result in duplicated output tuples.

Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested.  To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param.  Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.

This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE.  This is needed to avoid the
duplicate-output-tuple problem.  It seems fairly reasonable since the other
restrictions on SELECT FOR UPDATE are meant to ensure that there is a unique
correspondence between source tuples and result tuples, which an output SRF
destroys as much as anything else does.
Parent: 76d8883 · Commit: 9f2ee8f
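
The runtime-Param trick in the message above is the heart of the speedup, so
a short sketch may help.  The fragment below is illustrative only and is not
code from this commit: epq_mark_for_rescan and epq_param_id are made-up names,
and the bookkeeping is reduced to the one step that matters, namely flagging
the Param as changed so that the ordinary parameter-change machinery makes
every dependent scan node rescan and emit just the test row jammed into it.

#include "postgres.h"

#include "executor/executor.h"
#include "nodes/bitmapset.h"

/*
 * Hypothetical sketch: signal the EPQ Param as changed for the already-built
 * recheck plan.  The next pass through the plan notices chgParam and rescans
 * the scan nodes; no per-row executor init/shutdown is required.
 */
static void
epq_mark_for_rescan(PlanState *recheckplan, int epq_param_id)
{
	recheckplan->chgParam = bms_add_member(recheckplan->chgParam,
										   epq_param_id);
}

Reusing the already-initialized plan tree this way is what removes the full
executor init and shutdown that the old implementation paid for every row it
had to retest.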

50 files changed: +1550 -1021 lines (only a subset of the changed files is shown below)

src/backend/commands/trigger.c (+20 -12)

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.254 2009/10/14 22:14:21 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.255 2009/10/26 02:26:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,7 +61,7 @@ int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void InsertTrigger(TriggerDesc *trigdesc, Trigger *trigger, int indx);
 static HeapTuple GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot);
@@ -1828,7 +1828,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 bool
-ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
+ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid)
 {
@@ -1842,7 +1842,7 @@ ExecBRDeleteTriggers(EState *estate, PlanState *subplanstate,
 	TupleTableSlot *newSlot;
 	int			i;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return false;
@@ -1964,7 +1964,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 HeapTuple
-ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
+ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
 					 ItemPointer tupleid, HeapTuple newtuple)
 {
@@ -1979,7 +1979,7 @@ ExecBRUpdateTriggers(EState *estate, PlanState *subplanstate,
 	int			i;
 	Bitmapset  *modifiedCols;
 
-	trigtuple = GetTupleForTrigger(estate, subplanstate, relinfo, tupleid,
+	trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
 								   &newSlot);
 	if (trigtuple == NULL)
 		return NULL;
@@ -2107,7 +2107,7 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 
 static HeapTuple
 GetTupleForTrigger(EState *estate,
-				   PlanState *subplanstate,
+				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
 				   ItemPointer tid,
 				   TupleTableSlot **newSlot)
@@ -2125,8 +2125,8 @@ GetTupleForTrigger(EState *estate,
 
 	*newSlot = NULL;
 
-	/* caller must pass a subplanstate if EvalPlanQual is possible */
-	Assert(subplanstate != NULL);
+	/* caller must pass an epqstate if EvalPlanQual is possible */
+	Assert(epqstate != NULL);
 
 	/*
 	 * lock tuple for update
@@ -2153,27 +2153,35 @@ ltrmark:;
 				ereport(ERROR,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("could not serialize access due to concurrent update")));
-			else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
+			if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
 			{
 				/* it was updated, so look at the updated version */
 				TupleTableSlot *epqslot;
 
 				epqslot = EvalPlanQual(estate,
+									   epqstate,
+									   relation,
 									   relinfo->ri_RangeTableIndex,
-									   subplanstate,
 									   &update_ctid,
 									   update_xmax);
 				if (!TupIsNull(epqslot))
 				{
 					*tid = update_ctid;
 					*newSlot = epqslot;
+
+					/*
+					 * EvalPlanQual already locked the tuple, but we
+					 * re-call heap_lock_tuple anyway as an easy way
+					 * of re-fetching the correct tuple.  Speed is
+					 * hardly a criterion in this path anyhow.
+					 */
 					goto ltrmark;
 				}
 			}
 
 			/*
 			 * if tuple was deleted or PlanQual failed for updated tuple -
-			 * we have not process this tuple!
+			 * we must not process this tuple!
 			 */
 			return NULL;

src/backend/commands/vacuum.c (+2 -2)

@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.393 2009/09/01 04:46:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.394 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,7 +102,7 @@ typedef VacPageListData *VacPageList;
  * Note: because t_ctid links can be stale (this would only occur if a prior
  * VACUUM crashed partway through), it is possible that new_tid points to an
  * empty slot or unrelated tuple. We have to check the linkage as we follow
- * it, just as is done in EvalPlanQual.
+ * it, just as is done in EvalPlanQualFetch.
  */
 typedef struct VTupleLinkData
 {

src/backend/executor/README (+36 -39)

@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/executor/README,v 1.10 2009/10/12 18:10:41 tgl Exp $
+$PostgreSQL: pgsql/src/backend/executor/README,v 1.11 2009/10/26 02:26:29 tgl Exp $
 
 The Postgres Executor
 =====================
@@ -160,41 +160,38 @@ modified tuple. SELECT FOR UPDATE/SHARE behaves similarly, except that its
 action is just to lock the modified tuple and return results based on that
 version of the tuple.
 
-To implement this checking, we actually re-run the entire query from scratch
-for each modified tuple, but with the scan node that sourced the original
-tuple set to return only the modified tuple, not the original tuple or any
-of the rest of the relation. If this query returns a tuple, then the
-modified tuple passes the quals (and the query output is the suitably
-modified update tuple, if we're doing UPDATE). If no tuple is returned,
-then the modified tuple fails the quals, so we ignore it and continue the
-original query. (This is reasonably efficient for simple queries, but may
-be horribly slow for joins. A better design would be nice; one thought for
-future investigation is to treat the tuple substitution like a parameter,
-so that we can avoid rescanning unrelated nodes.)
-
-Note a fundamental bogosity of this approach: if the relation containing
-the original tuple is being used in a self-join, the other instance(s) of
-the relation will be treated as still containing the original tuple, whereas
-logical consistency would demand that the modified tuple appear in them too.
-But we'd have to actually substitute the modified tuple for the original,
-while still returning all the rest of the relation, to ensure consistent
-answers. Implementing this correctly is a task for future work.
-
-In UPDATE/DELETE, only the target relation needs to be handled this way,
-so only one special recheck query needs to execute at a time. In SELECT FOR
-UPDATE, there may be multiple relations flagged FOR UPDATE, so it's possible
-that while we are executing a recheck query for one modified tuple, we will
-hit another modified tuple in another relation. In this case we "stack up"
-recheck queries: a sub-recheck query is spawned in which both the first and
-second modified tuples will be returned as the only components of their
-relations. (In event of success, all these modified tuples will be locked.)
-Again, this isn't necessarily quite the right thing ... but in simple cases
-it works. Potentially, recheck queries could get nested to the depth of the
-number of FOR UPDATE/SHARE relations in the query.
-
-It should be noted also that UPDATE/DELETE expect at most one tuple to
-result from the modified query, whereas in the FOR UPDATE case it's possible
-for multiple tuples to result (since we could be dealing with a join in
-which multiple tuples join to the modified tuple). We want FOR UPDATE to
-lock all relevant tuples, so we process all tuples output by all the stacked
-recheck queries.
+To implement this checking, we actually re-run the query from scratch for
+each modified tuple (or set of tuples, for SELECT FOR UPDATE), with the
+relation scan nodes tweaked to return only the current tuples --- either
+the original ones, or the updated (and now locked) versions of the modified
+tuple(s). If this query returns a tuple, then the modified tuple(s) pass
+the quals (and the query output is the suitably modified update tuple, if
+we're doing UPDATE). If no tuple is returned, then the modified tuple(s)
+fail the quals, so we ignore the current result tuple and continue the
+original query.
+
+In UPDATE/DELETE, only the target relation needs to be handled this way.
+In SELECT FOR UPDATE, there may be multiple relations flagged FOR UPDATE,
+so we obtain lock on the current tuple version in each such relation before
+executing the recheck.
+
+It is also possible that there are relations in the query that are not
+to be locked (they are neither the UPDATE/DELETE target nor specified to
+be locked in SELECT FOR UPDATE/SHARE). When re-running the test query
+we want to use the same rows from these relations that were joined to
+the locked rows. For ordinary relations this can be implemented relatively
+cheaply by including the row TID in the join outputs and re-fetching that
+TID. (The re-fetch is expensive, but we're trying to optimize the normal
+case where no re-test is needed.) We have also to consider non-table
+relations, such as a ValuesScan or FunctionScan. For these, since there
+is no equivalent of TID, the only practical solution seems to be to include
+the entire row value in the join output row.
+
+We disallow set-returning functions in the targetlist of SELECT FOR UPDATE,
+so as to ensure that at most one tuple can be returned for any particular
+set of scan tuples. Otherwise we'd get duplicates due to the original
+query returning the same set of scan tuples multiple times. (Note: there
+is no explicit prohibition on SRFs in UPDATE, but the net effect will be
+that only the first result row of an SRF counts, because all subsequent
+rows will result in attempts to re-update an already updated target row.
+This is historical behavior and seems not worth changing.)
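
To make the "scan nodes tweaked to return only the current tuples" idea above
concrete, here is a hedged sketch of the fetch logic such a scan node could
use during a recheck.  It is not the committed code: epq_scan_fetch, the
epq_* parameters, and the normal_fetch callback are illustrative stand-ins
for whatever bookkeeping the executor actually carries.

#include "postgres.h"

#include "access/htup.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "storage/buf.h"

/*
 * Hypothetical sketch: during an EPQ recheck, return the stashed test tuple
 * for this scan's range-table entry exactly once per rescan, then report
 * end-of-scan; outside a recheck, fall back to the normal access method.
 */
static TupleTableSlot *
epq_scan_fetch(ScanState *node,
			   bool epq_active,			/* inside an EPQ recheck? */
			   bool *epq_scan_done,		/* per-RTE "already returned" flags */
			   HeapTuple *epq_tuple,	/* per-RTE stashed test tuples */
			   TupleTableSlot *(*normal_fetch) (ScanState *node))
{
	Index		scanrelid = ((Scan *) node->ps.plan)->scanrelid;

	if (!epq_active)
		return normal_fetch(node);

	if (epq_scan_done[scanrelid - 1])
		return ExecClearTuple(node->ss_ScanTupleSlot);	/* report EOF */

	epq_scan_done[scanrelid - 1] = true;

	/* Emit the one test tuple that was jammed into this scan node. */
	return ExecStoreTuple(epq_tuple[scanrelid - 1],
						  node->ss_ScanTupleSlot,
						  InvalidBuffer,
						  false);
}

For relations that are merely joined rather than locked, the row re-fetched
by TID, or the whole-row copy kept for a ValuesScan or FunctionScan, would be
stashed the same way before the recheck runs.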

src/backend/executor/execCurrent.c (+4 -1)

@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.11 2009/10/12 18:10:41 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/executor/execCurrent.c,v 1.12 2009/10/26 02:26:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,6 +102,9 @@ execCurrentOf(CurrentOfExpr *cexpr,
 	{
 		ExecRowMark *thiserm = (ExecRowMark *) lfirst(lc);
 
+		if (!RowMarkRequiresRowShareLock(thiserm->markType))
+			continue;			/* ignore non-FOR UPDATE/SHARE items */
+
 		if (RelationGetRelid(thiserm->relation) == table_oid)
 		{
 			if (erm)
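
The RowMarkRequiresRowShareLock() test added here lets WHERE CURRENT OF skip
row marks that exist only so the EPQ machinery can re-fetch joined rows (by
TID, or by a whole-row copy), as opposed to marks that actually lock rows for
FOR UPDATE/SHARE.  The snippet below illustrates that classification with
made-up names; the real enum and macro live in the PostgreSQL headers and may
differ in detail.

/*
 * Illustration only, not the header's actual text: only FOR UPDATE/SHARE
 * marks take a row-level lock, so only they should satisfy WHERE CURRENT OF.
 */
typedef enum IllustrativeRowMarkType
{
	EX_ROW_MARK_EXCLUSIVE,		/* obtained FOR UPDATE */
	EX_ROW_MARK_SHARE,			/* obtained FOR SHARE */
	EX_ROW_MARK_REFERENCE,		/* joined relation, re-fetched by TID */
	EX_ROW_MARK_COPY			/* joined relation, whole-row copy kept */
} IllustrativeRowMarkType;

#define IllustrativeRowMarkRequiresRowShareLock(t)	((t) <= EX_ROW_MARK_SHARE)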
