Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8f9f198

Browse files
committed
Restructure subtransaction handling to reduce resource consumption,
as per recent discussions. Invent SubTransactionIds that are managed like CommandIds (ie, counter is reset at start of each top transaction), and use these instead of TransactionIds to keep track of subtransaction status in those modules that need it. This means that a subtransaction does not need an XID unless it actually inserts/modifies rows in the database. Accordingly, don't assign it an XID nor take a lock on the XID until it tries to do that. This saves a lot of overhead for subtransactions that are only used for error recovery (eg plpgsql exceptions). Also, arrange to release a subtransaction's XID lock as soon as the subtransaction exits, in both the commit and abort cases. This avoids holding many unique locks after a long series of subtransactions. The price is some additional overhead in XactLockTableWait, but that seems acceptable. Finally, restructure the state machine in xact.c to have a more orthogonal set of states for subtransactions.
1 parent 42c0d1f commit 8f9f198

File tree

34 files changed

+1192
-917
lines changed

34 files changed

+1192
-917
lines changed

src/backend/access/heap/heapam.c

+13-9
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.174 2004/09/11 18:28:32 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.175 2004/09/16 16:58:25 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -1108,6 +1108,7 @@ heap_get_latest_tid(Relation relation,
11081108
Oid
11091109
heap_insert(Relation relation, HeapTuple tup, CommandId cid)
11101110
{
1111+
TransactionId xid = GetCurrentTransactionId();
11111112
Buffer buffer;
11121113

11131114
if (relation->rd_rel->relhasoids)
@@ -1139,7 +1140,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
11391140

11401141
tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
11411142
tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
1142-
HeapTupleHeaderSetXmin(tup->t_data, GetCurrentTransactionId());
1143+
HeapTupleHeaderSetXmin(tup->t_data, xid);
11431144
HeapTupleHeaderSetCmin(tup->t_data, cid);
11441145
HeapTupleHeaderSetCmax(tup->t_data, 0); /* zero out Datum fields */
11451146
tup->t_tableOid = relation->rd_id;
@@ -1277,6 +1278,7 @@ heap_delete(Relation relation, ItemPointer tid,
12771278
ItemPointer ctid, CommandId cid,
12781279
Snapshot crosscheck, bool wait)
12791280
{
1281+
TransactionId xid = GetCurrentTransactionId();
12801282
ItemId lp;
12811283
HeapTupleData tp;
12821284
PageHeader dp;
@@ -1365,7 +1367,7 @@ heap_delete(Relation relation, ItemPointer tid,
13651367
HEAP_XMAX_INVALID |
13661368
HEAP_MARKED_FOR_UPDATE |
13671369
HEAP_MOVED);
1368-
HeapTupleHeaderSetXmax(tp.t_data, GetCurrentTransactionId());
1370+
HeapTupleHeaderSetXmax(tp.t_data, xid);
13691371
HeapTupleHeaderSetCmax(tp.t_data, cid);
13701372
/* Make sure there is no forward chain link in t_ctid */
13711373
tp.t_data->t_ctid = tp.t_self;
@@ -1495,6 +1497,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
14951497
ItemPointer ctid, CommandId cid,
14961498
Snapshot crosscheck, bool wait)
14971499
{
1500+
TransactionId xid = GetCurrentTransactionId();
14981501
ItemId lp;
14991502
HeapTupleData oldtup;
15001503
PageHeader dp;
@@ -1603,7 +1606,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
16031606

16041607
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
16051608
newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
1606-
HeapTupleHeaderSetXmin(newtup->t_data, GetCurrentTransactionId());
1609+
HeapTupleHeaderSetXmin(newtup->t_data, xid);
16071610
HeapTupleHeaderSetCmin(newtup->t_data, cid);
16081611
HeapTupleHeaderSetCmax(newtup->t_data, 0); /* zero out Datum fields */
16091612

@@ -1644,7 +1647,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
16441647
HEAP_MARKED_FOR_UPDATE |
16451648
HEAP_MOVED);
16461649
oldtup.t_data->t_infomask |= HEAP_XMAX_UNLOGGED;
1647-
HeapTupleHeaderSetXmax(oldtup.t_data, GetCurrentTransactionId());
1650+
HeapTupleHeaderSetXmax(oldtup.t_data, xid);
16481651
HeapTupleHeaderSetCmax(oldtup.t_data, cid);
16491652
already_marked = true;
16501653
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -1735,7 +1738,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
17351738
HEAP_XMAX_INVALID |
17361739
HEAP_MARKED_FOR_UPDATE |
17371740
HEAP_MOVED);
1738-
HeapTupleHeaderSetXmax(oldtup.t_data, GetCurrentTransactionId());
1741+
HeapTupleHeaderSetXmax(oldtup.t_data, xid);
17391742
HeapTupleHeaderSetCmax(oldtup.t_data, cid);
17401743
}
17411744

@@ -1836,6 +1839,7 @@ int
18361839
heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
18371840
CommandId cid)
18381841
{
1842+
TransactionId xid = GetCurrentTransactionId();
18391843
ItemPointer tid = &(tuple->t_self);
18401844
ItemId lp;
18411845
PageHeader dp;
@@ -1912,7 +1916,7 @@ heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
19121916
HEAP_XMAX_INVALID |
19131917
HEAP_MOVED);
19141918
tuple->t_data->t_infomask |= HEAP_MARKED_FOR_UPDATE;
1915-
HeapTupleHeaderSetXmax(tuple->t_data, GetCurrentTransactionId());
1919+
HeapTupleHeaderSetXmax(tuple->t_data, xid);
19161920
HeapTupleHeaderSetCmax(tuple->t_data, cid);
19171921
/* Make sure there is no forward chain link in t_ctid */
19181922
tuple->t_data->t_ctid = *tid;
@@ -2584,6 +2588,7 @@ newsame:;
25842588
static void
25852589
_heap_unlock_tuple(void *data)
25862590
{
2591+
TransactionId xid = GetCurrentTransactionId();
25872592
xl_heaptid *xltid = (xl_heaptid *) data;
25882593
Relation reln = XLogOpenRelation(false, RM_HEAP_ID, xltid->node);
25892594
Buffer buffer;
@@ -2614,13 +2619,12 @@ _heap_unlock_tuple(void *data)
26142619

26152620
htup = (HeapTupleHeader) PageGetItem(page, lp);
26162621

2617-
if (!TransactionIdEquals(HeapTupleHeaderGetXmax(htup), GetCurrentTransactionId()))
2622+
if (!TransactionIdEquals(HeapTupleHeaderGetXmax(htup), xid))
26182623
elog(PANIC, "_heap_unlock_tuple: invalid xmax in rollback");
26192624
htup->t_infomask &= ~HEAP_XMAX_UNLOGGED;
26202625
htup->t_infomask |= HEAP_XMAX_INVALID;
26212626
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
26222627
WriteBuffer(buffer);
2623-
return;
26242628
}
26252629

26262630
void

src/backend/access/transam/README

+68-47
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.1 2004/08/01 20:57:59 tgl Exp $
1+
$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.2 2004/09/16 16:58:26 tgl Exp $
22

33
The Transaction System
44
----------------------
@@ -9,7 +9,7 @@ the mainloop's control code, which in turn implements user-visible
99
transactions and savepoints.
1010

1111
The middle layer of code is called by postgres.c before and after the
12-
processing of each query:
12+
processing of each query, or after detecting an error:
1313

1414
StartTransactionCommand
1515
CommitTransactionCommand
@@ -44,9 +44,9 @@ effects of previous commands within the same transaction. Note that this is
4444
done automatically by CommitTransactionCommand after each query inside a
4545
transaction block, but some utility functions also do it internally to allow
4646
some operations (usually in the system catalogs) to be seen by future
47-
operations in the same utility command (for example, in DefineRelation it is
47+
operations in the same utility command. (For example, in DefineRelation it is
4848
done after creating the heap so the pg_class row is visible, to be able to
49-
lock it).
49+
lock it.)
5050

5151

5252
For example, consider the following sequence of user commands:
@@ -60,26 +60,26 @@ In the main processing loop, this results in the following function call
6060
sequence:
6161

6262
/ StartTransactionCommand;
63-
/ ProcessUtility; << BEGIN
64-
1) < BeginTransactionBlock;
65-
\ CommitTransactionCommand;
66-
\ StartTransaction;
63+
/ StartTransaction;
64+
1) < ProcessUtility; << BEGIN
65+
\ BeginTransactionBlock;
66+
\ CommitTransactionCommand;
6767

6868
/ StartTransactionCommand;
69-
2) / ProcessQuery; << SELECT * FROM foo
69+
2) / ProcessQuery; << SELECT ...
7070
\ CommitTransactionCommand;
7171
\ CommandCounterIncrement;
7272

7373
/ StartTransactionCommand;
74-
3) / ProcessQuery; << INSERT INTO foo VALUES (...)
74+
3) / ProcessQuery; << INSERT ...
7575
\ CommitTransactionCommand;
7676
\ CommandCounterIncrement;
7777

7878
/ StartTransactionCommand;
7979
/ ProcessUtility; << COMMIT
8080
4) < EndTransactionBlock;
81-
\ CommitTransaction;
82-
\ CommitTransactionCommand;
81+
\ CommitTransactionCommand;
82+
\ CommitTransaction;
8383

8484
The point of this example is to demonstrate the need for
8585
StartTransactionCommand and CommitTransactionCommand to be state smart -- they
@@ -118,15 +118,15 @@ to do all the real work. The only difference is what state we enter after
118118
AbortTransaction does its work:
119119

120120
* AbortCurrentTransaction leaves us in TBLOCK_ABORT,
121-
* UserAbortTransactionBlock leaves us in TBLOCK_ENDABORT
121+
* UserAbortTransactionBlock leaves us in TBLOCK_ABORT_END
122122

123123
Low-level transaction abort handling is divided in two phases:
124124
* AbortTransaction executes as soon as we realize the transaction has
125125
failed. It should release all shared resources (locks etc) so that we do
126126
not delay other backends unnecessarily.
127127
* CleanupTransaction executes when we finally see a user COMMIT
128128
or ROLLBACK command; it cleans things up and gets us out of the transaction
129-
internally. In particular, we mustn't destroy TopTransactionContext until
129+
completely. In particular, we mustn't destroy TopTransactionContext until
130130
this point.
131131

132132
Also, note that when a transaction is committed, we don't close it right away.
@@ -163,28 +163,48 @@ called so the system returns to the parent transaction.
163163
One important point regarding subtransaction handling is that several may need
164164
to be closed in response to a single user command. That's because savepoints
165165
have names, and we allow committing or rolling back a savepoint by name, which is
166-
not necessarily the one that was last opened. In the case of subtransaction
167-
commit this is not a problem, and we close all the involved subtransactions
168-
right away by calling CommitTransactionToLevel, which in turn calls
169-
CommitSubTransaction and PopTransaction as many times as needed.
170-
171-
In the case of subtransaction abort (when the user issues ROLLBACK TO
172-
<savepoint>), things are not so easy. We have to keep the subtransactions
173-
open and return control to the main loop. So what RollbackToSavepoint does is
174-
abort the innermost subtransaction and put it in TBLOCK_SUBENDABORT state, and
175-
put the rest in TBLOCK_SUBABORT_PENDING state. Then we return control to the
176-
main loop, which will in turn return control to us by calling
177-
CommitTransactionCommand. At this point we can close all subtransactions that
178-
are marked with the "abort pending" state. When that's done, the outermost
179-
subtransaction is created again, to conform to SQL's definition of ROLLBACK TO.
166+
not necessarily the one that was last opened. Also a COMMIT or ROLLBACK
167+
command must be able to close out the entire stack. We handle this by having
168+
the utility command subroutine mark all the state stack entries as commit-
169+
pending or abort-pending, and then when the main loop reaches
170+
CommitTransactionCommand, the real work is done. The main point of doing
171+
things this way is that if we get an error while popping state stack entries,
172+
the remaining stack entries still show what we need to do to finish up.
173+
174+
In the case of ROLLBACK TO <savepoint>, we abort all the subtransactions up
175+
through the one identified by the savepoint name, and then re-create that
176+
subtransaction level with the same name. So it's a completely new
177+
subtransaction as far as the internals are concerned.
180178

181179
Other subsystems are allowed to start "internal" subtransactions, which are
182180
handled by BeginInternalSubtransaction. This is to allow implementing
183181
exception handling, e.g. in PL/pgSQL. ReleaseCurrentSubTransaction and
184182
RollbackAndReleaseCurrentSubTransaction allow the subsystem to close said
185183
subtransactions. The main difference between this and the savepoint/release
186-
path is that BeginInternalSubtransaction is allowed when no explicit
187-
transaction block has been established, while DefineSavepoint is not.
184+
path is that we execute the complete state transition immediately in each
185+
subroutine, rather than deferring some work until CommitTransactionCommand.
186+
Another difference is that BeginInternalSubtransaction is allowed when no
187+
explicit transaction block has been established, while DefineSavepoint is not.
188+
189+
190+
Subtransaction numbering
191+
------------------------
192+
193+
A top-level transaction is always given a TransactionId (XID) as soon as it is
194+
created. This is necessary for a number of reasons, notably XMIN bookkeeping
195+
for VACUUM. However, a subtransaction doesn't need its own XID unless it
196+
(or one of its child subxacts) writes tuples into the database. Therefore,
197+
we postpone assigning XIDs to subxacts until and unless they call
198+
GetCurrentTransactionId. The subsidiary actions of obtaining a lock on the
199+
XID and entering it into pg_subtrans and PG_PROC are done at the same time.
200+
201+
Internally, a backend needs a way to identify subtransactions whether or not
202+
they have XIDs; but this need only lasts as long as the parent top transaction
203+
endures. Therefore, we have SubTransactionId, which is somewhat like
204+
CommandId in that it's generated from a counter that we reset at the start of
205+
each top transaction. The top-level transaction itself has SubTransactionId 1,
206+
and subtransactions have IDs 2 and up. (Zero is reserved for
207+
InvalidSubTransactionId.)
188208

189209

190210
pg_clog and pg_subtrans
@@ -197,27 +217,28 @@ there's a long running transaction or a backend sitting idle with an open
197217
transaction, it may be necessary to be able to read and write this information
198218
from disk. They also allow information to be permanent across server restarts.
199219

200-
pg_clog records the commit status for each transaction. A transaction can be
201-
in progress, committed, aborted, or "sub-committed". This last state means
202-
that it's a subtransaction that's no longer running, but its parent has not
203-
updated its state yet (either it is still running, or the backend crashed
204-
without updating its status). A sub-committed transaction's status will be
205-
updated again to the final value as soon as the parent commits or aborts, or
206-
when the parent is detected to be aborted.
220+
pg_clog records the commit status for each transaction that has been assigned
221+
an XID. A transaction can be in progress, committed, aborted, or
222+
"sub-committed". This last state means that it's a subtransaction that's no
223+
longer running, but its parent has not updated its state yet (either it is
224+
still running, or the backend crashed without updating its status). A
225+
sub-committed transaction's status will be updated again to the final value as
226+
soon as the parent commits or aborts, or when the parent is detected to be
227+
aborted.
207228

208229
Savepoints are implemented using subtransactions. A subtransaction is a
209-
transaction inside a transaction; it gets its own TransactionId, but its
210-
commit or abort status is not only dependent on whether it committed itself,
211-
but also whether its parent transaction committed. To implement multiple
212-
savepoints in a transaction we allow unlimited transaction nesting depth, so
213-
any particular subtransaction's commit state is dependent on the commit status
214-
of each and every ancestor transaction.
230+
transaction inside a transaction; its commit or abort status is not only
231+
dependent on whether it committed itself, but also whether its parent
232+
transaction committed. To implement multiple savepoints in a transaction we
233+
allow unlimited transaction nesting depth, so any particular subtransaction's
234+
commit state is dependent on the commit status of each and every ancestor
235+
transaction.
215236

216237
The "subtransaction parent" (pg_subtrans) mechanism records, for each
217-
transaction, the TransactionId of its parent transaction. This information is
218-
stored as soon as the subtransaction is created. Top-level transactions do
219-
not have a parent, so they leave their pg_subtrans entries set to the default
220-
value of zero (InvalidTransactionId).
238+
transaction with an XID, the TransactionId of its parent transaction. This
239+
information is stored as soon as the subtransaction is assigned an XID.
240+
Top-level transactions do not have a parent, so they leave their pg_subtrans
241+
entries set to the default value of zero (InvalidTransactionId).
221242

222243
pg_subtrans is used to check whether the transaction in question is still
223244
running --- the main Xid of a transaction is recorded in the PGPROC struct,

0 commit comments

Comments
 (0)