Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 911cc76

Browse files
committed
Rework commit.c cleanup.
1) Abort xact inside backend if we can. 2) Allocate gather() messages inside separate ctx if we are not in xact. 3) Prevent nested entry into mm hooks.
1 parent c3d28b7 commit 911cc76

File tree

3 files changed

+88
-71
lines changed

3 files changed

+88
-71
lines changed

src/commit.c

+74-62
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ static struct MtmCommitState
5252
{
5353
char gid[GIDSIZE];
5454
GlobalTx *gtx;
55-
bool abort_prepare; /* whether cleanup can (should) abort xact immediately */
55+
bool inside_commit_sequence;
56+
MemoryContext mctx;
5657
} mtm_commit_state;
5758

5859
static void
@@ -94,6 +95,13 @@ MtmXactCallback(XactEvent event, void *arg)
9495
return;
9596
}
9697

98+
/*
99+
* MtmTwoPhaseCommit does (Start|Commit)TransactionCommand, they shouldn't
100+
* nest into our hooks again.
101+
*/
102+
if (mtm_commit_state.inside_commit_sequence)
103+
return;
104+
97105
switch (event)
98106
{
99107
case XACT_EVENT_START:
@@ -119,77 +127,65 @@ MtmXactCallback(XactEvent event, void *arg)
119127
static void
120128
mtm_commit_cleanup(int status, Datum arg)
121129
{
122-
/* 0 is ERROR, 1 is on exit hook */
123-
bool on_exit = DatumGetInt32(arg) == 1;
124-
125130
ReleasePB();
126131
dmq_stream_unsubscribe();
127132

128133
if (mtm_commit_state.gtx != NULL)
129134
{
130135
/*
131-
* This crutchy dance matters if this is proc exit (FATAL in the
132-
* middle of commit sequence): global_tx.c automatically releases gtx,
133-
* and it would be really not nice to rely on the order in which the
134-
* hooks are called. So reacquire the gtx if it was already released.
136+
* If we managed to prepare the xact but failed to commit, try to
137+
* abort it immediately if it is still possible (no precommit =>
138+
* others nodes can't commit) or issue a warning about unclear xact
139+
* status
135140
*/
136-
Assert(GetMyGlobalTx() != NULL || on_exit);
137-
if (GetMyGlobalTx() == NULL)
138-
mtm_commit_state.gtx = GlobalTxAcquire(mtm_commit_state.gid,
139-
false);
140-
/*
141-
* If we managed to prepare the xact, tell resolver to deal with it
142-
*/
143-
if (mtm_commit_state.gtx != NULL && mtm_commit_state.gtx->prepared)
141+
if (mtm_commit_state.gtx->prepared)
144142
{
145-
/*
146-
* If there were no precommit, xact will definitely be aborted. We
147-
* could abort it right here, but this requires very careful
148-
* acting:
149-
* 1) who would release gtx if we fail during
150-
* FinishPreparedTransaction?
151-
*
152-
* 2) Generally FinishPreparedTransaction and
153-
* SetPreparedTransactionState should run in xact, i.e.
154-
* surrounded by
155-
* StartTransactionCommand/CommitTransactionCommand.
156-
* e.g. LockGXact will fail if I am not the owner of
157-
* gxact. However, some hops are needed to avoid entering
158-
* MtmTwoPhaseCommit/MtmBeginTransaction twice. In particular,
159-
* erroring out from MtmBeginTransaction on attempt to abort
160-
* xact after we got network error because, well, we are
161-
* isolated after network error is really unpleasant. This
162-
* relates to all
163-
* StartTransactionCommand/CommitTransactionCommand in this
164-
* file.
165-
*
166-
* As a sort of compromise, we could instead add the 'this is my
167-
* orphaned prepare => directly abort it' logic to resolver.
168-
*/
169143
if ((term_cmp(mtm_commit_state.gtx->state.accepted,
170-
InvalidGTxTerm) == 0) && !MtmVolksWagenMode)
144+
InvalidGTxTerm) == 0))
171145
{
172-
ereport(WARNING,
173-
(errcode(ERRCODE_INTERNAL_ERROR),
174-
errmsg("[multimaster] exiting commit sequence of transaction %s which will be aborted",
175-
mtm_commit_state.gid)));
176-
146+
/* there was no precommit, we can abort */
147+
PG_TRY();
148+
{
149+
AbortOutOfAnyTransaction();
150+
StartTransactionCommand();
151+
FinishPreparedTransaction(mtm_commit_state.gid, false, false);
152+
mtm_commit_state.gtx->state.status = GTXAborted;
153+
mtm_log(MtmTxFinish, "TXFINISH: %s aborted as own orphaned not precomitted",
154+
mtm_commit_state.gid);
155+
CommitTransactionCommand();
156+
157+
}
158+
/*
159+
* this should be extremely unlikely, but if we fail, don't
160+
* forget to release gtx
161+
*/
162+
PG_CATCH();
163+
{
164+
GlobalTxRelease(mtm_commit_state.gtx);
165+
mtm_commit_state.gtx = NULL;
166+
mtm_commit_state.inside_commit_sequence = false;
167+
PG_RE_THROW();
168+
}
169+
PG_END_TRY();
177170
}
178-
else if (!MtmVolksWagenMode)
171+
else
179172
{
180-
ereport(WARNING,
181-
(errcode(ERRCODE_TRANSACTION_RESOLUTION_UNKNOWN),
182-
errmsg("[multimaster] exiting commit sequence of transaction %s with unknown status",
183-
mtm_commit_state.gid),
184-
errdetail("The transaction will be committed or aborted later.")));
173+
ResolverWake();
174+
if (!MtmVolksWagenMode)
175+
{
176+
ereport(WARNING,
177+
(errcode(ERRCODE_TRANSACTION_RESOLUTION_UNKNOWN),
178+
errmsg("[multimaster] exiting commit sequence of transaction %s with unknown status",
179+
mtm_commit_state.gid),
180+
errdetail("The transaction will be committed or aborted later.")));
181+
182+
}
185183
}
186-
mtm_commit_state.gtx->orphaned = true;
187184
}
188-
if (mtm_commit_state.gtx != NULL)
189-
GlobalTxRelease(mtm_commit_state.gtx);
185+
GlobalTxRelease(mtm_commit_state.gtx);
190186
mtm_commit_state.gtx = NULL;
191-
ResolverWake();
192187
}
188+
mtm_commit_state.inside_commit_sequence = false;
193189
}
194190

195191
void
@@ -222,7 +218,15 @@ MtmBeginTransaction()
222218
CacheRegisterSyscacheCallback(PROCOID,
223219
proc_change_cb,
224220
(Datum) 0);
225-
on_shmem_exit(mtm_commit_cleanup, Int32GetDatum(1));
221+
/*
222+
* mtm_commit_cleanup must do its job *before* gtx is released, so
223+
* register gtx hook first (it will be called last)
224+
*/
225+
GlobalTxEnsureBeforeShmemExitHook();
226+
before_shmem_exit(mtm_commit_cleanup, Int32GetDatum(1));
227+
mtm_commit_state.mctx = AllocSetContextCreate(TopMemoryContext,
228+
"MtmCommitContext",
229+
ALLOCSET_DEFAULT_SIZES);
226230
init_done = true;
227231
}
228232

@@ -423,7 +427,10 @@ MtmTwoPhaseCommit(void)
423427

424428
/* prepare for cleanup */
425429
mtm_commit_state.gtx = NULL;
426-
mtm_commit_state.abort_prepare = false;
430+
mtm_commit_state.inside_commit_sequence = true;
431+
/* used for allocations not inside tx, e.g. messages in gather() */
432+
MemoryContextReset(mtm_commit_state.mctx);
433+
427434
/*
428435
* Note that we do not HOLD_INTERRUPTS; user might cancel waiting whenever
429436
* he wants. However, probably xact status would be unclear at that
@@ -460,6 +467,12 @@ MtmTwoPhaseCommit(void)
460467
/* prepare transaction on our node */
461468

462469
mtm_commit_state.gtx = GlobalTxAcquire(mtm_commit_state.gid, true);
470+
/*
471+
* it is simpler to mark gtx originated here as orphaned from the
472+
* beginning rather than in error handler; resolver won't touch gtx
473+
* while it is locked on us anyway
474+
*/
475+
mtm_commit_state.gtx->orphaned = true;
463476
Assert(mtm_commit_state.gtx->state.status == GTXInvalid);
464477
/*
465478
* PREPARE doesn't happen here; ret 0 just means we were already in
@@ -474,13 +487,14 @@ MtmTwoPhaseCommit(void)
474487

475488
AllowTempIn2PC = true;
476489
CommitTransactionCommand(); /* here we actually PrepareTransaction */
477-
ReleasePB(); /* don't hold generation switch anymore */
478490
mtm_commit_state.gtx->prepared = true;
491+
ReleasePB(); /* don't hold generation switch anymore */
479492
/* end_lsn of PREPARE */
480493
mtm_log(MtmTxFinish, "TXFINISH: %s prepared at %X/%X",
481494
mtm_commit_state.gid,
482495
(uint32) (XactLastCommitEnd >> 32),
483496
(uint32) (XactLastCommitEnd));
497+
MemoryContextSwitchTo(mtm_commit_state.mctx);
484498

485499
/*
486500
* By definition of generations, we must collect PREPARE ack from
@@ -513,7 +527,6 @@ MtmTwoPhaseCommit(void)
513527
if (!ret)
514528
{
515529
MtmGeneration new_gen = MtmGetCurrentGen(false);
516-
mtm_commit_state.abort_prepare = true;
517530
ereport(ERROR,
518531
(errcode(ERRCODE_INTERNAL_ERROR),
519532
errmsg("[multimaster] failed to collect prepare acks due to generation switch: was num=" UINT64_FORMAT ", members=%s, now num=" UINT64_FORMAT ", members=%s",
@@ -527,7 +540,6 @@ MtmTwoPhaseCommit(void)
527540
nodemask_t failed_cohort = cohort;
528541
for (i = 0; i < n_messages; i++)
529542
BIT_CLEAR(failed_cohort, p_messages[i]->node_id - 1);
530-
mtm_commit_state.abort_prepare = true;
531543

532544
ereport(ERROR,
533545
(errcode(ERRCODE_CONNECTION_FAILURE),
@@ -538,7 +550,6 @@ MtmTwoPhaseCommit(void)
538550
{
539551
if (!p_messages[i]->prepared)
540552
{
541-
mtm_commit_state.abort_prepare = true;
542553
/* don't print random gid, node id for regression tests output */
543554
if (MtmVolksWagenMode)
544555
ereport(ERROR,
@@ -688,6 +699,7 @@ MtmTwoPhaseCommit(void)
688699
dmq_stream_unsubscribe();
689700
mtm_log(MtmTxTrace, "%s unsubscribed for %s",
690701
mtm_commit_state.gid, dmq_stream_name);
702+
mtm_commit_state.inside_commit_sequence = false;
691703
}
692704
PG_CATCH();
693705
{

src/global_tx.c

+13-8
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,19 @@ MtmGlobalTxShmemStartup(void)
187187
LWLockRelease(AddinShmemInitLock);
188188
}
189189

190+
/*
191+
* needed for commit.c proper order of cleanup actions
192+
*/
193+
void
194+
GlobalTxEnsureBeforeShmemExitHook(void)
195+
{
196+
if (!gtx_exit_registered)
197+
{
198+
before_shmem_exit(GlobalTxAtExit, 0);
199+
gtx_exit_registered = true;
200+
}
201+
}
202+
190203
/*
191204
* Obtain a global tx and lock it on calling backend.
192205
*
@@ -269,14 +282,6 @@ GlobalTxAcquire(const char *gid, bool create)
269282
return gtx;
270283
}
271284

272-
/*
273-
* needed for ugly hack in commit.c exit hook
274-
*/
275-
GlobalTx *GetMyGlobalTx(void)
276-
{
277-
return my_locked_gtx;
278-
}
279-
280285
/*
281286
* Release our lock on this transaction and remove it from hash if it is
282287
* finished. We also remove shmem entry if gtx is not prepared: it is used

src/include/global_tx.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ extern gtx_shared_data *gtx_shared;
8484

8585
void MtmGlobalTxInit(void);
8686
void MtmGlobalTxShmemStartup(void);
87+
void GlobalTxEnsureBeforeShmemExitHook(void);
8788
GlobalTx *GlobalTxAcquire(const char *gid, bool create);
88-
GlobalTx *GetMyGlobalTx(void);
8989
void GlobalTxRelease(GlobalTx *gtx);
9090
void GlobalTxAtExit(int code, Datum arg);
9191
void GlobalTxLoadAll(void);

0 commit comments

Comments
 (0)