@@ -52,7 +52,8 @@ static struct MtmCommitState
52
52
{
53
53
char gid [GIDSIZE ];
54
54
GlobalTx * gtx ;
55
- bool abort_prepare ; /* whether cleanup can (should) abort xact immediately */
55
+ bool inside_commit_sequence ;
56
+ MemoryContext mctx ;
56
57
} mtm_commit_state ;
57
58
58
59
static void
@@ -94,6 +95,13 @@ MtmXactCallback(XactEvent event, void *arg)
94
95
return ;
95
96
}
96
97
98
+ /*
99
+ * MtmTwoPhaseCommit does (Start|Commit)TransactionCommand, they shouldn't
100
+ * nest into our hooks again.
101
+ */
102
+ if (mtm_commit_state .inside_commit_sequence )
103
+ return ;
104
+
97
105
switch (event )
98
106
{
99
107
case XACT_EVENT_START :
@@ -119,77 +127,65 @@ MtmXactCallback(XactEvent event, void *arg)
119
127
static void
120
128
mtm_commit_cleanup (int status , Datum arg )
121
129
{
122
- /* 0 is ERROR, 1 is on exit hook */
123
- bool on_exit = DatumGetInt32 (arg ) == 1 ;
124
-
125
130
ReleasePB ();
126
131
dmq_stream_unsubscribe ();
127
132
128
133
if (mtm_commit_state .gtx != NULL )
129
134
{
130
135
/*
131
- * This crutchy dance matters if this is proc exit (FATAL in the
132
- * middle of commit sequence): global_tx.c automatically releases gtx,
133
- * and it would be really not nice to rely on the order in which the
134
- * hooks are called. So reacquire the gtx if it was already released.
136
+ * If we managed to prepare the xact but failed to commit, try to
137
+ * abort it immediately if it is still possible (no precommit =>
138
+ * others nodes can't commit) or issue a warning about unclear xact
139
+ * status
135
140
*/
136
- Assert (GetMyGlobalTx () != NULL || on_exit );
137
- if (GetMyGlobalTx () == NULL )
138
- mtm_commit_state .gtx = GlobalTxAcquire (mtm_commit_state .gid ,
139
- false);
140
- /*
141
- * If we managed to prepare the xact, tell resolver to deal with it
142
- */
143
- if (mtm_commit_state .gtx != NULL && mtm_commit_state .gtx -> prepared )
141
+ if (mtm_commit_state .gtx -> prepared )
144
142
{
145
- /*
146
- * If there were no precommit, xact will definitely be aborted. We
147
- * could abort it right here, but this requires very careful
148
- * acting:
149
- * 1) who would release gtx if we fail during
150
- * FinishPreparedTransaction?
151
- *
152
- * 2) Generally FinishPreparedTransaction and
153
- * SetPreparedTransactionState should run in xact, i.e.
154
- * surrounded by
155
- * StartTransactionCommand/CommitTransactionCommand.
156
- * e.g. LockGXact will fail if I am not the owner of
157
- * gxact. However, some hops are needed to avoid entering
158
- * MtmTwoPhaseCommit/MtmBeginTransaction twice. In particular,
159
- * erroring out from MtmBeginTransaction on attempt to abort
160
- * xact after we got network error because, well, we are
161
- * isolated after network error is really unpleasant. This
162
- * relates to all
163
- * StartTransactionCommand/CommitTransactionCommand in this
164
- * file.
165
- *
166
- * As a sort of compromise, we could instead add the 'this is my
167
- * orphaned prepare => directly abort it' logic to resolver.
168
- */
169
143
if ((term_cmp (mtm_commit_state .gtx -> state .accepted ,
170
- InvalidGTxTerm ) == 0 ) && ! MtmVolksWagenMode )
144
+ InvalidGTxTerm ) == 0 ))
171
145
{
172
- ereport (WARNING ,
173
- (errcode (ERRCODE_INTERNAL_ERROR ),
174
- errmsg ("[multimaster] exiting commit sequence of transaction %s which will be aborted" ,
175
- mtm_commit_state .gid )));
176
-
146
+ /* there was no precommit, we can abort */
147
+ PG_TRY ();
148
+ {
149
+ AbortOutOfAnyTransaction ();
150
+ StartTransactionCommand ();
151
+ FinishPreparedTransaction (mtm_commit_state .gid , false, false);
152
+ mtm_commit_state .gtx -> state .status = GTXAborted ;
153
+ mtm_log (MtmTxFinish , "TXFINISH: %s aborted as own orphaned not precomitted" ,
154
+ mtm_commit_state .gid );
155
+ CommitTransactionCommand ();
156
+
157
+ }
158
+ /*
159
+ * this should be extremely unlikely, but if we fail, don't
160
+ * forget to release gtx
161
+ */
162
+ PG_CATCH ();
163
+ {
164
+ GlobalTxRelease (mtm_commit_state .gtx );
165
+ mtm_commit_state .gtx = NULL ;
166
+ mtm_commit_state .inside_commit_sequence = false;
167
+ PG_RE_THROW ();
168
+ }
169
+ PG_END_TRY ();
177
170
}
178
- else if (! MtmVolksWagenMode )
171
+ else
179
172
{
180
- ereport (WARNING ,
181
- (errcode (ERRCODE_TRANSACTION_RESOLUTION_UNKNOWN ),
182
- errmsg ("[multimaster] exiting commit sequence of transaction %s with unknown status" ,
183
- mtm_commit_state .gid ),
184
- errdetail ("The transaction will be committed or aborted later." )));
173
+ ResolverWake ();
174
+ if (!MtmVolksWagenMode )
175
+ {
176
+ ereport (WARNING ,
177
+ (errcode (ERRCODE_TRANSACTION_RESOLUTION_UNKNOWN ),
178
+ errmsg ("[multimaster] exiting commit sequence of transaction %s with unknown status" ,
179
+ mtm_commit_state .gid ),
180
+ errdetail ("The transaction will be committed or aborted later." )));
181
+
182
+ }
185
183
}
186
- mtm_commit_state .gtx -> orphaned = true;
187
184
}
188
- if (mtm_commit_state .gtx != NULL )
189
- GlobalTxRelease (mtm_commit_state .gtx );
185
+ GlobalTxRelease (mtm_commit_state .gtx );
190
186
mtm_commit_state .gtx = NULL ;
191
- ResolverWake ();
192
187
}
188
+ mtm_commit_state .inside_commit_sequence = false;
193
189
}
194
190
195
191
void
@@ -222,7 +218,15 @@ MtmBeginTransaction()
222
218
CacheRegisterSyscacheCallback (PROCOID ,
223
219
proc_change_cb ,
224
220
(Datum ) 0 );
225
- on_shmem_exit (mtm_commit_cleanup , Int32GetDatum (1 ));
221
+ /*
222
+ * mtm_commit_cleanup must do its job *before* gtx is released, so
223
+ * register gtx hook first (it will be called last)
224
+ */
225
+ GlobalTxEnsureBeforeShmemExitHook ();
226
+ before_shmem_exit (mtm_commit_cleanup , Int32GetDatum (1 ));
227
+ mtm_commit_state .mctx = AllocSetContextCreate (TopMemoryContext ,
228
+ "MtmCommitContext" ,
229
+ ALLOCSET_DEFAULT_SIZES );
226
230
init_done = true;
227
231
}
228
232
@@ -423,7 +427,10 @@ MtmTwoPhaseCommit(void)
423
427
424
428
/* prepare for cleanup */
425
429
mtm_commit_state .gtx = NULL ;
426
- mtm_commit_state .abort_prepare = false;
430
+ mtm_commit_state .inside_commit_sequence = true;
431
+ /* used for allocations not inside tx, e.g. messages in gather() */
432
+ MemoryContextReset (mtm_commit_state .mctx );
433
+
427
434
/*
428
435
* Note that we do not HOLD_INTERRUPTS; user might cancel waiting whenever
429
436
* he wants. However, probably xact status would be unclear at that
@@ -460,6 +467,12 @@ MtmTwoPhaseCommit(void)
460
467
/* prepare transaction on our node */
461
468
462
469
mtm_commit_state .gtx = GlobalTxAcquire (mtm_commit_state .gid , true);
470
+ /*
471
+ * it is simpler to mark gtx originated here as orphaned from the
472
+ * beginning rather than in error handler; resolver won't touch gtx
473
+ * while it is locked on us anyway
474
+ */
475
+ mtm_commit_state .gtx -> orphaned = true;
463
476
Assert (mtm_commit_state .gtx -> state .status == GTXInvalid );
464
477
/*
465
478
* PREPARE doesn't happen here; ret 0 just means we were already in
@@ -474,13 +487,14 @@ MtmTwoPhaseCommit(void)
474
487
475
488
AllowTempIn2PC = true;
476
489
CommitTransactionCommand (); /* here we actually PrepareTransaction */
477
- ReleasePB (); /* don't hold generation switch anymore */
478
490
mtm_commit_state .gtx -> prepared = true;
491
+ ReleasePB (); /* don't hold generation switch anymore */
479
492
/* end_lsn of PREPARE */
480
493
mtm_log (MtmTxFinish , "TXFINISH: %s prepared at %X/%X" ,
481
494
mtm_commit_state .gid ,
482
495
(uint32 ) (XactLastCommitEnd >> 32 ),
483
496
(uint32 ) (XactLastCommitEnd ));
497
+ MemoryContextSwitchTo (mtm_commit_state .mctx );
484
498
485
499
/*
486
500
* By definition of generations, we must collect PREPARE ack from
@@ -513,7 +527,6 @@ MtmTwoPhaseCommit(void)
513
527
if (!ret )
514
528
{
515
529
MtmGeneration new_gen = MtmGetCurrentGen (false);
516
- mtm_commit_state .abort_prepare = true;
517
530
ereport (ERROR ,
518
531
(errcode (ERRCODE_INTERNAL_ERROR ),
519
532
errmsg ("[multimaster] failed to collect prepare acks due to generation switch: was num=" UINT64_FORMAT ", members=%s, now num=" UINT64_FORMAT ", members=%s" ,
@@ -527,7 +540,6 @@ MtmTwoPhaseCommit(void)
527
540
nodemask_t failed_cohort = cohort ;
528
541
for (i = 0 ; i < n_messages ; i ++ )
529
542
BIT_CLEAR (failed_cohort , p_messages [i ]-> node_id - 1 );
530
- mtm_commit_state .abort_prepare = true;
531
543
532
544
ereport (ERROR ,
533
545
(errcode (ERRCODE_CONNECTION_FAILURE ),
@@ -538,7 +550,6 @@ MtmTwoPhaseCommit(void)
538
550
{
539
551
if (!p_messages [i ]-> prepared )
540
552
{
541
- mtm_commit_state .abort_prepare = true;
542
553
/* don't print random gid, node id for regression tests output */
543
554
if (MtmVolksWagenMode )
544
555
ereport (ERROR ,
@@ -688,6 +699,7 @@ MtmTwoPhaseCommit(void)
688
699
dmq_stream_unsubscribe ();
689
700
mtm_log (MtmTxTrace , "%s unsubscribed for %s" ,
690
701
mtm_commit_state .gid , dmq_stream_name );
702
+ mtm_commit_state .inside_commit_sequence = false;
691
703
}
692
704
PG_CATCH ();
693
705
{
0 commit comments