@@ -61,7 +61,8 @@ typedef struct {
61
61
bool isReplicated ; /* transaction on replica */
62
62
bool isDistributed ; /* transaction performed INSERT/UPDATE/DELETE and has to be replicated to other nodes */
63
63
bool containsDML ; /* transaction contains DML statements */
64
- csn_t snapshot ; /* transaction snaphsot */
64
+ bool isPrepared ; /* transaction is prepared as part of 2PC */
65
+ csn_t snapshot ; /* transaction snaphsot */
65
66
} MtmCurrentTrans ;
66
67
67
68
/* #define USE_SPINLOCK 1 */
@@ -94,6 +95,8 @@ static void MtmSetTransactionStatus(TransactionId xid, int nsubxids, Transaction
94
95
static void MtmInitialize (void );
95
96
static void MtmXactCallback (XactEvent event , void * arg );
96
97
static void MtmBeginTransaction (MtmCurrentTrans * x );
98
+ static void MtmPrecommitTransaction (MtmCurrentTrans * x );
99
+ static bool MtmCommitTransaction (TransactionId xid , int nsubxids , TransactionId * subxids );
97
100
static void MtmPrepareTransaction (MtmCurrentTrans * x );
98
101
static void MtmEndTransaction (MtmCurrentTrans * x , bool commit );
99
102
static TransactionId MtmGetOldestXmin (Relation rel , bool ignoreVacuum );
@@ -143,6 +146,7 @@ static int MtmWorkers;
143
146
static int MtmVacuumDelay ;
144
147
static int MtmMinRecoveryLag ;
145
148
static int MtmMaxRecoveryLag ;
149
+ static bool MtmUse2PC ;
146
150
147
151
static ExecutorFinish_hook_type PreviousExecutorFinishHook ;
148
152
static ProcessUtility_hook_type PreviousProcessUtilityHook ;
@@ -467,6 +471,9 @@ MtmXactCallback(XactEvent event, void *arg)
467
471
MtmBeginTransaction (& dtmTx );
468
472
break ;
469
473
case XACT_EVENT_PRE_COMMIT :
474
+ MtmPrecommitTransaction (& dtmTx );
475
+ break ;
476
+ case XACT_EVENT_PREPARE :
470
477
MtmPrepareTransaction (& dtmTx );
471
478
break ;
472
479
case XACT_EVENT_COMMIT :
@@ -498,6 +505,7 @@ MtmBeginTransaction(MtmCurrentTrans* x)
498
505
x -> isReplicated = false;
499
506
x -> isDistributed = MtmIsUserTransaction ();
500
507
x -> containsDML = false;
508
+ x -> isPrepared = false;
501
509
x -> snapshot = MtmAssignCSN ();
502
510
x -> gtid .xid = InvalidTransactionId ;
503
511
MtmUnlock ();
@@ -561,10 +569,11 @@ MtmCheckClusterLock()
561
569
}
562
570
563
571
/*
572
+ * This function is called as a pre-commit callback.
564
573
* We need to pass snapshot to WAL-sender, so create record in transaction status hash table
565
574
* before commit
566
575
*/
567
- static void MtmPrepareTransaction (MtmCurrentTrans * x )
576
+ static void MtmPrecommitTransaction (MtmCurrentTrans * x )
568
577
{
569
578
MtmTransState * ts ;
570
579
int i ;
@@ -608,6 +617,20 @@ static void MtmPrepareTransaction(MtmCurrentTrans* x)
608
617
MTM_TRACE ("%d: MtmPrepareTransaction prepare commit of %d CSN=%ld\n" , getpid (), x -> xid , ts -> csn );
609
618
}
610
619
620
+ static void
621
+ MtmPrepareTransaction (MtmCurrentTrans * x )
622
+ {
623
+ TransactionId * subxids ;
624
+ int nSubxids ;
625
+ MtmPrecommitTransaction (x );
626
+ x -> isPrepared = true;
627
+ nSubxids = xactGetCommittedChildren (& subxids );
628
+ if (!MtmCommitTransaction (x -> xid , nSubxids , subxids ))
629
+ {
630
+ elog (ERROR , "Commit of transaction %d is rejected by DTM" , x -> xid );
631
+ }
632
+ }
633
+
611
634
/**
612
635
* Check state of replication slots. If some of them lag too far behind WAL, then drop these slots to avoid
613
636
* WAL overflow
@@ -755,7 +778,7 @@ static void
755
778
MtmSetTransactionStatus (TransactionId xid , int nsubxids , TransactionId * subxids , XidStatus status , XLogRecPtr lsn )
756
779
{
757
780
MTM_TRACE ("%d: MtmSetTransactionStatus %u(%u) = %u, isDistributed=%d\n" , getpid (), xid , dtmTx .xid , status , dtmTx .isDistributed );
758
- if (xid == dtmTx .xid && dtmTx .isDistributed )
781
+ if (xid == dtmTx .xid && dtmTx .isDistributed && ! dtmTx . isPrepared )
759
782
{
760
783
if (status == TRANSACTION_STATUS_ABORTED || !dtmTx .containsDML || dtm -> status == MTM_RECOVERY )
761
784
{
@@ -812,6 +835,18 @@ _PG_init(void)
812
835
if (!process_shared_preload_libraries_in_progress )
813
836
return ;
814
837
838
+ DefineCustomBoolVariable (
839
+ "multimaster.use_2pc" ,
840
+ "Use two phase commit" ,
841
+ "Replace normal commit with two phase commit" ,
842
+ & MtmUse2PC ,
843
+ false,
844
+ PGC_BACKEND ,
845
+ 0 ,
846
+ NULL ,
847
+ NULL ,
848
+ NULL
849
+ );
815
850
DefineCustomIntVariable (
816
851
"multimaster.min_recovery_lag" ,
817
852
"Minamal lag of WAL-sender performing recovery after which cluster is locked until recovery is completed" ,
@@ -1313,13 +1348,54 @@ static bool MtmProcessDDLCommand(char const* queryString)
1313
1348
return false;
1314
1349
}
1315
1350
1351
+ /*
1352
+ * Genenerate global transaction identifier for two-pahse commit.
1353
+ * It should be unique for all nodes
1354
+ */
1355
+ static char *
1356
+ MtmGenerateGid ()
1357
+ {
1358
+ static int localCount ;
1359
+ return psprintf ("GID-%d-%d-%d" , MtmNodeId , MyProcPid , ++ localCount );
1360
+ }
1361
+
1316
1362
static void MtmProcessUtility (Node * parsetree , const char * queryString ,
1317
1363
ProcessUtilityContext context , ParamListInfo params ,
1318
1364
DestReceiver * dest , char * completionTag )
1319
1365
{
1320
1366
bool skipCommand ;
1321
1367
switch (nodeTag (parsetree ))
1322
1368
{
1369
+ case T_TransactionStmt :
1370
+ {
1371
+ TransactionStmt * stmt = (TransactionStmt * ) parsetree ;
1372
+ switch (stmt -> kind )
1373
+ {
1374
+ case TRANS_STMT_COMMIT :
1375
+ if (MtmUse2PC ) {
1376
+ char * gid = MtmGenerateGid ();
1377
+ if (!PrepareTransactionBlock (gid ))
1378
+ {
1379
+ /* report unsuccessful commit in completionTag */
1380
+ if (completionTag ) {
1381
+ strcpy (completionTag , "ROLLBACK" );
1382
+ }
1383
+ /* ??? Should we do explicit rollback */
1384
+ } else {
1385
+ FinishPreparedTransaction (gid , true);
1386
+ }
1387
+ return ;
1388
+ }
1389
+ break ;
1390
+ case TRANS_STMT_PREPARE :
1391
+ case TRANS_STMT_COMMIT_PREPARED :
1392
+ case TRANS_STMT_ROLLBACK_PREPARED :
1393
+ elog (ERROR , "Two phase commit is not supported by multimaster" );
1394
+ default :
1395
+ break ;
1396
+ }
1397
+ }
1398
+ /* no break */
1323
1399
case T_PlannedStmt :
1324
1400
case T_ClosePortalStmt :
1325
1401
case T_FetchStmt :
@@ -1333,7 +1409,6 @@ static void MtmProcessUtility(Node *parsetree, const char *queryString,
1333
1409
case T_LoadStmt :
1334
1410
case T_VariableSetStmt :
1335
1411
case T_VariableShowStmt :
1336
- case T_TransactionStmt :
1337
1412
skipCommand = true;
1338
1413
break ;
1339
1414
default :
0 commit comments