@@ -246,6 +246,7 @@ static int MtmGcPeriod;
246
246
static bool MtmIgnoreTablesWithoutPk ;
247
247
static int MtmLockCount ;
248
248
static bool MtmMajorNode ;
249
+ static bool MtmBreakConnection ;
249
250
250
251
static ExecutorStart_hook_type PreviousExecutorStartHook ;
251
252
static ExecutorFinish_hook_type PreviousExecutorFinishHook ;
@@ -418,16 +419,27 @@ MtmInitializeSequence(int64* start, int64* step)
418
419
* -------------------------------------------
419
420
*/
420
421
421
- csn_t MtmTransactionSnapshot (TransactionId xid )
422
+ /*
423
+ * Get snapshot of transaction processed by the WAL sender pglogical plugin.
424
+ * If it is a local transaction or the replication node is not in the participants mask, then return INVALID_CSN.
425
+ * Transaction should be skipped by the WAL sender in the following cases:
426
+ * 1. Transaction was replicated from some other node and it is not a recovery process.
427
+ * 2. State of transaction is unknown
428
+ * 3. The replication node does not participate in the transaction
429
+ */
430
+ csn_t MtmDistributedTransactionSnapshot (TransactionId xid , int nodeId , nodemask_t * participantsMask )
422
431
{
423
432
csn_t snapshot = INVALID_CSN ;
424
-
433
+ * participantsMask = 0 ;
425
434
MtmLock (LW_SHARED );
426
435
if (Mtm -> status == MTM_ONLINE ) {
427
436
MtmTransState * ts = (MtmTransState * )hash_search (MtmXid2State , & xid , HASH_FIND , NULL );
428
- if (ts != NULL && !ts -> isLocal ) {
429
- snapshot = ts -> snapshot ;
430
- Assert (ts -> gtid .node == MtmNodeId || MtmIsRecoverySession );
437
+ if (ts != NULL ) {
438
+ * participantsMask = ts -> participantsMask ;
439
+ if (!ts -> isLocal && BIT_CHECK (ts -> participantsMask , nodeId - 1 )) {
440
+ snapshot = ts -> snapshot ;
441
+ Assert (ts -> gtid .node == MtmNodeId || MtmIsRecoverySession );
442
+ }
431
443
}
432
444
}
433
445
MtmUnlock ();
@@ -621,6 +633,9 @@ MtmAdjustOldestXid(TransactionId xid)
621
633
return xid ;
622
634
}
623
635
636
+
637
+
638
+
624
639
/*
625
640
* -------------------------------------------
626
641
* Transaction list manipulation.
@@ -789,7 +804,7 @@ MtmBeginTransaction(MtmCurrentTrans* x)
789
804
* Allow execution of transaction by bg-workers to make it possible to perform recovery.
790
805
*/
791
806
MtmUnlock ();
792
- elog (ERROR , "Multimaster node is not online: current status %s" , MtmNodeStatusMnem [Mtm -> status ]);
807
+ elog (MtmBreakConnection ? FATAL : ERROR , "Multimaster node is not online: current status %s" , MtmNodeStatusMnem [Mtm -> status ]);
793
808
}
794
809
x -> containsDML = false;
795
810
x -> snapshot = MtmAssignCSN ();
@@ -799,10 +814,8 @@ MtmBeginTransaction(MtmCurrentTrans* x)
799
814
800
815
/*
801
816
* Check if there is a global multimaster lock preventing new transactions from committing, to give wal-senders a chance to catch up.
802
- * Only "own" transactions are blocked. Transactions replicated from other nodes (including recovered transaction) should be proceeded
803
- * and should not cause cluster status change.
804
817
*/
805
- if (x -> isDistributed /* && x->isReplicated*/ ) {
818
+ if (x -> isDistributed ) {
806
819
MtmCheckClusterLock ();
807
820
}
808
821
@@ -1141,6 +1154,9 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
1141
1154
}
1142
1155
MtmUnlock ();
1143
1156
if (x -> isTwoPhase ) {
1157
+ if (x -> status == TRANSACTION_STATUS_ABORTED ) {
1158
+ elog (ERROR , "Prepare of user's 2PC transaction %s (%llu) is aborted by DTM" , x -> gid , (long64 )x -> xid );
1159
+ }
1144
1160
MtmResetTransaction ();
1145
1161
}
1146
1162
}
@@ -2198,7 +2214,29 @@ void MtmReconnectNode(int nodeId)
2198
2214
MtmUnlock ();
2199
2215
}
2200
2216
2201
-
2217
+ /*
2218
+ * Check participants of a replicated transaction. This function is called by the receiver at the start of a replicated transaction to
2219
+ * check that all live nodes participate in it. Returns false (after logging a WARNING) if the originating node is disabled or if some live node is missing from participantsMask; returns true otherwise.
2220
+ */
2221
+ bool MtmCheckParticipants (GlobalTransactionId * gtid , nodemask_t participantsMask )
2222
+ {
2223
+ bool result = true;
2224
+ MtmLock (LW_EXCLUSIVE ); /* NOTE(review): exclusive mode is requested although only reads of Mtm fields are visible in this function - confirm whether LW_SHARED would suffice */
2225
+ if (BIT_CHECK (Mtm -> disabledNodeMask , gtid -> node - 1 )) { /* bit (node - 1) of the disabled mask is set for the originating node; node ids are presumably 1-based */
2226
+ elog (WARNING , "Ignore transaction %llu from disabled node %d" , (long64 )gtid -> xid , gtid -> node );
2227
+ result = false;
2228
+ } else {
2229
+ nodemask_t liveMask = (((nodemask_t )1 << Mtm -> nAllNodes ) - 1 ) & ~Mtm -> disabledNodeMask ; /* low nAllNodes bits set, with disabled nodes cleared */
2230
+ BIT_SET (participantsMask , gtid -> node - 1 ); /* count the originating node as a participant; participantsMask is a by-value parameter, so only the local copy changes */
2231
+ if (liveMask & ~participantsMask ) { /* at least one live node is absent from the participants */
2232
+ elog (WARNING , "Ignore transaction %llu from node %d because some of live nodes (%llx) are not participated in it (%llx)" ,
2233
+ (long64 )gtid -> xid , gtid -> node , liveMask , participantsMask ); /* NOTE(review): both masks are passed to %llx without the (long64) cast applied to xid - confirm nodemask_t matches the long long width */
2234
+ result = false;
2235
+ }
2236
+ }
2237
+ MtmUnlock ();
2238
+ return result ;
2239
+ }
2202
2240
2203
2241
/*
2204
2242
* -------------------------------------------
@@ -2836,6 +2874,18 @@ _PG_init(void)
2836
2874
NULL
2837
2875
);
2838
2876
2877
+ DefineCustomBoolVariable (
2878
+ "multimaster.break_connection" ,
2879
+ "Break connection with client when node is no online" ,
2880
+ NULL ,
2881
+ & MtmBreakConnection ,
2882
+ false,
2883
+ PGC_BACKEND ,
2884
+ 0 ,
2885
+ NULL ,
2886
+ NULL ,
2887
+ NULL
2888
+ );
2839
2889
DefineCustomBoolVariable (
2840
2890
"multimaster.major_node" ,
2841
2891
"Node which forms a majority in case of partitioning in cliques with equal number of nodes" ,
@@ -4358,8 +4408,8 @@ static bool MtmTwoPhaseCommit(MtmCurrentTrans* x)
4358
4408
}
4359
4409
if (!PrepareTransactionBlock (x -> gid ))
4360
4410
{
4361
- if (!MtmVolksWagenMode )
4362
- elog (WARNING , "Failed to prepare transaction %s" , x -> gid );
4411
+ // if (!MtmVolksWagenMode)
4412
+ elog (WARNING , "Failed to prepare transaction %s (%llu) " , x -> gid , ( long64 ) x -> xid );
4363
4413
/* ??? Should we do explicit rollback */
4364
4414
} else {
4365
4415
CommitTransactionCommand ();
0 commit comments