@@ -83,6 +83,8 @@ typedef struct {
83
83
bool isReplicated ; /* transaction on replica */
84
84
bool isDistributed ; /* transaction performed INSERT/UPDATE/DELETE and has to be replicated to other nodes */
85
85
bool isPrepared ; /* transaction is prepared at first stage of 2PC */
86
+ bool isSuspended ; /* prepared transaction is suspended because the coordinator node was switched offline */
87
+ bool isTransactionBlock ; /* is transaction block */
86
88
bool containsDML ; /* transaction contains DML statements */
87
89
XidStatus status ; /* transaction status */
88
90
csn_t snapshot ; /* transaction snapshot */
@@ -711,7 +713,7 @@ MtmXactCallback(XactEvent event, void *arg)
711
713
}
712
714
713
715
/*
714
- * Check if this is "normal" user trnsaction which should be distributed to other nodes
716
+ * Check if this is "normal" user transaction which should be distributed to other nodes
715
717
*/
716
718
static bool
717
719
MtmIsUserTransaction ()
@@ -733,6 +735,7 @@ MtmResetTransaction()
733
735
x -> gtid .xid = InvalidTransactionId ;
734
736
x -> isDistributed = false;
735
737
x -> isPrepared = false;
738
+ x -> isSuspended = false;
736
739
x -> isTwoPhase = false;
737
740
x -> csn =
738
741
x -> status = TRANSACTION_STATUS_UNKNOWN ;
@@ -762,6 +765,7 @@ MtmBeginTransaction(MtmCurrentTrans* x)
762
765
x -> isReplicated = MtmIsLogicalReceiver ;
763
766
x -> isDistributed = MtmIsUserTransaction ();
764
767
x -> isPrepared = false;
768
+ x -> isSuspended = false;
765
769
x -> isTwoPhase = false;
766
770
x -> isTransactionBlock = IsTransactionBlock ();
767
771
/* Application name can be changed using the PGAPPNAME environment variable */
@@ -1003,14 +1007,18 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
1003
1007
}
1004
1008
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1005
1009
if (ts -> isPrepared ) {
1006
- elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1007
- }
1008
- if (Mtm -> status != MTM_ONLINE ) {
1009
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1010
- } else {
1011
- elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1010
+ // GetNewTransactionId(false); /* force increment of transaction counter */
1011
+ // elog(ERROR, "Commit of distributed transaction %s is suspended because node is switched to %s mode", ts->gid, MtmNodeStatusMnem[Mtm->status]);
1012
+ elog (WARNING , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1013
+ x -> isSuspended = true;
1014
+ } else {
1015
+ if (Mtm -> status != MTM_ONLINE ) {
1016
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1017
+ } else {
1018
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1019
+ }
1020
+ MtmAbortTransaction (ts );
1012
1021
}
1013
- MtmAbortTransaction (ts );
1014
1022
}
1015
1023
x -> status = ts -> status ;
1016
1024
MTM_LOG3 ("%d: Result of vote: %d" , MyProcPid , ts -> status );
@@ -1077,14 +1085,18 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1077
1085
}
1078
1086
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1079
1087
if (ts -> isPrepared ) {
1080
- elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1081
- }
1082
- if (Mtm -> status != MTM_ONLINE ) {
1083
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1084
- } else {
1085
- elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1088
+ // GetNewTransactionId(false); /* force increment of transaction counter */
1089
+ // elog(ERROR, "Commit of distributed transaction %s is suspended because node is switched to %s mode", ts->gid, MtmNodeStatusMnem[Mtm->status]);
1090
+ elog (WARNING , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1091
+ x -> isSuspended = true;
1092
+ } else {
1093
+ if (Mtm -> status != MTM_ONLINE ) {
1094
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1095
+ } else {
1096
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1097
+ }
1098
+ MtmAbortTransaction (ts );
1086
1099
}
1087
- MtmAbortTransaction (ts );
1088
1100
}
1089
1101
x -> status = ts -> status ;
1090
1102
x -> xid = ts -> xid ;
@@ -1292,6 +1304,7 @@ static void MtmStartRecovery()
1292
1304
MtmLock (LW_EXCLUSIVE );
1293
1305
BIT_SET (Mtm -> disabledNodeMask , MtmNodeId - 1 );
1294
1306
MtmSwitchClusterMode (MTM_RECOVERY );
1307
+ Mtm -> recoveredLSN = InvalidXLogRecPtr ;
1295
1308
MtmUnlock ();
1296
1309
}
1297
1310
@@ -1603,6 +1616,7 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
1603
1616
MTM_LOG1 ("%d: node %d is caugth-up without locking cluster" , MyProcPid , nodeId );
1604
1617
/* We are lucky: caught up without locking cluster! */
1605
1618
}
1619
+ Mtm -> recoveredLSN = walLSN ;
1606
1620
MtmEnableNode (nodeId );
1607
1621
Mtm -> nConfigChanges += 1 ;
1608
1622
caughtUp = true;
@@ -2074,6 +2088,7 @@ static void MtmInitialize()
2074
2088
Mtm -> walSenderLockerMask = 0 ;
2075
2089
Mtm -> nodeLockerMask = 0 ;
2076
2090
Mtm -> reconnectMask = 0 ;
2091
+ Mtm -> recoveredLSN = InvalidXLogRecPtr ;
2077
2092
Mtm -> nLockers = 0 ;
2078
2093
Mtm -> nActiveTransactions = 0 ;
2079
2094
Mtm -> votingTransactions = NULL ;
@@ -2101,13 +2116,14 @@ static void MtmInitialize()
2101
2116
Mtm -> nodes [i ].con = MtmConnections [i ];
2102
2117
Mtm -> nodes [i ].flushPos = 0 ;
2103
2118
Mtm -> nodes [i ].lastHeartbeat = 0 ;
2104
- Mtm -> nodes [i ].restartLsn = 0 ;
2119
+ Mtm -> nodes [i ].restartLSN = InvalidXLogRecPtr ;
2105
2120
Mtm -> nodes [i ].originId = InvalidRepOriginId ;
2106
2121
Mtm -> nodes [i ].timeline = 0 ;
2122
+ Mtm -> nodes [i ].recoveredLSN = InvalidXLogRecPtr ;
2107
2123
}
2108
2124
Mtm -> nodes [MtmNodeId - 1 ].originId = DoNotReplicateId ;
2109
2125
/* All transaction originated from the current node should be ignored during recovery */
2110
- Mtm -> nodes [MtmNodeId - 1 ].restartLsn = (XLogRecPtr )PG_UINT64_MAX ;
2126
+ Mtm -> nodes [MtmNodeId - 1 ].restartLSN = (XLogRecPtr )PG_UINT64_MAX ;
2111
2127
PGSemaphoreCreate (& Mtm -> sendSemaphore );
2112
2128
PGSemaphoreReset (& Mtm -> sendSemaphore );
2113
2129
SpinLockInit (& Mtm -> spinlock );
@@ -2851,18 +2867,21 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2851
2867
*/
2852
2868
MtmReplicationMode MtmGetReplicationMode (int nodeId , sig_atomic_t volatile * shutdown )
2853
2869
{
2854
- bool recovery = false ;
2870
+ MtmReplicationMode mode = REPLMODE_OPEN_EXISTED ;
2855
2871
2856
- while (Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE )
2872
+ while (( Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE ) || BIT_CHECK ( Mtm -> disabledNodeMask , nodeId - 1 ) )
2857
2873
{
2858
2874
if (* shutdown )
2859
2875
{
2860
2876
return REPLMODE_EXIT ;
2861
2877
}
2862
- MTM_LOG2 ("%d: receiver slot mode %s" , MyProcPid , MtmNodeStatusMnem [Mtm -> status ]);
2863
2878
MtmLock (LW_EXCLUSIVE );
2879
+ if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
2880
+ mode = REPLMODE_CREATE_NEW ;
2881
+ }
2882
+ MTM_LOG2 ("%d: receiver slot mode %s" , MyProcPid , MtmNodeStatusMnem [Mtm -> status ]);
2864
2883
if (Mtm -> status == MTM_RECOVERY ) {
2865
- recovery = true ;
2884
+ mode = REPLMODE_RECOVERED ;
2866
2885
if ((Mtm -> recoverySlot == 0 && (Mtm -> donorNodeId == MtmNodeId || Mtm -> donorNodeId == nodeId ))
2867
2886
|| Mtm -> recoverySlot == nodeId )
2868
2887
{
@@ -2880,13 +2899,14 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2880
2899
/* delay opening of other slots until recovery is completed */
2881
2900
MtmSleep (STATUS_POLL_DELAY );
2882
2901
}
2883
- if (recovery ) {
2902
+ if (mode == REPLMODE_RECOVERED ) {
2884
2903
MTM_LOG1 ("%d: Restart replication from node %d after end of recovery" , MyProcPid , nodeId );
2904
+ } else if (mode == REPLMODE_CREATE_NEW ) {
2905
+ MTM_LOG1 ("%d: Start replication from recovered node %d" , MyProcPid , nodeId );
2885
2906
} else {
2886
2907
MTM_LOG1 ("%d: Continue replication from node %d" , MyProcPid , nodeId );
2887
2908
}
2888
- /* After recovery completion we need to drop all other slots to avoid receive of redundant data */
2889
- return recovery ? REPLMODE_RECOVERED : REPLMODE_NORMAL ;
2909
+ return mode ;
2890
2910
}
2891
2911
2892
2912
static bool MtmIsBroadcast ()
@@ -2965,7 +2985,7 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2965
2985
MtmIsRecoverySession = true;
2966
2986
} else if (strcmp (strVal (elem -> arg ), "recovered" ) == 0 ) {
2967
2987
recoveryCompleted = true;
2968
- } else if (strcmp (strVal (elem -> arg ), "normal " ) != 0 ) {
2988
+ } else if (strcmp (strVal (elem -> arg ), "open_existed" ) != 0 && strcmp ( strVal ( elem -> arg ), "create_new " ) != 0 ) {
2969
2989
elog (ERROR , "Illegal recovery mode %s" , strVal (elem -> arg ));
2970
2990
}
2971
2991
} else {
@@ -2977,14 +2997,20 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2977
2997
} else {
2978
2998
elog (ERROR , "Restart position is not specified" );
2979
2999
}
3000
+ } else if (strcmp ("mtm_recovered_pos" , elem -> defname ) == 0 ) {
3001
+ if (elem -> arg != NULL && strVal (elem -> arg ) != NULL ) {
3002
+ sscanf (strVal (elem -> arg ), "%lx" , & Mtm -> nodes [MtmReplicationNodeId - 1 ].recoveredLSN );
3003
+ } else {
3004
+ elog (ERROR , "Recovered position is not specified" );
3005
+ }
2980
3006
}
2981
3007
}
2982
3008
MtmLock (LW_EXCLUSIVE );
2983
3009
if (MtmIsRecoverySession ) {
2984
3010
MTM_LOG1 ("%d: Node %d start recovery of node %d at position %lx" , MyProcPid , MtmNodeId , MtmReplicationNodeId , recoveryStartPos );
2985
3011
Assert (MyReplicationSlot != NULL );
2986
3012
if (recoveryStartPos < MyReplicationSlot -> data .restart_lsn ) {
2987
- elog (ERROR , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
3013
+ elog (WARNING , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
2988
3014
}
2989
3015
if (!BIT_CHECK (Mtm -> disabledNodeMask , MtmReplicationNodeId - 1 )) {
2990
3016
MtmDisableNode (MtmReplicationNodeId );
@@ -3133,17 +3159,17 @@ bool MtmFilterTransaction(char* record, int size)
3133
3159
default :
3134
3160
break ;
3135
3161
}
3136
- duplicate = Mtm -> status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLsn ;
3162
+ duplicate = Mtm -> status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLSN ;
3137
3163
3138
3164
MTM_LOG1 ("%s transaction %s from node %d lsn %lx, origin node %d, original lsn=%lx, current lsn=%lx" ,
3139
- duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLsn );
3165
+ duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLSN );
3140
3166
if (Mtm -> status == MTM_RECOVERY ) {
3141
- if (Mtm -> nodes [origin_node - 1 ].restartLsn < origin_lsn ) {
3142
- Mtm -> nodes [origin_node - 1 ].restartLsn = origin_lsn ;
3167
+ if (Mtm -> nodes [origin_node - 1 ].restartLSN < origin_lsn ) {
3168
+ Mtm -> nodes [origin_node - 1 ].restartLSN = origin_lsn ;
3143
3169
}
3144
3170
} else {
3145
- if (Mtm -> nodes [replication_node - 1 ].restartLsn < end_lsn ) {
3146
- Mtm -> nodes [replication_node - 1 ].restartLsn = end_lsn ;
3171
+ if (Mtm -> nodes [replication_node - 1 ].restartLSN < end_lsn ) {
3172
+ Mtm -> nodes [replication_node - 1 ].restartLSN = end_lsn ;
3147
3173
}
3148
3174
}
3149
3175
return duplicate ;
@@ -3758,12 +3784,16 @@ static bool MtmTwoPhaseCommit(MtmCurrentTrans* x)
3758
3784
/* ??? Should we do explicit rollback */
3759
3785
} else {
3760
3786
CommitTransactionCommand ();
3761
- StartTransactionCommand ();
3762
- if (MtmGetCurrentTransactionStatus () == TRANSACTION_STATUS_ABORTED ) {
3763
- FinishPreparedTransaction (x -> gid , false);
3764
- elog (ERROR , "Transaction %s is aborted by DTM" , x -> gid );
3765
- } else {
3766
- FinishPreparedTransaction (x -> gid , true);
3787
+ if (x -> isSuspended ) {
3788
+ elog (WARNING , "Transaction %s is left in prepared state because coordinator onde is not online" , x -> gid );
3789
+ } else {
3790
+ StartTransactionCommand ();
3791
+ if (x -> status == TRANSACTION_STATUS_ABORTED ) {
3792
+ FinishPreparedTransaction (x -> gid , false);
3793
+ elog (ERROR , "Transaction %s is aborted by DTM" , x -> gid );
3794
+ } else {
3795
+ FinishPreparedTransaction (x -> gid , true);
3796
+ }
3767
3797
}
3768
3798
}
3769
3799
return true;
0 commit comments