@@ -83,6 +83,7 @@ typedef struct {
83
83
bool isReplicated ; /* transaction on replica */
84
84
bool isDistributed ; /* transaction performed INSERT/UPDATE/DELETE and has to be replicated to other nodes */
85
85
bool isPrepared ; /* transaction is prepared at first stage of 2PC */
86
+ bool isSuspended ; /* prepared transaction is suspended because coordinator node is switched to offline mode */
86
87
bool isTransactionBlock ; /* is transaction block */
87
88
bool containsDML ; /* transaction contains DML statements */
88
89
XidStatus status ; /* transaction status */
@@ -712,7 +713,7 @@ MtmXactCallback(XactEvent event, void *arg)
712
713
}
713
714
714
715
/*
715
- * Check if this is "normal" user trnsaction which should be distributed to other nodes
716
+ * Check if this is "normal" user transaction which should be distributed to other nodes
716
717
*/
717
718
static bool
718
719
MtmIsUserTransaction ()
@@ -734,6 +735,7 @@ MtmResetTransaction()
734
735
x -> gtid .xid = InvalidTransactionId ;
735
736
x -> isDistributed = false;
736
737
x -> isPrepared = false;
738
+ x -> isSuspended = false;
737
739
x -> isTwoPhase = false;
738
740
x -> csn =
739
741
x -> status = TRANSACTION_STATUS_UNKNOWN ;
@@ -763,6 +765,7 @@ MtmBeginTransaction(MtmCurrentTrans* x)
763
765
x -> isReplicated = MtmIsLogicalReceiver ;
764
766
x -> isDistributed = MtmIsUserTransaction ();
765
767
x -> isPrepared = false;
768
+ x -> isSuspended = false;
766
769
x -> isTwoPhase = false;
767
770
x -> isTransactionBlock = IsTransactionBlock ();
768
771
/* Application name can be changed using PGAPPNAME environment variable */
@@ -1004,14 +1007,18 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
1004
1007
}
1005
1008
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1006
1009
if (ts -> isPrepared ) {
1007
- elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1008
- }
1009
- if (Mtm -> status != MTM_ONLINE ) {
1010
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1011
- } else {
1012
- elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1010
+ // GetNewTransactionId(false); /* force increment of transaction counter */
1011
+ // elog(ERROR, "Commit of distributed transaction %s is suspended because node is switched to %s mode", ts->gid, MtmNodeStatusMnem[Mtm->status]);
1012
+ elog (WARNING , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1013
+ x -> isSuspended = true;
1014
+ } else {
1015
+ if (Mtm -> status != MTM_ONLINE ) {
1016
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1017
+ } else {
1018
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1019
+ }
1020
+ MtmAbortTransaction (ts );
1013
1021
}
1014
- MtmAbortTransaction (ts );
1015
1022
}
1016
1023
x -> status = ts -> status ;
1017
1024
MTM_LOG3 ("%d: Result of vote: %d" , MyProcPid , ts -> status );
@@ -1078,14 +1085,18 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1078
1085
}
1079
1086
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1080
1087
if (ts -> isPrepared ) {
1081
- elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1082
- }
1083
- if (Mtm -> status != MTM_ONLINE ) {
1084
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1085
- } else {
1086
- elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1088
+ // GetNewTransactionId(false); /* force increment of transaction counter */
1089
+ // elog(ERROR, "Commit of distributed transaction %s is suspended because node is switched to %s mode", ts->gid, MtmNodeStatusMnem[Mtm->status]);
1090
+ elog (WARNING , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1091
+ x -> isSuspended = true;
1092
+ } else {
1093
+ if (Mtm -> status != MTM_ONLINE ) {
1094
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1095
+ } else {
1096
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1097
+ }
1098
+ MtmAbortTransaction (ts );
1087
1099
}
1088
- MtmAbortTransaction (ts );
1089
1100
}
1090
1101
x -> status = ts -> status ;
1091
1102
x -> xid = ts -> xid ;
@@ -1122,6 +1133,16 @@ MtmAbortPreparedTransaction(MtmCurrentTrans* x)
1122
1133
}
1123
1134
}
1124
1135
1136
+ static void
1137
+ MtmLogAbortLogicalMessage (int nodeId , char const * gid )
1138
+ {
1139
+ MtmAbortLogicalMessage msg ;
1140
+ strcpy (msg .gid , gid );
1141
+ msg .origin_node = nodeId ;
1142
+ msg .origin_lsn = replorigin_session_origin_lsn ;
1143
+ XLogFlush (LogLogicalMessage ("A" , (char * )& msg , sizeof msg , false));
1144
+ }
1145
+
1125
1146
static void
1126
1147
MtmEndTransaction (MtmCurrentTrans * x , bool commit )
1127
1148
{
@@ -1143,7 +1164,8 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1143
1164
}
1144
1165
if (ts != NULL ) {
1145
1166
if (* ts -> gid )
1146
- MTM_LOG2 ("TRANSLOG: %s transaction %s status %d" , (commit ? "commit" : "rollback" ), ts -> gid , ts -> status );
1167
+ MTM_LOG1 ("TRANSLOG: %s transaction git=%s xid=%d node=%d dxid=%d status %d" ,
1168
+ (commit ? "commit" : "rollback" ), ts -> gid , ts -> xid , ts -> gtid .node , ts -> gtid .xid , ts -> status );
1147
1169
if (commit ) {
1148
1170
if (!(ts -> status == TRANSACTION_STATUS_UNKNOWN
1149
1171
|| (ts -> status == TRANSACTION_STATUS_IN_PROGRESS && Mtm -> status == MTM_RECOVERY )))
@@ -1202,7 +1224,8 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1202
1224
}
1203
1225
MtmTransactionListAppend (ts );
1204
1226
if (* x -> gid ) {
1205
- LogLogicalMessage ("A" , x -> gid , strlen (x -> gid ) + 1 , false);
1227
+ replorigin_session_origin_lsn = InvalidXLogRecPtr ;
1228
+ MtmLogAbortLogicalMessage (MtmNodeId , x -> gid );
1206
1229
}
1207
1230
}
1208
1231
MtmSend2PCMessage (ts , MSG_ABORTED ); /* send notification to coordinator */
@@ -1293,6 +1316,7 @@ static void MtmStartRecovery()
1293
1316
MtmLock (LW_EXCLUSIVE );
1294
1317
BIT_SET (Mtm -> disabledNodeMask , MtmNodeId - 1 );
1295
1318
MtmSwitchClusterMode (MTM_RECOVERY );
1319
+ Mtm -> recoveredLSN = InvalidXLogRecPtr ;
1296
1320
MtmUnlock ();
1297
1321
}
1298
1322
@@ -1604,6 +1628,7 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
1604
1628
MTM_LOG1 ("%d: node %d is caugth-up without locking cluster" , MyProcPid , nodeId );
1605
1629
/* We are lucky: caught up without locking cluster! */
1606
1630
}
1631
+ Mtm -> recoveredLSN = walLSN ;
1607
1632
MtmEnableNode (nodeId );
1608
1633
Mtm -> nConfigChanges += 1 ;
1609
1634
caughtUp = true;
@@ -2075,6 +2100,7 @@ static void MtmInitialize()
2075
2100
Mtm -> walSenderLockerMask = 0 ;
2076
2101
Mtm -> nodeLockerMask = 0 ;
2077
2102
Mtm -> reconnectMask = 0 ;
2103
+ Mtm -> recoveredLSN = InvalidXLogRecPtr ;
2078
2104
Mtm -> nLockers = 0 ;
2079
2105
Mtm -> nActiveTransactions = 0 ;
2080
2106
Mtm -> votingTransactions = NULL ;
@@ -2102,13 +2128,14 @@ static void MtmInitialize()
2102
2128
Mtm -> nodes [i ].con = MtmConnections [i ];
2103
2129
Mtm -> nodes [i ].flushPos = 0 ;
2104
2130
Mtm -> nodes [i ].lastHeartbeat = 0 ;
2105
- Mtm -> nodes [i ].restartLsn = 0 ;
2131
+ Mtm -> nodes [i ].restartLSN = InvalidXLogRecPtr ;
2106
2132
Mtm -> nodes [i ].originId = InvalidRepOriginId ;
2107
2133
Mtm -> nodes [i ].timeline = 0 ;
2134
+ Mtm -> nodes [i ].recoveredLSN = InvalidXLogRecPtr ;
2108
2135
}
2109
2136
Mtm -> nodes [MtmNodeId - 1 ].originId = DoNotReplicateId ;
2110
2137
/* All transaction originated from the current node should be ignored during recovery */
2111
- Mtm -> nodes [MtmNodeId - 1 ].restartLsn = (XLogRecPtr )PG_UINT64_MAX ;
2138
+ Mtm -> nodes [MtmNodeId - 1 ].restartLSN = (XLogRecPtr )PG_UINT64_MAX ;
2112
2139
PGSemaphoreCreate (& Mtm -> sendSemaphore );
2113
2140
PGSemaphoreReset (& Mtm -> sendSemaphore );
2114
2141
SpinLockInit (& Mtm -> spinlock );
@@ -2811,20 +2838,23 @@ void MtmReleaseRecoverySlot(int nodeId)
2811
2838
if (Mtm -> recoverySlot == nodeId ) {
2812
2839
Mtm -> recoverySlot = 0 ;
2813
2840
}
2814
- }
2841
+ }
2815
2842
2816
- void MtmRollbackPreparedTransaction (char const * gid )
2843
+ void MtmRollbackPreparedTransaction (int nodeId , char const * gid )
2817
2844
{
2818
- MTM_LOG1 ("Abort prepared transaction %s" , gid );
2819
- if (MtmExchangeGlobalTransactionStatus (gid , TRANSACTION_STATUS_ABORTED ) == TRANSACTION_STATUS_UNKNOWN ) {
2845
+ XidStatus status = MtmExchangeGlobalTransactionStatus (gid , TRANSACTION_STATUS_ABORTED );
2846
+ MTM_LOG1 ("Abort prepared transaction %s status %d" , gid , status );
2847
+ if (status == TRANSACTION_STATUS_UNKNOWN ) {
2820
2848
MTM_LOG1 ("PGLOGICAL_ABORT_PREPARED commit: gid=%s #2" , gid );
2821
2849
MtmResetTransaction ();
2822
2850
StartTransactionCommand ();
2823
- MtmBeginSession (MtmReplicationNodeId );
2851
+ MtmBeginSession (nodeId );
2824
2852
MtmSetCurrentTransactionGID (gid );
2825
2853
FinishPreparedTransaction (gid , false);
2826
2854
CommitTransactionCommand ();
2827
- MtmEndSession (MtmReplicationNodeId , true);
2855
+ MtmEndSession (nodeId , true);
2856
+ } else if (status == TRANSACTION_STATUS_IN_PROGRESS ) {
2857
+ MtmLogAbortLogicalMessage (nodeId , gid );
2828
2858
}
2829
2859
}
2830
2860
@@ -2852,18 +2882,21 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2852
2882
*/
2853
2883
MtmReplicationMode MtmGetReplicationMode (int nodeId , sig_atomic_t volatile * shutdown )
2854
2884
{
2855
- bool recovery = false ;
2885
+ MtmReplicationMode mode = REPLMODE_OPEN_EXISTED ;
2856
2886
2857
- while (Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE )
2887
+ while (( Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE ) || BIT_CHECK ( Mtm -> disabledNodeMask , nodeId - 1 ) )
2858
2888
{
2859
2889
if (* shutdown )
2860
2890
{
2861
2891
return REPLMODE_EXIT ;
2862
2892
}
2863
- MTM_LOG2 ("%d: receiver slot mode %s" , MyProcPid , MtmNodeStatusMnem [Mtm -> status ]);
2864
2893
MtmLock (LW_EXCLUSIVE );
2894
+ if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
2895
+ mode = REPLMODE_CREATE_NEW ;
2896
+ }
2897
+ MTM_LOG2 ("%d: receiver slot mode %s" , MyProcPid , MtmNodeStatusMnem [Mtm -> status ]);
2865
2898
if (Mtm -> status == MTM_RECOVERY ) {
2866
- recovery = true ;
2899
+ mode = REPLMODE_RECOVERED ;
2867
2900
if ((Mtm -> recoverySlot == 0 && (Mtm -> donorNodeId == MtmNodeId || Mtm -> donorNodeId == nodeId ))
2868
2901
|| Mtm -> recoverySlot == nodeId )
2869
2902
{
@@ -2881,13 +2914,14 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2881
2914
/* delay opening of other slots until recovery is completed */
2882
2915
MtmSleep (STATUS_POLL_DELAY );
2883
2916
}
2884
- if (recovery ) {
2917
+ if (mode == REPLMODE_RECOVERED ) {
2885
2918
MTM_LOG1 ("%d: Restart replication from node %d after end of recovery" , MyProcPid , nodeId );
2919
+ } else if (mode == REPLMODE_CREATE_NEW ) {
2920
+ MTM_LOG1 ("%d: Start replication from recovered node %d" , MyProcPid , nodeId );
2886
2921
} else {
2887
2922
MTM_LOG1 ("%d: Continue replication from node %d" , MyProcPid , nodeId );
2888
2923
}
2889
- /* After recovery completion we need to drop all other slots to avoid receive of redundant data */
2890
- return recovery ? REPLMODE_RECOVERED : REPLMODE_NORMAL ;
2924
+ return mode ;
2891
2925
}
2892
2926
2893
2927
static bool MtmIsBroadcast ()
@@ -2966,7 +3000,7 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2966
3000
MtmIsRecoverySession = true;
2967
3001
} else if (strcmp (strVal (elem -> arg ), "recovered" ) == 0 ) {
2968
3002
recoveryCompleted = true;
2969
- } else if (strcmp (strVal (elem -> arg ), "normal " ) != 0 ) {
3003
+ } else if (strcmp (strVal (elem -> arg ), "open_existed" ) != 0 && strcmp ( strVal ( elem -> arg ), "create_new " ) != 0 ) {
2970
3004
elog (ERROR , "Illegal recovery mode %s" , strVal (elem -> arg ));
2971
3005
}
2972
3006
} else {
@@ -2978,14 +3012,20 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2978
3012
} else {
2979
3013
elog (ERROR , "Restart position is not specified" );
2980
3014
}
3015
+ } else if (strcmp ("mtm_recovered_pos" , elem -> defname ) == 0 ) {
3016
+ if (elem -> arg != NULL && strVal (elem -> arg ) != NULL ) {
3017
+ sscanf (strVal (elem -> arg ), "%lx" , & Mtm -> nodes [MtmReplicationNodeId - 1 ].recoveredLSN );
3018
+ } else {
3019
+ elog (ERROR , "Recovered position is not specified" );
3020
+ }
2981
3021
}
2982
3022
}
2983
3023
MtmLock (LW_EXCLUSIVE );
2984
3024
if (MtmIsRecoverySession ) {
2985
3025
MTM_LOG1 ("%d: Node %d start recovery of node %d at position %lx" , MyProcPid , MtmNodeId , MtmReplicationNodeId , recoveryStartPos );
2986
3026
Assert (MyReplicationSlot != NULL );
2987
3027
if (recoveryStartPos < MyReplicationSlot -> data .restart_lsn ) {
2988
- elog (ERROR , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
3028
+ elog (WARNING , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
2989
3029
}
2990
3030
if (!BIT_CHECK (Mtm -> disabledNodeMask , MtmReplicationNodeId - 1 )) {
2991
3031
MtmDisableNode (MtmReplicationNodeId );
@@ -3134,19 +3174,11 @@ bool MtmFilterTransaction(char* record, int size)
3134
3174
default :
3135
3175
break ;
3136
3176
}
3137
- duplicate = Mtm -> status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLsn ;
3177
+ //duplicate = Mtm->status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm->nodes[origin_node-1].restartLSN;
3178
+ duplicate = origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLSN ;
3138
3179
3139
- MTM_LOG1 ("%s transaction %s from node %d lsn %lx, origin node %d, original lsn=%lx, current lsn=%lx" ,
3140
- duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLsn );
3141
- if (Mtm -> status == MTM_RECOVERY ) {
3142
- if (Mtm -> nodes [origin_node - 1 ].restartLsn < origin_lsn ) {
3143
- Mtm -> nodes [origin_node - 1 ].restartLsn = origin_lsn ;
3144
- }
3145
- } else {
3146
- if (Mtm -> nodes [replication_node - 1 ].restartLsn < end_lsn ) {
3147
- Mtm -> nodes [replication_node - 1 ].restartLsn = end_lsn ;
3148
- }
3149
- }
3180
+ MTM_LOG1 ("%s transaction %s from node %d lsn %lx, flags=%x, origin node %d, original lsn=%lx, current lsn=%lx" ,
3181
+ duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , flags , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLSN );
3150
3182
return duplicate ;
3151
3183
}
3152
3184
@@ -3759,12 +3791,16 @@ static bool MtmTwoPhaseCommit(MtmCurrentTrans* x)
3759
3791
/* ??? Should we do explicit rollback */
3760
3792
} else {
3761
3793
CommitTransactionCommand ();
3762
- StartTransactionCommand ();
3763
- if (MtmGetCurrentTransactionStatus () == TRANSACTION_STATUS_ABORTED ) {
3764
- FinishPreparedTransaction (x -> gid , false);
3765
- elog (ERROR , "Transaction %s is aborted by DTM" , x -> gid );
3766
- } else {
3767
- FinishPreparedTransaction (x -> gid , true);
3794
+ if (x -> isSuspended ) {
3795
+ elog (WARNING , "Transaction %s is left in prepared state because coordinator onde is not online" , x -> gid );
3796
+ } else {
3797
+ StartTransactionCommand ();
3798
+ if (x -> status == TRANSACTION_STATUS_ABORTED ) {
3799
+ FinishPreparedTransaction (x -> gid , false);
3800
+ elog (ERROR , "Transaction %s is aborted by DTM" , x -> gid );
3801
+ } else {
3802
+ FinishPreparedTransaction (x -> gid , true);
3803
+ }
3768
3804
}
3769
3805
}
3770
3806
return true;
0 commit comments