15
15
#include "miscadmin.h"
16
16
17
17
#include "libpq-fe.h"
18
+ #include "lib/stringinfo.h"
19
+ #include "libpq/pqformat.h"
18
20
#include "common/username.h"
19
21
20
22
#include "postmaster/postmaster.h"
@@ -926,7 +928,9 @@ MtmVotingCompleted(MtmTransState* ts)
926
928
ts -> votingCompleted = true;
927
929
ts -> status = TRANSACTION_STATUS_UNKNOWN ;
928
930
return true;
929
- } else {
931
+ } else {
932
+ MTM_LOG1 ("Transaction %s is considered as prepared (status=%d participants=%lx disabled=%lx, voted=%lx)" ,
933
+ ts -> gid , ts -> status , ts -> participantsMask , Mtm -> disabledNodeMask , ts -> votedMask );
930
934
ts -> isPrepared = true;
931
935
if (ts -> isTwoPhase ) {
932
936
ts -> votingCompleted = true;
@@ -980,9 +984,10 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
980
984
MtmResetTransaction ();
981
985
} else {
982
986
int result = 0 ;
983
-
987
+ int nConfigChanges = Mtm -> nConfigChanges ;
984
988
/* Wait votes from all nodes until: */
985
- while (!MtmVotingCompleted (ts ))
989
+ while (!MtmVotingCompleted (ts )
990
+ && (ts -> isPrepared || nConfigChanges == Mtm -> nConfigChanges ))
986
991
{
987
992
MtmUnlock ();
988
993
MTM_TXTRACE (x , "PostPrepareTransaction WaitLatch Start" );
@@ -998,8 +1003,15 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
998
1003
MtmLock (LW_EXCLUSIVE );
999
1004
}
1000
1005
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1006
+ if (ts -> isPrepared ) {
1007
+ elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1008
+ }
1009
+ if (Mtm -> status != MTM_ONLINE ) {
1010
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1011
+ } else {
1012
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1013
+ }
1001
1014
MtmAbortTransaction (ts );
1002
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1003
1015
}
1004
1016
x -> status = ts -> status ;
1005
1017
MTM_LOG3 ("%d: Result of vote: %d" , MyProcPid , ts -> status );
@@ -1032,6 +1044,7 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1032
1044
elog (WARNING , "Global transaciton ID '%s' is not found" , x -> gid );
1033
1045
} else {
1034
1046
int result = 0 ;
1047
+ int nConfigChanges = Mtm -> nConfigChanges ;
1035
1048
1036
1049
Assert (tm -> state != NULL );
1037
1050
MTM_LOG3 ("Commit prepared transaction %d with gid='%s'" , x -> xid , x -> gid );
@@ -1046,7 +1059,8 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1046
1059
MtmSend2PCMessage (ts , MSG_PRECOMMIT );
1047
1060
1048
1061
/* Wait votes from all nodes until: */
1049
- while (!MtmVotingCompleted (ts ))
1062
+ while (!MtmVotingCompleted (ts )
1063
+ && (ts -> isPrepared || nConfigChanges == Mtm -> nConfigChanges ))
1050
1064
{
1051
1065
MtmUnlock ();
1052
1066
MTM_TXTRACE (x , "CommitPreparedTransaction WaitLatch Start" );
@@ -1063,8 +1077,15 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1063
1077
}
1064
1078
}
1065
1079
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1080
+ if (ts -> isPrepared ) {
1081
+ elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1082
+ }
1083
+ if (Mtm -> status != MTM_ONLINE ) {
1084
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1085
+ } else {
1086
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1087
+ }
1066
1088
MtmAbortTransaction (ts );
1067
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1068
1089
}
1069
1090
x -> status = ts -> status ;
1070
1091
x -> xid = ts -> xid ;
@@ -1166,11 +1187,14 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1166
1187
}
1167
1188
ts -> status = TRANSACTION_STATUS_ABORTED ;
1168
1189
ts -> isLocal = true;
1190
+ ts -> isPrepared = false;
1169
1191
ts -> snapshot = x -> snapshot ;
1192
+ ts -> isTwoPhase = x -> isTwoPhase ;
1170
1193
ts -> csn = MtmAssignCSN ();
1171
1194
ts -> gtid = x -> gtid ;
1172
1195
ts -> nSubxids = 0 ;
1173
1196
ts -> votingCompleted = true;
1197
+ strcpy (ts -> gid , x -> gid );
1174
1198
if (ts -> isActive ) {
1175
1199
ts -> isActive = false;
1176
1200
Assert (Mtm -> nActiveTransactions != 0 );
@@ -1226,8 +1250,9 @@ void MtmSend2PCMessage(MtmTransState* ts, MtmMessageCode cmd)
1226
1250
int i ;
1227
1251
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ )
1228
1252
{
1229
- if (BIT_CHECK (ts -> participantsMask & ~Mtm -> disabledNodeMask , i ) && TransactionIdIsValid ( ts -> xids [ i ]) )
1253
+ if (BIT_CHECK (ts -> participantsMask & ~Mtm -> disabledNodeMask , i ))
1230
1254
{
1255
+ Assert (TransactionIdIsValid (ts -> xids [i ]));
1231
1256
msg .node = i + 1 ;
1232
1257
msg .dxid = ts -> xids [i ];
1233
1258
MtmSendMessage (& msg );
@@ -1655,7 +1680,7 @@ MtmCheckClusterLock()
1655
1680
continue ;
1656
1681
} else {
1657
1682
/* All lockers have synchronized their logs */
1658
- /* Remove lock and mark them as rceovered */
1683
+ /* Remove lock and mark them as recovered */
1659
1684
MTM_LOG1 ("Complete recovery of %d nodes (node mask %lx)" , Mtm -> nLockers , (long ) Mtm -> nodeLockerMask );
1660
1685
Assert (Mtm -> walSenderLockerMask == 0 );
1661
1686
Assert ((Mtm -> nodeLockerMask & Mtm -> disabledNodeMask ) == Mtm -> nodeLockerMask );
@@ -2082,6 +2107,8 @@ static void MtmInitialize()
2082
2107
Mtm -> nodes [i ].timeline = 0 ;
2083
2108
}
2084
2109
Mtm -> nodes [MtmNodeId - 1 ].originId = DoNotReplicateId ;
2110
+ /* All transactions originating from the current node should be ignored during recovery */
2111
+ Mtm -> nodes [MtmNodeId - 1 ].restartLsn = (XLogRecPtr )PG_UINT64_MAX ;
2085
2112
PGSemaphoreCreate (& Mtm -> sendSemaphore );
2086
2113
PGSemaphoreReset (& Mtm -> sendSemaphore );
2087
2114
SpinLockInit (& Mtm -> spinlock );
@@ -2808,12 +2835,7 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2808
2835
Assert (!IsTransactionState ());
2809
2836
MtmResetTransaction ();
2810
2837
StartTransactionCommand ();
2811
- #if 0
2812
- if (Mtm -> nodes [MtmNodeId - 1 ].originId == InvalidRepOriginId ) {
2813
- /* This dummy origin is used for local commits/aborts which should not be replicated */
2814
- Mtm -> nodes [MtmNodeId - 1 ].originId = replorigin_create (psprintf (MULTIMASTER_SLOT_PATTERN , MtmNodeId ));
2815
- }
2816
- #endif
2838
+
2817
2839
MtmBeginSession (MtmNodeId );
2818
2840
MtmSetCurrentTransactionCSN (ts -> csn );
2819
2841
MtmSetCurrentTransactionGID (ts -> gid );
@@ -2830,7 +2852,6 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2830
2852
*/
2831
2853
MtmReplicationMode MtmGetReplicationMode (int nodeId , sig_atomic_t volatile * shutdown )
2832
2854
{
2833
- int i ;
2834
2855
bool recovery = false;
2835
2856
2836
2857
while (Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE )
@@ -2852,9 +2873,6 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2852
2873
Mtm -> nReceivers = 0 ;
2853
2874
Mtm -> recoveryCount += 1 ;
2854
2875
Mtm -> pglogicalNodeMask = 0 ;
2855
- for (i = 0 ; i < Mtm -> nAllNodes ; i ++ ) {
2856
- Mtm -> nodes [i ].restartLsn = InvalidXLogRecPtr ;
2857
- }
2858
2876
MtmUnlock ();
2859
2877
return REPLMODE_RECOVERY ;
2860
2878
}
@@ -3071,6 +3089,67 @@ MtmReplicationRowFilterHook(struct PGLogicalRowFilterArgs* args)
3071
3089
return isDistributed ;
3072
3090
}
3073
3091
3092
+ bool MtmFilterTransaction (char * record , int size )
3093
+ {
3094
+ StringInfoData s ;
3095
+ uint8 flags ;
3096
+ XLogRecPtr origin_lsn ;
3097
+ XLogRecPtr end_lsn ;
3098
+ int replication_node ;
3099
+ int origin_node ;
3100
+ char const * gid = "" ;
3101
+ bool duplicate ;
3102
+
3103
+ s .data = record ;
3104
+ s .len = size ;
3105
+ s .maxlen = -1 ;
3106
+ s .cursor = 0 ;
3107
+
3108
+ Assert (pq_getmsgbyte (& s ) == 'C' );
3109
+ flags = pq_getmsgbyte (& s ); /* flags */
3110
+ replication_node = pq_getmsgbyte (& s );
3111
+
3112
+ /* read fields */
3113
+ pq_getmsgint64 (& s ); /* commit_lsn */
3114
+ end_lsn = pq_getmsgint64 (& s ); /* end_lsn */
3115
+ pq_getmsgint64 (& s ); /* commit_time */
3116
+
3117
+ origin_node = pq_getmsgbyte (& s );
3118
+ origin_lsn = pq_getmsgint64 (& s );
3119
+
3120
+ Assert (replication_node == MtmReplicationNodeId &&
3121
+ origin_node != 0 &&
3122
+ (Mtm -> status == MTM_RECOVERY || origin_node == replication_node ));
3123
+
3124
+ switch (PGLOGICAL_XACT_EVENT (flags ))
3125
+ {
3126
+ case PGLOGICAL_PREPARE :
3127
+ case PGLOGICAL_ABORT_PREPARED :
3128
+ gid = pq_getmsgstring (& s );
3129
+ break ;
3130
+ case PGLOGICAL_COMMIT_PREPARED :
3131
+ pq_getmsgint64 (& s ); /* CSN */
3132
+ gid = pq_getmsgstring (& s );
3133
+ break ;
3134
+ default :
3135
+ break ;
3136
+ }
3137
+ duplicate = Mtm -> status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLsn ;
3138
+
3139
+ MTM_LOG1 ("%s transaction %s from node %d lsn %lx, origin node %d, original lsn=%lx, current lsn=%lx" ,
3140
+ duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLsn );
3141
+ if (Mtm -> status == MTM_RECOVERY ) {
3142
+ if (Mtm -> nodes [origin_node - 1 ].restartLsn < origin_lsn ) {
3143
+ Mtm -> nodes [origin_node - 1 ].restartLsn = origin_lsn ;
3144
+ }
3145
+ } else {
3146
+ if (Mtm -> nodes [replication_node - 1 ].restartLsn < end_lsn ) {
3147
+ Mtm -> nodes [replication_node - 1 ].restartLsn = end_lsn ;
3148
+ }
3149
+ }
3150
+ return duplicate ;
3151
+ }
3152
+
3074
3153
void MtmSetupReplicationHooks (struct PGLogicalHooks * hooks )
3075
3154
{
3076
3155
hooks -> startup_hook = MtmReplicationStartupHook ;
0 commit comments