15
15
#include "miscadmin.h"
16
16
17
17
#include "libpq-fe.h"
18
+ #include "lib/stringinfo.h"
19
+ #include "libpq/pqformat.h"
18
20
#include "common/username.h"
19
21
20
22
#include "postmaster/postmaster.h"
@@ -925,7 +927,9 @@ MtmVotingCompleted(MtmTransState* ts)
925
927
ts -> votingCompleted = true;
926
928
ts -> status = TRANSACTION_STATUS_UNKNOWN ;
927
929
return true;
928
- } else {
930
+ } else {
931
+ MTM_LOG1 ("Transaction %s is considered as prepared (status=%d participants=%lx disabled=%lx, voted=%lx)" ,
932
+ ts -> gid , ts -> status , ts -> participantsMask , Mtm -> disabledNodeMask , ts -> votedMask );
929
933
ts -> isPrepared = true;
930
934
if (ts -> isTwoPhase ) {
931
935
ts -> votingCompleted = true;
@@ -979,9 +983,10 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
979
983
MtmResetTransaction ();
980
984
} else {
981
985
int result = 0 ;
982
-
986
+ int nConfigChanges = Mtm -> nConfigChanges ;
983
987
/* Wait votes from all nodes until: */
984
- while (!MtmVotingCompleted (ts ))
988
+ while (!MtmVotingCompleted (ts )
989
+ && (ts -> isPrepared || nConfigChanges == Mtm -> nConfigChanges ))
985
990
{
986
991
MtmUnlock ();
987
992
MTM_TXTRACE (x , "PostPrepareTransaction WaitLatch Start" );
@@ -997,8 +1002,15 @@ MtmPostPrepareTransaction(MtmCurrentTrans* x)
997
1002
MtmLock (LW_EXCLUSIVE );
998
1003
}
999
1004
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1005
+ if (ts -> isPrepared ) {
1006
+ elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1007
+ }
1008
+ if (Mtm -> status != MTM_ONLINE ) {
1009
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1010
+ } else {
1011
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1012
+ }
1000
1013
MtmAbortTransaction (ts );
1001
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1002
1014
}
1003
1015
x -> status = ts -> status ;
1004
1016
MTM_LOG3 ("%d: Result of vote: %d" , MyProcPid , ts -> status );
@@ -1031,6 +1043,7 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1031
1043
elog (WARNING , "Global transaciton ID '%s' is not found" , x -> gid );
1032
1044
} else {
1033
1045
int result = 0 ;
1046
+ int nConfigChanges = Mtm -> nConfigChanges ;
1034
1047
1035
1048
Assert (tm -> state != NULL );
1036
1049
MTM_LOG3 ("Commit prepared transaction %d with gid='%s'" , x -> xid , x -> gid );
@@ -1045,7 +1058,8 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1045
1058
MtmSend2PCMessage (ts , MSG_PRECOMMIT );
1046
1059
1047
1060
/* Wait votes from all nodes until: */
1048
- while (!MtmVotingCompleted (ts ))
1061
+ while (!MtmVotingCompleted (ts )
1062
+ && (ts -> isPrepared || nConfigChanges == Mtm -> nConfigChanges ))
1049
1063
{
1050
1064
MtmUnlock ();
1051
1065
MTM_TXTRACE (x , "CommitPreparedTransaction WaitLatch Start" );
@@ -1062,8 +1076,15 @@ MtmCommitPreparedTransaction(MtmCurrentTrans* x)
1062
1076
}
1063
1077
}
1064
1078
if (ts -> status != TRANSACTION_STATUS_ABORTED && !ts -> votingCompleted ) {
1079
+ if (ts -> isPrepared ) {
1080
+ elog (ERROR , "Commit of distributed transaction %s is suspended because node is switched to %s mode" , ts -> gid , MtmNodeStatusMnem [Mtm -> status ]);
1081
+ }
1082
+ if (Mtm -> status != MTM_ONLINE ) {
1083
+ elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1084
+ } else {
1085
+ elog (WARNING , "Commit of distributed transaction is canceled because cluster configuration was changed" );
1086
+ }
1065
1087
MtmAbortTransaction (ts );
1066
- elog (WARNING , "Commit of distributed transaction is canceled because node is switched to %s mode" , MtmNodeStatusMnem [Mtm -> status ]);
1067
1088
}
1068
1089
x -> status = ts -> status ;
1069
1090
x -> xid = ts -> xid ;
@@ -1165,11 +1186,14 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1165
1186
}
1166
1187
ts -> status = TRANSACTION_STATUS_ABORTED ;
1167
1188
ts -> isLocal = true;
1189
+ ts -> isPrepared = false;
1168
1190
ts -> snapshot = x -> snapshot ;
1191
+ ts -> isTwoPhase = x -> isTwoPhase ;
1169
1192
ts -> csn = MtmAssignCSN ();
1170
1193
ts -> gtid = x -> gtid ;
1171
1194
ts -> nSubxids = 0 ;
1172
1195
ts -> votingCompleted = true;
1196
+ strcpy (ts -> gid , x -> gid );
1173
1197
if (ts -> isActive ) {
1174
1198
ts -> isActive = false;
1175
1199
Assert (Mtm -> nActiveTransactions != 0 );
@@ -1225,8 +1249,9 @@ void MtmSend2PCMessage(MtmTransState* ts, MtmMessageCode cmd)
1225
1249
int i ;
1226
1250
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ )
1227
1251
{
1228
- if (BIT_CHECK (ts -> participantsMask & ~Mtm -> disabledNodeMask , i ) && TransactionIdIsValid ( ts -> xids [ i ]) )
1252
+ if (BIT_CHECK (ts -> participantsMask & ~Mtm -> disabledNodeMask , i ))
1229
1253
{
1254
+ Assert (TransactionIdIsValid (ts -> xids [i ]));
1230
1255
msg .node = i + 1 ;
1231
1256
msg .dxid = ts -> xids [i ];
1232
1257
MtmSendMessage (& msg );
@@ -1654,7 +1679,7 @@ MtmCheckClusterLock()
1654
1679
continue ;
1655
1680
} else {
1656
1681
/* All lockers are synchronized their logs */
1657
- /* Remove lock and mark them as rceovered */
1682
+ /* Remove lock and mark them as recovered */
1658
1683
MTM_LOG1 ("Complete recovery of %d nodes (node mask %lx)" , Mtm -> nLockers , (long ) Mtm -> nodeLockerMask );
1659
1684
Assert (Mtm -> walSenderLockerMask == 0 );
1660
1685
Assert ((Mtm -> nodeLockerMask & Mtm -> disabledNodeMask ) == Mtm -> nodeLockerMask );
@@ -2081,6 +2106,8 @@ static void MtmInitialize()
2081
2106
Mtm -> nodes [i ].timeline = 0 ;
2082
2107
}
2083
2108
Mtm -> nodes [MtmNodeId - 1 ].originId = DoNotReplicateId ;
2109
+ /* All transaction originated from the current node should be ignored during recovery */
2110
+ Mtm -> nodes [MtmNodeId - 1 ].restartLsn = (XLogRecPtr )PG_UINT64_MAX ;
2084
2111
PGSemaphoreCreate (& Mtm -> sendSemaphore );
2085
2112
PGSemaphoreReset (& Mtm -> sendSemaphore );
2086
2113
SpinLockInit (& Mtm -> spinlock );
@@ -2807,12 +2834,7 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2807
2834
Assert (!IsTransactionState ());
2808
2835
MtmResetTransaction ();
2809
2836
StartTransactionCommand ();
2810
- #if 0
2811
- if (Mtm -> nodes [MtmNodeId - 1 ].originId == InvalidRepOriginId ) {
2812
- /* This dummy origin is used for local commits/aborts which should not be replicated */
2813
- Mtm -> nodes [MtmNodeId - 1 ].originId = replorigin_create (psprintf (MULTIMASTER_SLOT_PATTERN , MtmNodeId ));
2814
- }
2815
- #endif
2837
+
2816
2838
MtmBeginSession (MtmNodeId );
2817
2839
MtmSetCurrentTransactionCSN (ts -> csn );
2818
2840
MtmSetCurrentTransactionGID (ts -> gid );
@@ -2829,7 +2851,6 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
2829
2851
*/
2830
2852
MtmReplicationMode MtmGetReplicationMode (int nodeId , sig_atomic_t volatile * shutdown )
2831
2853
{
2832
- int i ;
2833
2854
bool recovery = false;
2834
2855
2835
2856
while (Mtm -> status != MTM_CONNECTED && Mtm -> status != MTM_ONLINE )
@@ -2851,9 +2872,6 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2851
2872
Mtm -> nReceivers = 0 ;
2852
2873
Mtm -> recoveryCount += 1 ;
2853
2874
Mtm -> pglogicalNodeMask = 0 ;
2854
- for (i = 0 ; i < Mtm -> nAllNodes ; i ++ ) {
2855
- Mtm -> nodes [i ].restartLsn = InvalidXLogRecPtr ;
2856
- }
2857
2875
MtmUnlock ();
2858
2876
return REPLMODE_RECOVERY ;
2859
2877
}
@@ -3070,6 +3088,67 @@ MtmReplicationRowFilterHook(struct PGLogicalRowFilterArgs* args)
3070
3088
return isDistributed ;
3071
3089
}
3072
3090
3091
+ bool MtmFilterTransaction (char * record , int size )
3092
+ {
3093
+ StringInfoData s ;
3094
+ uint8 flags ;
3095
+ XLogRecPtr origin_lsn ;
3096
+ XLogRecPtr end_lsn ;
3097
+ int replication_node ;
3098
+ int origin_node ;
3099
+ char const * gid = "" ;
3100
+ bool duplicate ;
3101
+
3102
+ s .data = record ;
3103
+ s .len = size ;
3104
+ s .maxlen = -1 ;
3105
+ s .cursor = 0 ;
3106
+
3107
+ Assert (pq_getmsgbyte (& s ) == 'C' );
3108
+ flags = pq_getmsgbyte (& s ); /* flags */
3109
+ replication_node = pq_getmsgbyte (& s );
3110
+
3111
+ /* read fields */
3112
+ pq_getmsgint64 (& s ); /* commit_lsn */
3113
+ end_lsn = pq_getmsgint64 (& s ); /* end_lsn */
3114
+ pq_getmsgint64 (& s ); /* commit_time */
3115
+
3116
+ origin_node = pq_getmsgbyte (& s );
3117
+ origin_lsn = pq_getmsgint64 (& s );
3118
+
3119
+ Assert (replication_node == MtmReplicationNodeId &&
3120
+ origin_node != 0 &&
3121
+ (Mtm -> status == MTM_RECOVERY || origin_node == replication_node ));
3122
+
3123
+ switch (PGLOGICAL_XACT_EVENT (flags ))
3124
+ {
3125
+ case PGLOGICAL_PREPARE :
3126
+ case PGLOGICAL_ABORT_PREPARED :
3127
+ gid = pq_getmsgstring (& s );
3128
+ break ;
3129
+ case PGLOGICAL_COMMIT_PREPARED :
3130
+ pq_getmsgint64 (& s ); /* CSN */
3131
+ gid = pq_getmsgstring (& s );
3132
+ break ;
3133
+ default :
3134
+ break ;
3135
+ }
3136
+ duplicate = Mtm -> status == MTM_RECOVERY && origin_lsn != InvalidXLogRecPtr && origin_lsn <= Mtm -> nodes [origin_node - 1 ].restartLsn ;
3137
+
3138
+ MTM_LOG1 ("%s transaction %s from node %d lsn %lx, origin node %d, original lsn=%lx, current lsn=%lx" ,
3139
+ duplicate ? "Ignore" : "Apply" , gid , replication_node , end_lsn , origin_node , origin_lsn , Mtm -> nodes [origin_node - 1 ].restartLsn );
3140
+ if (Mtm -> status == MTM_RECOVERY ) {
3141
+ if (Mtm -> nodes [origin_node - 1 ].restartLsn < origin_lsn ) {
3142
+ Mtm -> nodes [origin_node - 1 ].restartLsn = origin_lsn ;
3143
+ }
3144
+ } else {
3145
+ if (Mtm -> nodes [replication_node - 1 ].restartLsn < end_lsn ) {
3146
+ Mtm -> nodes [replication_node - 1 ].restartLsn = end_lsn ;
3147
+ }
3148
+ }
3149
+ return duplicate ;
3150
+ }
3151
+
3073
3152
void MtmSetupReplicationHooks (struct PGLogicalHooks * hooks )
3074
3153
{
3075
3154
hooks -> startup_hook = MtmReplicationStartupHook ;
0 commit comments