@@ -89,8 +89,6 @@ typedef struct {
89
89
pgid_t gid ; /* global transaction identifier (used by 2pc) */
90
90
} MtmCurrentTrans ;
91
91
92
- /* #define USE_SPINLOCK 1 */
93
-
94
92
typedef enum
95
93
{
96
94
MTM_STATE_LOCK_ID
@@ -245,6 +243,7 @@ static int MtmMaxRecoveryLag;
245
243
static int MtmGcPeriod ;
246
244
static bool MtmIgnoreTablesWithoutPk ;
247
245
static int MtmLockCount ;
246
+ static int MtmSenderStarted ;
248
247
249
248
static ExecutorStart_hook_type PreviousExecutorStartHook ;
250
249
static ExecutorFinish_hook_type PreviousExecutorFinishHook ;
@@ -273,16 +272,12 @@ void MtmLock(LWLockMode mode)
273
272
return ;
274
273
}
275
274
}
276
- #ifdef USE_SPINLOCK
277
- SpinLockAcquire (& Mtm -> spinlock );
278
- #else
279
275
start = MtmGetSystemTime ();
280
276
LWLockAcquire ((LWLockId )& Mtm -> locks [MTM_STATE_LOCK_ID ], mode );
281
277
stop = MtmGetSystemTime ();
282
278
if (stop > start + MSEC_TO_USEC (MtmHeartbeatSendTimeout )) {
283
279
MTM_LOG1 ("%d: obtaining %s lock takes %ld microseconds" , MyProcPid , (mode == LW_EXCLUSIVE ? "exclusive" : "shared" ), stop - start );
284
280
}
285
- #endif
286
281
Mtm -> lastLockHolder = MyProcPid ;
287
282
}
288
283
@@ -291,11 +286,7 @@ void MtmUnlock(void)
291
286
if (MtmLockCount != 0 && -- MtmLockCount != 0 ) {
292
287
return ;
293
288
}
294
- #ifdef USE_SPINLOCK
295
- SpinLockRelease (& Mtm -> spinlock );
296
- #else
297
289
LWLockRelease ((LWLockId )& Mtm -> locks [MTM_STATE_LOCK_ID ]);
298
- #endif
299
290
Mtm -> lastLockHolder = 0 ;
300
291
}
301
292
@@ -1231,7 +1222,7 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1231
1222
if (commit ) {
1232
1223
if (!(ts -> status == TRANSACTION_STATUS_UNKNOWN
1233
1224
|| (ts -> status == TRANSACTION_STATUS_IN_PROGRESS && Mtm -> status == MTM_RECOVERY )))
1234
- { Assert (false);
1225
+ {
1235
1226
elog (ERROR , "Attempt to commit %s transaction %d (%s)" ,
1236
1227
MtmTxnStatusMnem [ts -> status ], ts -> xid , ts -> gid );
1237
1228
}
@@ -1304,20 +1295,24 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1304
1295
1305
1296
/*
 * Queue a copy of *msg on the shared list Mtm->sendQueue and post
 * Mtm->sendSemaphore when the list changes from empty to non-empty.
 *
 * Queue cells are taken from Mtm->freeQueue when one is available; otherwise
 * a new cell is obtained from ShmemAlloc().  Both lists are protected by
 * Mtm->queueSpinlock.
 *
 * Only pointer manipulation is performed while the spinlock is held.
 * ShmemAlloc(), the message copy and PGSemaphoreUnlock() all run with the
 * spinlock released, so no allocation, error report or semaphore operation
 * happens inside the spinlock-protected section.
 *
 * msg: message to enqueue; it is copied by value, the caller keeps ownership.
 *
 * Raises ERROR if a new queue cell is needed and ShmemAlloc() returns NULL.
 */
void MtmSendMessage(MtmArbiterMessage* msg)
{
	MtmMessageQueue* mq;
	bool wasEmpty;

	/* Try to recycle a cell from the free list. */
	SpinLockAcquire(&Mtm->queueSpinlock);
	mq = Mtm->freeQueue;
	if (mq != NULL) {
		Mtm->freeQueue = mq->next;
	}
	SpinLockRelease(&Mtm->queueSpinlock);

	if (mq == NULL) {
		/* Free list is empty: allocate a new cell outside of the spinlock. */
		mq = (MtmMessageQueue*)ShmemAlloc(sizeof(MtmMessageQueue));
		if (mq == NULL) {
			elog(ERROR, "Out of shared memory for arbiter message queue");
		}
	}
	/* The cell is still private to this process, so no lock is needed here. */
	mq->msg = *msg;

	/* Push the cell on the head of the send list. */
	SpinLockAcquire(&Mtm->queueSpinlock);
	wasEmpty = (Mtm->sendQueue == NULL);
	mq->next = Mtm->sendQueue;
	Mtm->sendQueue = mq;
	SpinLockRelease(&Mtm->queueSpinlock);

	if (wasEmpty) {
		/* signal semaphore only once for the whole list */
		PGSemaphoreUnlock(&Mtm->sendSemaphore);
	}
}
1322
1317
1323
1318
void MtmSend2PCMessage (MtmTransState * ts , MtmMessageCode cmd )
@@ -1667,8 +1662,8 @@ void MtmRecoveryCompleted(void)
1667
1662
Mtm -> nodes [i ].lastHeartbeat = 0 ; /* defuse watchdog until first heartbeat is received */
1668
1663
}
1669
1664
/* Mode will be changed to online once all logical receiver are connected */
1670
- elog (LOG , "Recovery completed with %d active receivers from %d" , Mtm -> nReceivers , Mtm -> nLiveNodes - 1 );
1671
- MtmSwitchClusterMode (Mtm -> nReceivers == Mtm -> nLiveNodes - 1 ? MTM_ONLINE : MTM_CONNECTED );
1665
+ elog (LOG , "Recovery completed with %d active receivers and %d started senders from %d" , Mtm -> nReceivers , Mtm -> nSenders , Mtm -> nLiveNodes - 1 );
1666
+ MtmSwitchClusterMode (Mtm -> nReceivers == Mtm -> nLiveNodes - 1 && Mtm -> nSenders == Mtm -> nLiveNodes - 1 ? MTM_ONLINE : MTM_CONNECTED );
1672
1667
MtmUnlock ();
1673
1668
}
1674
1669
@@ -2010,7 +2005,7 @@ void MtmOnNodeDisconnect(int nodeId)
2010
2005
MtmLock (LW_EXCLUSIVE );
2011
2006
BIT_SET (Mtm -> connectivityMask , nodeId - 1 );
2012
2007
BIT_SET (Mtm -> reconnectMask , nodeId - 1 );
2013
- MTM_LOG1 ( "Disconnect node %d connectivity mask %llx" , nodeId , (long long ) Mtm -> connectivityMask );
2008
+ elog ( LOG , "Disconnect node %d connectivity mask %llx" , nodeId , (long long ) Mtm -> connectivityMask );
2014
2009
MtmUnlock ();
2015
2010
2016
2011
MtmSleep (MSEC_TO_USEC (MtmHeartbeatSendTimeout ));
@@ -2020,6 +2015,7 @@ void MtmOnNodeDisconnect(int nodeId)
2020
2015
void MtmOnNodeConnect (int nodeId )
2021
2016
{
2022
2017
MtmLock (LW_EXCLUSIVE );
2018
+ elog (LOG , "Connect node %d connectivity mask %llx" , nodeId , (long long ) Mtm -> connectivityMask );
2023
2019
BIT_CLEAR (Mtm -> connectivityMask , nodeId - 1 );
2024
2020
BIT_CLEAR (Mtm -> reconnectMask , nodeId - 1 );
2025
2021
MtmUnlock ();
@@ -2198,6 +2194,7 @@ static void MtmInitialize()
2198
2194
Mtm -> transListHead = NULL ;
2199
2195
Mtm -> transListTail = & Mtm -> transListHead ;
2200
2196
Mtm -> nReceivers = 0 ;
2197
+ Mtm -> nSenders = 0 ;
2201
2198
Mtm -> timeShift = 0 ;
2202
2199
Mtm -> transCount = 0 ;
2203
2200
Mtm -> gcCount = 0 ;
@@ -2229,7 +2226,7 @@ static void MtmInitialize()
2229
2226
Mtm -> nodes [MtmNodeId - 1 ].restartLSN = (XLogRecPtr )PG_UINT64_MAX ;
2230
2227
PGSemaphoreCreate (& Mtm -> sendSemaphore );
2231
2228
PGSemaphoreReset (& Mtm -> sendSemaphore );
2232
- SpinLockInit (& Mtm -> spinlock );
2229
+ SpinLockInit (& Mtm -> queueSpinlock );
2233
2230
BgwPoolInit (& Mtm -> pool , MtmExecutor , MtmDatabaseName , MtmDatabaseUser , MtmQueueSize , MtmWorkers );
2234
2231
RegisterXactCallback (MtmXactCallback , NULL );
2235
2232
MtmTx .snapshot = INVALID_CSN ;
@@ -2906,11 +2903,9 @@ void MtmReceiverStarted(int nodeId)
2906
2903
MtmEnableNode (nodeId );
2907
2904
MtmCheckQuorum ();
2908
2905
}
2909
- elog (LOG , "Start %d receivers from %d cluster status %s" , Mtm -> nReceivers + 1 , Mtm -> nLiveNodes - 1 , MtmNodeStatusMnem [Mtm -> status ]);
2910
- if (++ Mtm -> nReceivers == Mtm -> nLiveNodes - 1 ) {
2911
- if (Mtm -> status == MTM_CONNECTED ) {
2912
- MtmSwitchClusterMode (MTM_ONLINE );
2913
- }
2906
+ elog (LOG , "Start %d receivers and %d senders from %d cluster status %s" , Mtm -> nReceivers + 1 , Mtm -> nSenders , Mtm -> nLiveNodes - 1 , MtmNodeStatusMnem [Mtm -> status ]);
2907
+ if (++ Mtm -> nReceivers == Mtm -> nLiveNodes - 1 && Mtm -> nSenders == Mtm -> nLiveNodes - 1 && Mtm -> status == MTM_CONNECTED ) {
2908
+ MtmSwitchClusterMode (MTM_ONLINE );
2914
2909
}
2915
2910
}
2916
2911
MtmUnlock ();
@@ -2946,18 +2941,23 @@ void MtmRollbackPreparedTransaction(int nodeId, char const* gid)
2946
2941
2947
2942
/*
 * Commit or roll back the prepared transaction identified by ts->gid.
 *
 * ts:     transaction state; voting must already be completed
 *         (ts->votingCompleted is asserted).  ts->csn and ts->gid are
 *         installed as the current transaction CSN and GID before
 *         FinishPreparedTransaction() is called.
 * commit: true to commit the prepared transaction, false to roll it back.
 *
 * When the caller is not already inside a transaction, a transaction command
 * is started before and committed after FinishPreparedTransaction(); when
 * the caller is inside one, the enclosing transaction is left untouched.
 */
void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
{
	bool insideTransaction = IsTransactionState();

	Assert(ts->votingCompleted);
	MtmResetTransaction();

	if (!insideTransaction) {
		StartTransactionCommand();
	}
	/* MtmBeginSession(MtmNodeId) is intentionally not called here. */
	MtmSetCurrentTransactionCSN(ts->csn);
	MtmSetCurrentTransactionGID(ts->gid);
	FinishPreparedTransaction(ts->gid, commit);

	if (!insideTransaction) {
		CommitTransactionCommand();
		/* MtmEndSession(MtmNodeId, true) is intentionally not called here. */

		/*
		 * The conditional must be parenthesized: `==` binds tighter than
		 * `?:`, so without the parentheses the status would be compared with
		 * `commit` instead of with the expected final status.
		 */
		Assert(ts->status == (commit ? TRANSACTION_STATUS_COMMITTED : TRANSACTION_STATUS_ABORTED));
	}
}
2962
2962
2963
2963
/*
@@ -2997,6 +2997,7 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2997
2997
elog (WARNING , "Process %d starts recovery from node %d" , MyProcPid , nodeId );
2998
2998
Mtm -> recoverySlot = nodeId ;
2999
2999
Mtm -> nReceivers = 0 ;
3000
+ Mtm -> nSenders = 0 ;
3000
3001
Mtm -> recoveryCount += 1 ;
3001
3002
Mtm -> pglogicalNodeMask = 0 ;
3002
3003
MtmUnlock ();
@@ -3015,6 +3016,7 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3015
3016
} else {
3016
3017
MTM_LOG1 ("%d: Continue replication from node %d" , MyProcPid , nodeId );
3017
3018
}
3019
+ BIT_SET (Mtm -> reconnectMask , nodeId - 1 ); /* arbiter should try to reestblish connection with this node */
3018
3020
MtmUnlock ();
3019
3021
return mode ;
3020
3022
}
@@ -3144,6 +3146,12 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
3144
3146
} else {
3145
3147
MTM_LOG1 ("Node %d start logical replication to node %d in normal mode" , MtmNodeId , MtmReplicationNodeId );
3146
3148
}
3149
+ elog (LOG , "Start %d senders and %d receivers from %d cluster status %s" , Mtm -> nSenders + 1 , Mtm -> nReceivers , Mtm -> nLiveNodes - 1 , MtmNodeStatusMnem [Mtm -> status ]);
3150
+ MtmSenderStarted = 1 ;
3151
+ if (++ Mtm -> nSenders == Mtm -> nLiveNodes - 1 && Mtm -> nReceivers == Mtm -> nLiveNodes - 1 && Mtm -> status == MTM_CONNECTED ) {
3152
+ MtmSwitchClusterMode (MTM_ONLINE );
3153
+ }
3154
+ BIT_SET (Mtm -> reconnectMask , MtmReplicationNodeId - 1 ); /* arbiter should try to reestblish connection with this node */
3147
3155
MtmUnlock ();
3148
3156
on_shmem_exit (MtmOnProcExit , 0 );
3149
3157
}
@@ -3192,6 +3200,9 @@ static void
3192
3200
MtmReplicationShutdownHook (struct PGLogicalShutdownHookArgs * args )
3193
3201
{
3194
3202
if (MtmReplicationNodeId >= 0 ) {
3203
+ MtmLock (LW_EXCLUSIVE );
3204
+ Mtm -> nSenders -= MtmSenderStarted ;
3205
+ MtmUnlock ();
3195
3206
MTM_LOG1 ("Logical replication to node %d is stopped" , MtmReplicationNodeId );
3196
3207
/* MtmOnNodeDisconnect(MtmReplicationNodeId); */
3197
3208
MtmReplicationNodeId = -1 ; /* defuse on_proc_exit hook */
@@ -3290,7 +3301,7 @@ bool MtmFilterTransaction(char* record, int size)
3290
3301
}
3291
3302
3292
3303
if (duplicate ) {
3293
- MTM_LOG1 ("Ignore transaction %s from node %d flags=%x, our restartLSN for node: %lx,restart_lsn = (origin node %d == MtmReplicationNodeId %d) ? end_lsn=%lx, origin_lsn=%lx" ,
3304
+ MTM_LOG2 ("Ignore transaction %s from node %d flags=%x, our restartLSN for node: %lx,restart_lsn = (origin node %d == MtmReplicationNodeId %d) ? end_lsn=%lx, origin_lsn=%lx" ,
3294
3305
gid , replication_node , flags , Mtm -> nodes [origin_node - 1 ].restartLSN , origin_node , MtmReplicationNodeId , end_lsn , origin_lsn );
3295
3306
} else {
3296
3307
MTM_LOG2 ("Apply transaction %s from node %d lsn %lx, flags=%x, origin node %d, original lsn=%lx, current lsn=%lx" ,
0 commit comments