@@ -1316,13 +1316,15 @@ static void MtmPollStatusOfPreparedTransactions(int disabledNodeId)
1316
1316
1317
1317
static void MtmDisableNode (int nodeId )
1318
1318
{
1319
+ timestamp_t now = MtmGetSystemTime ();
1320
+ elog (WARNING , "Disable node %d at xlog position %lx, last status change time %d msec ago" , nodeId , GetXLogInsertRecPtr (),
1321
+ (int )USEC_TO_MSEC (now - Mtm -> nodes [nodeId - 1 ].lastStatusChangeTime ));
1319
1322
BIT_SET (Mtm -> disabledNodeMask , nodeId - 1 );
1320
- Mtm -> nodes [nodeId - 1 ].lastStatusChangeTime = MtmGetSystemTime () ;
1323
+ Mtm -> nodes [nodeId - 1 ].lastStatusChangeTime = now ;
1321
1324
Mtm -> nodes [nodeId - 1 ].lastHeartbeat = 0 ; /* defuse watchdog until first heartbeat is received */
1322
1325
if (nodeId != MtmNodeId ) {
1323
1326
Mtm -> nLiveNodes -= 1 ;
1324
1327
}
1325
- elog (WARNING , "Disable node %d at xlog position %lx" , nodeId , GetXLogInsertRecPtr ());
1326
1328
MtmPollStatusOfPreparedTransactions (nodeId );
1327
1329
}
1328
1330
@@ -1345,8 +1347,8 @@ void MtmRecoveryCompleted(void)
1345
1347
MtmNodeId , Mtm -> disabledNodeMask , Mtm -> reconnectMask , Mtm -> nLiveNodes );
1346
1348
MtmLock (LW_EXCLUSIVE );
1347
1349
Mtm -> recoverySlot = 0 ;
1348
- Mtm -> nodes [MtmNodeId - 1 ].lastStatusChangeTime = MtmGetSystemTime ();
1349
1350
BIT_CLEAR (Mtm -> disabledNodeMask , MtmNodeId - 1 );
1351
+ Mtm -> nodes [MtmNodeId - 1 ].lastStatusChangeTime = MtmGetSystemTime ();
1350
1352
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ ) {
1351
1353
Mtm -> nodes [i ].lastHeartbeat = 0 ; /* defuse watchdog until first heartbeat is received */
1352
1354
}
@@ -1600,10 +1602,11 @@ bool MtmRefreshClusterStatus(bool nowait, int testNodeId)
1600
1602
if (disabled ) {
1601
1603
timestamp_t now = MtmGetSystemTime ();
1602
1604
for (i = 0 , mask = disabled ; mask != 0 ; i ++ , mask >>= 1 ) {
1603
- if (mask & 1 ) {
1604
- if (Mtm -> nodes [i ].lastStatusChangeTime + MSEC_TO_USEC (MtmNodeDisableDelay ) < now ) {
1605
- MtmDisableNode (i + 1 );
1606
- }
1605
+ if (i + 1 != MtmNodeId
1606
+ && (mask & 1 ) != 0
1607
+ && Mtm -> nodes [i ].lastStatusChangeTime + MSEC_TO_USEC (MtmNodeDisableDelay ) < now )
1608
+ {
1609
+ MtmDisableNode (i + 1 );
1607
1610
}
1608
1611
}
1609
1612
}
@@ -1615,15 +1618,16 @@ bool MtmRefreshClusterStatus(bool nowait, int testNodeId)
1615
1618
1616
1619
if (disabled |enabled ) {
1617
1620
MtmCheckQuorum ();
1618
- }
1619
- /* Interrupt voting for active transaction and abort them */
1620
- for (ts = Mtm -> transListHead ; ts != NULL ; ts = ts -> next ) {
1621
- MTM_LOG3 ("Active transaction gid='%s', coordinator=%d, xid=%d, status=%d, gtid.xid=%d" ,
1622
- ts -> gid , ts -> gtid .nхode , ts -> xid , ts -> status , ts -> gtid .xid );
1623
- if (MtmIsCoordinator (ts )) {
1624
- if (!ts -> votingCompleted && disabled != 0 && ts -> status != TRANSACTION_STATUS_ABORTED ) {
1625
- MtmAbortTransaction (ts );
1626
- MtmWakeUpBackend (ts );
1621
+
1622
+ /* Interrupt voting for active transaction and abort them */
1623
+ for (ts = Mtm -> transListHead ; ts != NULL ; ts = ts -> next ) {
1624
+ MTM_LOG3 ("Active transaction gid='%s', coordinator=%d, xid=%d, status=%d, gtid.xid=%d" ,
1625
+ ts -> gid , ts -> gtid .nхode , ts -> xid , ts -> status , ts -> gtid .xid );
1626
+ if (MtmIsCoordinator (ts )) {
1627
+ if (!ts -> votingCompleted && disabled != 0 && ts -> status != TRANSACTION_STATUS_ABORTED ) {
1628
+ MtmAbortTransaction (ts );
1629
+ MtmWakeUpBackend (ts );
1630
+ }
1627
1631
}
1628
1632
}
1629
1633
}
@@ -2242,7 +2246,7 @@ _PG_init(void)
2242
2246
"Minimal amount of time (msec) between node status change" ,
2243
2247
"This delay is used to avoid false detection of node failure and to prevent blinking of node status node" ,
2244
2248
& MtmNodeDisableDelay ,
2245
- 1000 ,
2249
+ 2000 ,
2246
2250
1 ,
2247
2251
INT_MAX ,
2248
2252
PGC_BACKEND ,
0 commit comments