@@ -318,10 +318,17 @@ static void MtmCheckResponse(MtmArbiterMessage* resp)
318
318
&& Mtm -> status != MTM_RECOVERY
319
319
&& Mtm -> nodes [MtmNodeId - 1 ].lastStatusChangeTime + MSEC_TO_USEC (MtmNodeDisableDelay ) < MtmGetSystemTime ())
320
320
{
321
- elog (WARNING , "Node %d thinks that I was dead, while I am %s (message %s)" , resp -> node , MtmNodeStatusMnem [Mtm -> status ], messageKindText [resp -> code ]);
321
+ elog (WARNING , "Node %d thinks that I am dead, while I am %s (message %s)" , resp -> node , MtmNodeStatusMnem [Mtm -> status ], messageKindText [resp -> code ]);
322
322
BIT_SET (Mtm -> disabledNodeMask , MtmNodeId - 1 );
323
323
MtmSwitchClusterMode (MTM_RECOVERY );
324
- }
324
+ } else if (BIT_CHECK (Mtm -> disabledNodeMask , resp -> node - 1 ) && sockets [resp -> node - 1 ] < 0 ) {
325
+ /* We received a heartbeat from a disabled node that has no open socket.
326
+ * It looks like the node has been restarted.
327
+ * Try to reconnect to it.
328
+ */
329
+ elog (WARNING , "Receive heartbeat from disabled node %d" , resp -> node );
330
+ BIT_SET (Mtm -> reconnectMask , resp -> node - 1 );
331
+ }
325
332
}
326
333
327
334
static void MtmScheduleHeartbeat ()
@@ -355,7 +362,8 @@ static void MtmSendHeartbeat()
355
362
if (i + 1 != MtmNodeId ) {
356
363
if (!BIT_CHECK (busy_mask , i )
357
364
&& (Mtm -> status != MTM_ONLINE
358
- || (sockets [i ] >= 0 && !BIT_CHECK (Mtm -> disabledNodeMask , i ))
365
+ || sockets [i ] >= 0
366
+ || !BIT_CHECK (Mtm -> disabledNodeMask , i )
359
367
|| BIT_CHECK (Mtm -> reconnectMask , i )))
360
368
{
361
369
if (!MtmSendToNode (i , & msg , sizeof (msg ))) {
@@ -885,6 +893,8 @@ static void MtmReceiver(Datum arg)
885
893
Mtm -> nodes [node - 1 ].connectivityMask = msg -> connectivityMask ;
886
894
Mtm -> nodes [node - 1 ].lastHeartbeat = MtmGetSystemTime ();
887
895
896
+ MtmCheckResponse (msg );
897
+
888
898
switch (msg -> code ) {
889
899
case MSG_HEARTBEAT :
890
900
MTM_LOG2 ("Receive HEARTBEAT from node %d with timestamp %ld delay %ld" ,
@@ -964,7 +974,6 @@ static void MtmReceiver(Datum arg)
964
974
messageKindText [msg -> code ], ts -> xid , ts -> gid , node );
965
975
continue ;
966
976
}
967
- MtmCheckResponse (msg );
968
977
BIT_SET (ts -> votedMask , node - 1 );
969
978
970
979
if (MtmIsCoordinator (ts )) {
0 commit comments