@@ -90,7 +90,7 @@ static void MtmSender(Datum arg);
90
90
static void MtmReceiver (Datum arg );
91
91
static void MtmMonitor (Datum arg );
92
92
static void MtmSendHeartbeat (void );
93
- static bool MtmSendToNode (int node , void const * buf , int size );
93
+ static bool MtmSendToNode (int node , void const * buf , int size , time_t reconnectTimeout );
94
94
95
95
char const * const MtmMessageKindMnem [] =
96
96
{
@@ -214,7 +214,7 @@ static void MtmDisconnect(int node)
214
214
MtmOnNodeDisconnect (node + 1 );
215
215
}
216
216
217
- static int MtmWaitSocket (int sd , bool forWrite , time_t timeoutMsec )
217
+ static int MtmWaitSocket (int sd , bool forWrite , timestamp_t timeoutMsec )
218
218
{
219
219
struct timeval tv ;
220
220
fd_set set ;
@@ -227,7 +227,7 @@ static int MtmWaitSocket(int sd, bool forWrite, time_t timeoutMsec)
227
227
MtmCheckHeartbeat ();
228
228
now = MtmGetSystemTime ();
229
229
if (now > deadline ) {
230
- return 0 ;
230
+ now = deadline ;
231
231
}
232
232
tv .tv_sec = (deadline - now )/USECS_PER_SEC ;
233
233
tv .tv_usec = (deadline - now )%USECS_PER_SEC ;
@@ -355,7 +355,7 @@ static void MtmSendHeartbeat()
355
355
timestamp_t now = MtmGetSystemTime ();
356
356
msg .code = MSG_HEARTBEAT ;
357
357
msg .disabledNodeMask = Mtm -> disabledNodeMask ;
358
- msg .connectivityMask = Mtm -> connectivityMask ;
358
+ msg .connectivityMask = SELF_CONNECTIVITY_MASK ;
359
359
msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
360
360
msg .node = MtmNodeId ;
361
361
msg .csn = now ;
@@ -373,15 +373,15 @@ static void MtmSendHeartbeat()
373
373
|| !BIT_CHECK (Mtm -> disabledNodeMask , i )
374
374
|| BIT_CHECK (Mtm -> reconnectMask , i )))
375
375
{
376
- if (!MtmSendToNode (i , & msg , sizeof (msg ))) {
376
+ if (!MtmSendToNode (i , & msg , sizeof (msg ), MtmHeartbeatSendTimeout )) {
377
377
elog (LOG , "Arbiter failed to send heartbeat to node %d" , i + 1 );
378
378
} else {
379
379
if (last_heartbeat_to_node [i ] + MSEC_TO_USEC (MtmHeartbeatSendTimeout )* 2 < now ) {
380
380
MTM_LOG1 ("Last heartbeat to node %d was sent %ld microseconds ago" , i + 1 , now - last_heartbeat_to_node [i ]);
381
381
}
382
382
last_heartbeat_to_node [i ] = now ;
383
383
/* Connectivity mask can be cleared by MtmWatchdog: in this case sockets[i] >= 0 */
384
- if (BIT_CHECK (Mtm -> connectivityMask , i )) {
384
+ if (BIT_CHECK (SELF_CONNECTIVITY_MASK , i )) {
385
385
MTM_LOG1 ("Force reconnect to node %d" , i + 1 );
386
386
close (sockets [i ]);
387
387
sockets [i ] = -1 ;
@@ -411,7 +411,7 @@ void MtmCheckHeartbeat()
411
411
}
412
412
413
413
414
- static int MtmConnectSocket (int node , int port , int timeout )
414
+ static int MtmConnectSocket (int node , int port , time_t timeout )
415
415
{
416
416
struct sockaddr_in sock_inet ;
417
417
unsigned addrs [MAX_ROUTES ];
@@ -422,6 +422,8 @@ static int MtmConnectSocket(int node, int port, int timeout)
422
422
timestamp_t start = MtmGetSystemTime ();
423
423
char const * host = Mtm -> nodes [node ].con .hostName ;
424
424
nodemask_t save_mask = busy_mask ;
425
+ timestamp_t afterWait ;
426
+ timestamp_t beforeWait ;
425
427
426
428
sock_inet .sin_family = AF_INET ;
427
429
sock_inet .sin_port = htons (port );
@@ -435,7 +437,6 @@ static int MtmConnectSocket(int node, int port, int timeout)
435
437
Retry :
436
438
while (1 ) {
437
439
int rc = -1 ;
438
-
439
440
sd = socket (AF_INET , SOCK_STREAM , 0 );
440
441
if (sd < 0 ) {
441
442
elog (LOG , "Arbiter failed to create socket: %d" , errno );
@@ -461,7 +462,8 @@ static int MtmConnectSocket(int node, int port, int timeout)
461
462
if (rc == 0 ) {
462
463
break ;
463
464
}
464
- if (errno != EINPROGRESS || start + MSEC_TO_USEC (timeout ) < MtmGetSystemTime ()) {
465
+ beforeWait = MtmGetSystemTime ();
466
+ if (errno != EINPROGRESS || start + MSEC_TO_USEC (timeout ) < beforeWait ) {
465
467
elog (WARNING , "Arbiter failed to connect to %s:%d: error=%d" , host , port , errno );
466
468
close (sd );
467
469
busy_mask = save_mask ;
@@ -485,8 +487,10 @@ static int MtmConnectSocket(int node, int port, int timeout)
485
487
elog (WARNING , "Arbiter waiting socket to %s:%d: rc=%d, error=%d" , host , port , rc , errno );
486
488
}
487
489
close (sd );
488
- MtmCheckHeartbeat ();
489
- MtmSleep (MSEC_TO_USEC (MtmHeartbeatSendTimeout ));
490
+ afterWait = MtmGetSystemTime ();
491
+ if (afterWait < beforeWait + MSEC_TO_USEC (MtmHeartbeatSendTimeout )) {
492
+ MtmSleep (beforeWait + MSEC_TO_USEC (MtmHeartbeatSendTimeout ) - afterWait );
493
+ }
490
494
}
491
495
}
492
496
MtmSetSocketOptions (sd );
@@ -496,7 +500,7 @@ static int MtmConnectSocket(int node, int port, int timeout)
496
500
req .hdr .sxid = ShmemVariableCache -> nextXid ;
497
501
req .hdr .csn = MtmGetCurrentTime ();
498
502
req .hdr .disabledNodeMask = Mtm -> disabledNodeMask ;
499
- req .hdr .connectivityMask = Mtm -> connectivityMask ;
503
+ req .hdr .connectivityMask = SELF_CONNECTIVITY_MASK ;
500
504
strcpy (req .connStr , Mtm -> nodes [MtmNodeId - 1 ].con .connStr );
501
505
if (!MtmWriteSocket (sd , & req , sizeof req )) {
502
506
elog (WARNING , "Arbiter failed to send handshake message to %s:%d: %d" , host , port , errno );
@@ -553,7 +557,7 @@ static void MtmOpenConnections()
553
557
}
554
558
555
559
556
- static bool MtmSendToNode (int node , void const * buf , int size )
560
+ static bool MtmSendToNode (int node , void const * buf , int size , time_t reconnectTimeout )
557
561
{
558
562
bool result = true;
559
563
nodemask_t save_mask = busy_mask ;
@@ -580,7 +584,7 @@ static bool MtmSendToNode(int node, void const* buf, int size)
580
584
close (sockets [node ]);
581
585
sockets [node ] = -1 ;
582
586
}
583
- sockets [node ] = MtmConnectSocket (node , Mtm -> nodes [node ].con .arbiterPort , MtmReconnectTimeout );
587
+ sockets [node ] = MtmConnectSocket (node , Mtm -> nodes [node ].con .arbiterPort , reconnectTimeout );
584
588
if (sockets [node ] < 0 ) {
585
589
MtmOnNodeDisconnect (node + 1 );
586
590
result = false;
@@ -634,7 +638,7 @@ static void MtmAcceptOneConnection()
634
638
635
639
resp .code = MSG_STATUS ;
636
640
resp .disabledNodeMask = Mtm -> disabledNodeMask ;
637
- resp .connectivityMask = Mtm -> connectivityMask ;
641
+ resp .connectivityMask = SELF_CONNECTIVITY_MASK ;
638
642
resp .dxid = HANDSHAKE_MAGIC ;
639
643
resp .sxid = ShmemVariableCache -> nextXid ;
640
644
resp .csn = MtmGetCurrentTime ();
@@ -759,7 +763,7 @@ static void MtmSender(Datum arg)
759
763
760
764
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ ) {
761
765
if (txBuffer [i ].used != 0 ) {
762
- MtmSendToNode (i , txBuffer [i ].data , txBuffer [i ].used * sizeof (MtmArbiterMessage ));
766
+ MtmSendToNode (i , txBuffer [i ].data , txBuffer [i ].used * sizeof (MtmArbiterMessage ), MtmReconnectTimeout );
763
767
txBuffer [i ].used = 0 ;
764
768
}
765
769
}
@@ -813,7 +817,7 @@ static void MtmMonitor(Datum arg)
813
817
BackgroundWorkerInitializeConnection (MtmDatabaseName , NULL );
814
818
815
819
while (!stop ) {
816
- int rc = WaitLatch (& MyProc -> procLatch , WL_TIMEOUT | WL_POSTMASTER_DEATH , MtmHeartbeatRecvTimeout );
820
+ int rc = WaitLatch (& MyProc -> procLatch , WL_TIMEOUT | WL_POSTMASTER_DEATH , MtmHeartbeatSendTimeout );
817
821
if (rc & WL_POSTMASTER_DEATH ) {
818
822
break ;
819
823
}
@@ -951,7 +955,7 @@ static void MtmReceiver(Datum arg)
951
955
MTM_LOG1 ("Send response %s for transaction %s to node %d" , MtmTxnStatusMnem [msg -> status ], msg -> gid , msg -> node );
952
956
}
953
957
msg -> disabledNodeMask = Mtm -> disabledNodeMask ;
954
- msg -> connectivityMask = Mtm -> connectivityMask ;
958
+ msg -> connectivityMask = SELF_CONNECTIVITY_MASK ;
955
959
msg -> oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
956
960
msg -> code = MSG_POLL_STATUS ;
957
961
MtmSendMessage (msg );
@@ -1142,10 +1146,10 @@ static void MtmReceiver(Datum arg)
1142
1146
}
1143
1147
}
1144
1148
if (Mtm -> status == MTM_ONLINE ) {
1145
- /* Check for hearbeat only in case of timeout expiration: it means that we do not have unproceeded events.
1149
+ now = MtmGetSystemTime ();
1150
+ /* Check for heartbeats only in case of timeout expiration: it means that we do not have unprocessed events.
1146
1151
* It helps to avoid false node failure detection because of blocking receiver.
1147
1152
*/
1148
- now = MtmGetSystemTime ();
1149
1153
if (n == 0 ) {
1150
1154
selectTimeout = MtmHeartbeatRecvTimeout ; /* restore select timeout */
1151
1155
if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
@@ -1158,10 +1162,6 @@ static void MtmReceiver(Datum arg)
1158
1162
}
1159
1163
lastHeartbeatCheck = now ;
1160
1164
}
1161
- if (Mtm -> disabledNodeMask != 0 ) {
1162
- /* If timeout is expired and there are disabled nodes, then recheck cluster's state */
1163
- MtmRefreshClusterStatus ();
1164
- }
1165
1165
} else {
1166
1166
if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
1167
1167
/* Switch to non-blocking mode to process all pending requests before doing watchdog check */
0 commit comments