@@ -382,6 +382,7 @@ static void MtmSendHeartbeat()
382
382
last_heartbeat_to_node [i ] = now ;
383
383
/* Connectivity mask can be cleared by MtmWatchdog: in this case sockets[i] >= 0 */
384
384
if (BIT_CHECK (Mtm -> connectivityMask , i )) {
385
+ MTM_LOG1 ("Force reconnect to node %d" , i + 1 );
385
386
close (sockets [i ]);
386
387
sockets [i ] = -1 ;
387
388
MtmReconnectNode (i + 1 ); /* set reconnect mask to force node reconnent */
@@ -484,6 +485,7 @@ static int MtmConnectSocket(int node, int port, int timeout)
484
485
elog (WARNING , "Arbiter waiting socket to %s:%d: rc=%d, error=%d" , host , port , rc , errno );
485
486
}
486
487
close (sd );
488
+ MtmCheckHeartbeat ();
487
489
MtmSleep (MSEC_TO_USEC (MtmHeartbeatSendTimeout ));
488
490
}
489
491
}
@@ -827,6 +829,7 @@ static void MtmReceiver(Datum arg)
827
829
MtmBuffer * rxBuffer = (MtmBuffer * )palloc0 (sizeof (MtmBuffer )* nNodes );
828
830
timestamp_t lastHeartbeatCheck = MtmGetSystemTime ();
829
831
timestamp_t now ;
832
+ timestamp_t selectTimeout = MtmHeartbeatRecvTimeout ;
830
833
831
834
#if USE_EPOLL
832
835
struct epoll_event * events = (struct epoll_event * )palloc (sizeof (struct epoll_event )* nNodes );
@@ -857,7 +860,7 @@ static void MtmReceiver(Datum arg)
857
860
858
861
while (!stop ) {
859
862
#if USE_EPOLL
860
- n = epoll_wait (epollfd , events , nNodes , MtmHeartbeatRecvTimeout );
863
+ n = epoll_wait (epollfd , events , nNodes , selectTimeout );
861
864
if (n < 0 ) {
862
865
if (errno == EINTR ) {
863
866
continue ;
@@ -871,16 +874,15 @@ static void MtmReceiver(Datum arg)
871
874
MtmDisconnect (i );
872
875
}
873
876
}
874
- now = MtmGetSystemTime ();
875
877
for (j = 0 ; j < n ; j ++ ) {
876
878
if (events [j ].events & EPOLLIN )
877
879
#else
878
880
fd_set events ;
879
881
do {
880
882
struct timeval tv ;
881
883
events = inset ;
882
- tv .tv_sec = MtmHeartbeatRecvTimeout /1000 ;
883
- tv .tv_usec = MtmHeartbeatRecvTimeout %1000 * 1000 ;
884
+ tv .tv_sec = selectTimeout /1000 ;
885
+ tv .tv_usec = selectTimeout %1000 * 1000 ;
884
886
do {
885
887
n = select (max_fd + 1 , & events , NULL , NULL , & tv );
886
888
} while (n < 0 && errno == EINTR );
@@ -889,7 +891,6 @@ static void MtmReceiver(Datum arg)
889
891
if (n < 0 ) {
890
892
elog (ERROR , "Arbiter failed to select sockets: %d" , errno );
891
893
}
892
- now = MtmGetSystemTime ();
893
894
for (i = 0 ; i < nNodes ; i ++ ) {
894
895
if (sockets [i ] >= 0 && FD_ISSET (sockets [i ], & events ))
895
896
#endif
@@ -1070,8 +1071,8 @@ static void MtmReceiver(Datum arg)
1070
1071
break ;
1071
1072
case MSG_ABORTED :
1072
1073
if (ts -> status == TRANSACTION_STATUS_COMMITTED ) {
1073
- elog (WARNING , "Receive ABORTED response for already committed transaction %d from node %d" ,
1074
- ts -> xid , node );
1074
+ elog (WARNING , "Receive ABORTED response for already committed transaction %d (%s) from node %d" ,
1075
+ ts -> xid , ts -> gid , node );
1075
1076
continue ;
1076
1077
}
1077
1078
if (ts -> status != TRANSACTION_STATUS_ABORTED ) {
@@ -1084,8 +1085,12 @@ static void MtmReceiver(Datum arg)
1084
1085
break ;
1085
1086
case MSG_PRECOMMITTED :
1086
1087
MTM_TXTRACE (ts , "MtmTransReceiver got MSG_PRECOMMITTED" );
1087
- if (ts -> status != TRANSACTION_STATUS_ABORTED ) {
1088
- Assert (ts -> status == TRANSACTION_STATUS_IN_PROGRESS );
1088
+ if (ts -> status == TRANSACTION_STATUS_COMMITTED ) {
1089
+ elog (WARNING , "Receive PRECOMMITTED response for already committed transaction %d (%s) from node %d" ,
1090
+ ts -> xid , ts -> gid , node );
1091
+ continue ;
1092
+ }
1093
+ if (ts -> status == TRANSACTION_STATUS_IN_PROGRESS ) {
1089
1094
if (msg -> csn > ts -> csn ) {
1090
1095
ts -> csn = msg -> csn ;
1091
1096
MtmSyncClock (ts -> csn );
@@ -1096,7 +1101,9 @@ static void MtmReceiver(Datum arg)
1096
1101
MtmWakeUpBackend (ts );
1097
1102
}
1098
1103
} else {
1099
- elog (WARNING , "Receive PRECOMMITTED response for aborted transaction" ); // How it can happen? SHould we use assert here?
1104
+ Assert (ts -> status == TRANSACTION_STATUS_ABORTED );
1105
+ elog (WARNING , "Receive PRECOMMITTED response for aborted transaction %d (%s) from node %d" ,
1106
+ ts -> xid , ts -> gid , node ); // How it can happen? SHould we use assert here?
1100
1107
if ((ts -> participantsMask & ~Mtm -> disabledNodeMask & ~ts -> votedMask ) == 0 ) {
1101
1108
MtmWakeUpBackend (ts );
1102
1109
}
@@ -1134,21 +1141,34 @@ static void MtmReceiver(Datum arg)
1134
1141
}
1135
1142
}
1136
1143
if (Mtm -> status == MTM_ONLINE ) {
1137
- /* "now" is time of performing select, so that delays in processing should not cause false detection */
1138
- if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
1139
- if (!MtmWatchdog (now )) {
1140
- for (i = 0 ; i < nNodes ; i ++ ) {
1141
- if (Mtm -> nodes [i ].lastHeartbeat != 0 && sockets [i ] >= 0 ) {
1142
- MTM_LOG1 ("Last heartbeat from node %d received %ld microseconds ago" , i + 1 , now - Mtm -> nodes [i ].lastHeartbeat );
1144
+ /* Check for heartbeat only in case of timeout expiration: it means that we do not have unprocessed events.
1145
+ * It helps to avoid false node failure detection because of blocking receiver.
1146
+ */
1147
+ now = MtmGetSystemTime ();
1148
+ if (n == 0 ) {
1149
+ selectTimeout = MtmHeartbeatRecvTimeout ; /* restore select timeout */
1150
+ if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
1151
+ if (!MtmWatchdog (now )) {
1152
+ for (i = 0 ; i < nNodes ; i ++ ) {
1153
+ if (Mtm -> nodes [i ].lastHeartbeat != 0 && sockets [i ] >= 0 ) {
1154
+ MTM_LOG1 ("Last heartbeat from node %d received %ld microseconds ago" , i + 1 , now - Mtm -> nodes [i ].lastHeartbeat );
1155
+ }
1143
1156
}
1144
1157
}
1158
+ lastHeartbeatCheck = now ;
1159
+ }
1160
+ if (Mtm -> disabledNodeMask != 0 ) {
1161
+ /* If timeout is expired and there are disabled nodes, then recheck cluster's state */
1162
+ MtmRefreshClusterStatus (false);
1163
+ }
1164
+ } else {
1165
+ if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
1166
+ /* Switch to non-blocking mode to process all pending requests before doing watchdog check */
1167
+ selectTimeout = 0 ;
1145
1168
}
1146
- lastHeartbeatCheck = now ;
1147
- }
1148
- if (n == 0 && Mtm -> disabledNodeMask != 0 ) {
1149
- /* If timeout is expired and there are disabled nodes, then recheck cluster's state */
1150
- MtmRefreshClusterStatus (false);
1151
1169
}
1170
+ } else if (n == 0 ) {
1171
+ selectTimeout = MtmHeartbeatRecvTimeout ; /* restore select timeout */
1152
1172
}
1153
1173
}
1154
1174
proc_exit (1 ); /* force restart of this bgwroker */
0 commit comments