Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4624b32

Browse files
committed
do not try to wait for a stable clique
1 parent bc2f052 commit 4624b32

File tree

1 file changed

+13
-26
lines changed

1 file changed

+13
-26
lines changed

contrib/mmts/state.c

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ MtmBuildConnectivityMatrix(nodemask_t* matrix)
451451
void
452452
MtmRefreshClusterStatus()
453453
{
454-
nodemask_t newClique, oldClique;
454+
nodemask_t newClique;
455455
nodemask_t matrix[MAX_NODES];
456456
nodemask_t trivialClique = ~SELF_CONNECTIVITY_MASK & (((nodemask_t)1 << Mtm->nAllNodes)-1);
457457
int cliqueSize;
@@ -529,38 +529,25 @@ MtmRefreshClusterStatus()
529529

530530
/*
531531
* Check for clique.
532+
*
533+
* Sleep is added to make sure that we detect all the failures that we can.
534+
* Otherwise, if we receive information about a dead node from our peer
535+
* before we detect it ourselves, we can disable an innocent node.
532536
*/
537+
MtmSleep(2*MSEC_TO_USEC(MtmHeartbeatRecvTimeout));
533538
MtmBuildConnectivityMatrix(matrix);
534539
newClique = MtmFindMaxClique(matrix, Mtm->nAllNodes, &cliqueSize);
535540

536541
if (newClique == Mtm->clique)
537542
return;
538543

539-
MTM_LOG1("[STATE] Old clique: %s", maskToString(Mtm->clique, Mtm->nAllNodes));
540-
541-
/*
542-
* Otherwise make sure that all nodes have a chance to replicate their connectivity
543-
* mask and we have a "consistent" picture. Obviously we cannot get a truly consistent
544-
* snapshot, but at least try to wait until the heartbeat send timeout has expired and
545-
* the connectivity graph has stabilized.
546-
*/
547-
do {
548-
oldClique = newClique;
549-
/*
550-
* Double timeout to consider the worst case when heartbeat receive interval is added
551-
* with refresh cluster status interval.
552-
*/
553-
MtmSleep(MSEC_TO_USEC(MtmHeartbeatRecvTimeout)*2);
554-
MtmBuildConnectivityMatrix(matrix);
555-
newClique = MtmFindMaxClique(matrix, Mtm->nAllNodes, &cliqueSize);
556-
} while (newClique != oldClique);
557-
558-
MTM_LOG1("[STATE] New clique: %s", maskToString(oldClique, Mtm->nAllNodes));
559-
560-
if (newClique != trivialClique)
561-
{
562-
MTM_LOG1("[STATE] NONTRIVIAL CLIQUE! (trivial: %s)", maskToString(trivialClique, Mtm->nAllNodes)); // XXXX some false-positives, fixme
563-
}
544+
MTM_LOG1("[STATE] Changed clique: %s -> %s ({%s, %s, %s}, %s)",
545+
maskToString(Mtm->clique, Mtm->nAllNodes),
546+
maskToString(newClique, Mtm->nAllNodes),
547+
maskToString(~Mtm->nodes[0].connectivityMask, Mtm->nAllNodes),
548+
maskToString(~Mtm->nodes[1].connectivityMask, Mtm->nAllNodes),
549+
maskToString(~Mtm->nodes[2].connectivityMask, Mtm->nAllNodes),
550+
newClique == trivialClique ? "trivial" : "non-trivial");
564551

565552
/*
566553
* We are using clique only to disable nodes.

0 commit comments

Comments
 (0)