Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit bdbaeff

Browse files
Alexander Kuzmenkov, kelvich
Alexander Kuzmenkov
authored and committed
Show the reason for the node status.
1 parent 7f63018 commit bdbaeff

File tree

3 files changed

+37
-22
lines changed

3 files changed

+37
-22
lines changed

multimaster.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -995,7 +995,9 @@ MtmBeginTransaction(MtmCurrentTrans* x)
995995
* Allow execution of transaction by bg-workers to make it possible to perform recovery.
996996
*/
997997
MtmUnlock();
998-
MTM_ELOG(MtmBreakConnection ? FATAL : ERROR, "Multimaster node is not online: current status %s", MtmNodeStatusMnem[Mtm->status]);
998+
MTM_ELOG(MtmBreakConnection ? FATAL : ERROR,
999+
"Multimaster node is not online: current status %s: %s",
1000+
MtmNodeStatusMnem[Mtm->status], Mtm->statusReason);
9991001
}
10001002
x->containsDML = false;
10011003
x->gtid.xid = InvalidTransactionId;

multimaster.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,8 @@ typedef struct {
292292
typedef struct
293293
{
294294
MtmNodeStatus status; /* Status of this node */
295+
/* A human-readable description of why the current status was set */
296+
char *statusReason;
295297
int recoverySlot; /* NodeId of recovery slot or 0 if none */
296298
volatile slock_t queueSpinlock; /* spinlock used to protect sender queue */
297299
PGSemaphore sendSemaphore; /* semaphore used to notify mtm-sender about new responses to coordinator */

state.c

Lines changed: 32 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -61,15 +61,16 @@ countZeroBits(nodemask_t mask, int nNodes)
6161
}
6262

6363
static void
64-
MtmSetClusterStatus(MtmNodeStatus status)
64+
MtmSetClusterStatus(MtmNodeStatus status, char *statusReason)
6565
{
6666
if (Mtm->status == status)
6767
return;
6868

6969
Mtm->nConfigChanges += 1; /* this will restart backends */
7070

71-
MTM_LOG1("[STATE] Switching status from %s to %s status",
72-
MtmNodeStatusMnem[Mtm->status], MtmNodeStatusMnem[status]);
71+
MTM_LOG1("[STATE] Switching status from %s to %s status: %s",
72+
MtmNodeStatusMnem[Mtm->status], MtmNodeStatusMnem[status],
73+
statusReason);
7374

7475
/*
7576
* Do some actions on specific status transitions.
@@ -101,13 +102,15 @@ MtmSetClusterStatus(MtmNodeStatus status)
101102
}
102103

103104
Mtm->status = status;
105+
Mtm->statusReason = statusReason;
104106
}
105107

106108
static void
107109
MtmCheckState(void)
108110
{
109111
// int nVotingNodes = MtmGetNumberOfVotingNodes();
110112
bool isEnabledState;
113+
char *statusReason = "node is disabled by default";
111114
MtmNodeStatus old_status;
112115
int nEnabled = countZeroBits(Mtm->disabledNodeMask, Mtm->nAllNodes);
113116
int nConnected = countZeroBits(SELF_CONNECTIVITY_MASK, Mtm->nAllNodes);
@@ -126,42 +129,50 @@ MtmCheckState(void)
126129
(MtmMajorNode || Mtm->refereeGrant),
127130
maskToString(Mtm->stoppedNodeMask, Mtm->nAllNodes));
128131

129-
isEnabledState =
130-
( (nConnected >= Mtm->nAllNodes/2+1) /* majority */
131-
// XXXX: should we restrict major with two nodes setup?
132-
|| (nConnected == Mtm->nAllNodes/2 && MtmMajorNode) /* or half + major node */
133-
|| (nConnected == Mtm->nAllNodes/2 && Mtm->refereeGrant) ) /* or half + referee */
134-
&& (BIT_CHECK(Mtm->clique, MtmNodeId-1) || Mtm->refereeGrant) /* in clique when non-major */
135-
&& !BIT_CHECK(Mtm->stoppedNodeMask, MtmNodeId-1); /* is not stopped */
132+
#define ENABLE_IF(cond, reason) if (!(condition) && !isEnabledState) { \
133+
isEnabledState = true; statusReason = reason; }
134+
#define DISABLE_IF(cond, reason) if ((condition) && isEnabledState) { \
135+
isEnabledState = false; statusReason = reason; }
136+
137+
isEnabledState = false;
138+
ENABLE_IF(nConnected >= Mtm->nAllNodes/2+1,
139+
"node belongs to the majority group");
140+
ENABLE_IF(nConnected == Mtm->nAllNodes/2 && MtmMajorNode,
141+
"node is a major node");
142+
ENABLE_IF(nConnected == Mtm->nAllNodes/2 && Mtm->refereeGrant,
143+
"node has a referee grant");
144+
DISABLE_IF(!BIT_CHECK(Mtm->clique, MtmNodeId-1) && !Mtm->refereeGrant,
145+
"node is not in clique and has no referree grant");
146+
DISABLE_IF(BIT_CHECK(Mtm->stoppedNodeMask, MtmNodeId-1),
147+
"node is stopped manually");
148+
149+
#undef ENABLE_IF
150+
#undef DISABLE_IF
136151

137152
/* ANY -> MTM_DISABLED */
138153
if (!isEnabledState)
139154
{
140155
// BIT_SET(Mtm->disabledNodeMask, MtmNodeId-1);
141-
MtmSetClusterStatus(MTM_DISABLED);
156+
MtmSetClusterStatus(MTM_DISABLED, statusReason);
142157
MtmDisableNode(MtmNodeId);
143158
return;
144159
}
145160

146161
switch (Mtm->status)
147162
{
148163
case MTM_DISABLED:
149-
if (isEnabledState)
150-
{
151-
MtmSetClusterStatus(MTM_RECOVERY);
164+
MtmSetClusterStatus(MTM_RECOVERY, statusReason);
152165

153-
if (old_status != Mtm->status)
154-
MtmCheckState();
155-
return;
156-
}
157-
break;
166+
if (old_status != Mtm->status)
167+
MtmCheckState();
168+
return;
158169

159170
case MTM_RECOVERY:
160171
if (!BIT_CHECK(Mtm->disabledNodeMask, MtmNodeId-1))
161172
{
162173
MTM_LOG1("[LOCK] set lock on MTM_RECOVERY switch");
163174
BIT_SET(Mtm->originLockNodeMask, MtmNodeId-1); // kk trick, XXXX: log that
164-
MtmSetClusterStatus(MTM_RECOVERED);
175+
MtmSetClusterStatus(MTM_RECOVERED, statusReason);
165176

166177
if (old_status != Mtm->status)
167178
MtmCheckState();
@@ -186,7 +197,7 @@ MtmCheckState(void)
186197
*/
187198
MTM_LOG1("[LOCK] release lock on MTM_RECOVERED switch");
188199
BIT_CLEAR(Mtm->originLockNodeMask, MtmNodeId-1);
189-
MtmSetClusterStatus(MTM_ONLINE);
200+
MtmSetClusterStatus(MTM_ONLINE, statusReason);
190201

191202
if (old_status != Mtm->status)
192203
MtmCheckState();

0 commit comments

Comments
 (0)