Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4702e5c

Browse files
knizhnikkelvich
authored andcommitted
Handle changing of node status
1 parent 7889c2f commit 4702e5c

File tree

5 files changed

+41
-21
lines changed

5 files changed

+41
-21
lines changed

arbiter.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
295295
goto Retry;
296296
}
297297

298-
/* Some node cnosidered that I am dead, so switch to recovery mode */
298+
/* Some node considered that I am dead, so switch to recovery mode */
299299
if (BIT_CHECK(msg.disabledNodeMask, MtmNodeId-1)) {
300300
elog(WARNING, "Node is switched to recovery mode");
301301
ds->status = MTM_RECOVERY;

bkb.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static void findMaximumIndependentSet(NodeList* cur, NodeList* result, nodemask_
9898
}
9999
}
100100

101-
nodemask_t MtmFindMaxClique(nodemask_t* graph, int n_nodes)
101+
nodemask_t MtmFindMaxClique(nodemask_t* graph, int n_nodes, int* clique_size)
102102
{
103103
NodeList tmp;
104104
NodeList result;
@@ -116,5 +116,6 @@ nodemask_t MtmFindMaxClique(nodemask_t* graph, int n_nodes)
116116
for (i = 0; i < result.size; i++) {
117117
BIT_SET(mask, result.nodes[i]);
118118
}
119+
*clique_size = result.size;
119120
return mask;
120121
}

bkb.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ typedef uint64_t nodemask_t;
1111
#define BIT_CLEAR(mask, bit) (mask &= ~((nodemask_t)1 << (bit)))
1212
#define BIT_SET(mask, bit) (mask |= ((nodemask_t)1 << (bit)))
1313

14-
extern nodemask_t MtmFindMaxClique(nodemask_t* matrix, int n_modes);
14+
extern nodemask_t MtmFindMaxClique(nodemask_t* matrix, int n_modes, int* clique_size);
1515

1616
#endif

multimaster.c

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,13 +1084,20 @@ _PG_fini(void)
10841084
*/
10851085

10861086

1087-
static void MtmSwitchFromRecoveryToNormalMode()
1087+
void MtmSwitchToNormalMode()
10881088
{
10891089
dtm->status = MTM_ONLINE;
10901090
elog(WARNING, "Switch to normal mode");
10911091
/* ??? Something else to do here? */
10921092
}
10931093

1094+
void MtmSwitchToRecoveryMode()
1095+
{
1096+
dtm->status = MTM_RECOVERY;
1097+
/* ??? Something else to do here? */
1098+
elog(ERROR, "Switch to normal mode");
1099+
}
1100+
10941101

10951102
void MtmJoinTransaction(GlobalTransactionId* gtid, csn_t globalSnapshot)
10961103
{
@@ -1110,7 +1117,7 @@ void MtmJoinTransaction(GlobalTransactionId* gtid, csn_t globalSnapshot)
11101117
Assert(dtm->status == MTM_RECOVERY);
11111118
} else if (dtm->status == MTM_RECOVERY) {
11121119
/* When recovery is completed we get normal transaction ID and switch to normal mode */
1113-
MtmSwitchFromRecoveryToNormalMode();
1120+
MtmSwitchToNormalMode();
11141121
}
11151122
dtmTx.gtid = *gtid;
11161123
dtmTx.xid = GetCurrentTransactionId();
@@ -1646,41 +1653,51 @@ MtmDetectGlobalDeadLock(PGPROC* proc)
16461653
static void
16471654
MtmBuildConnectivityMatrix(nodemask_t* matrix)
16481655
{
1649-
int i;
1650-
for (i = 0; i < MtmNodes; i++) {
1656+
int i, j, n = MtmNodes;
1657+
for (i = 0; i < n; i++) {
16511658
if (i+1 != MtmNodeId) {
16521659
void* data = PaxosGet(psprintf("node-mask-%d", i+1), NULL, NULL);
16531660
matrix[i] = *(nodemask_t*)data;
16541661
} else {
16551662
matrix[i] = dtm->connectivityMask;
16561663
}
16571664
}
1665+
/* make matrix symetric: required for Bron–Kerbosch algorithm */
1666+
for (i = 0; i < n; i++) {
1667+
for (j = 0; j < i; j++) {
1668+
matrix[i] |= ((matrix[j] >> i) & 1) << j;
1669+
}
1670+
}
16581671
}
16591672

16601673

16611674
void MtmUpdateClusterStatus(void)
16621675
{
1663-
nodemask_t mask, clique, disconnectedMask;
1676+
nodemask_t mask, clique;
16641677
nodemask_t matrix[MAX_NODES];
1678+
int clique_size;
16651679
int i;
16661680

16671681
MtmBuildConnectivityMatrix(matrix);
16681682

1669-
clique = MtmFindMaxClique(matrix, MtmNodes);
1670-
disconnectedMask = ~clique & (((nodemask_t)1 << MtmNodes)-1);
1671-
MtmLock(LW_EXCLUSIVE);
1672-
mask = disconnectedMask & ~dtm->disabledNodeMask;
1673-
for (i = 0; mask != 0; i++, mask >>= 1) {
1674-
if (mask & 1) {
1675-
dtm->nNodes -= 1;
1676-
BIT_SET(dtm->disabledNodeMask, i);
1683+
clique = MtmFindMaxClique(matrix, MtmNodes, &clique_size);
1684+
if (clique_size >= MtmNodes/2+1) { /* have quorum */
1685+
MtmLock(LW_EXCLUSIVE);
1686+
mask = ~clique & (((nodemask_t)1 << MtmNodes)-1) & ~dtm->disabledNodeMask;
1687+
for (i = 0; mask != 0; i++, mask >>= 1) {
1688+
if (mask & 1) {
1689+
dtm->nNodes -= 1;
1690+
BIT_SET(dtm->disabledNodeMask, i);
1691+
}
16771692
}
1693+
MtmUnlock();
1694+
if (BIT_CHECK(dtm->disabledNodeMask, MtmNodeId-1)) {
1695+
/* I was excluded from cluster:( */
1696+
MtmSwitchToRecoveryMode();
1697+
}
1698+
} else {
1699+
elog(WARNING, "Clique %lx has no quorum", clique);
16781700
}
1679-
if (dtm->disabledNodeMask != disconnectedMask) {
1680-
dtm->disabledNodeMask |= disconnectedMask;
1681-
PaxosSet(psprintf("node-mask-%d", MtmNodeId), &dtm->disabledNodeMask, sizeof dtm->disabledNodeMask);
1682-
}
1683-
MtmUnlock();
16841701
}
16851702

16861703
void MtmOnNodeDisconnect(int nodeId)

multimaster.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,6 @@ extern timestamp_t MtmGetCurrentTime(void);
148148
extern void MtmSleep(timestamp_t interval);
149149
extern bool MtmIsRecoveredNode(int nodeId);
150150
extern void MtmUpdateClusterStatus(void);
151+
extern void MtmSwitchToNormalMode(void);
152+
extern void MtmSwitchToRecoveryMode(void);
151153
#endif

0 commit comments

Comments
 (0)