Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b7551e4

Browse files
knizhnikkelvich
authored andcommitted
Add status change time
1 parent b69ec26 commit b7551e4

File tree

4 files changed

+41
-9
lines changed

4 files changed

+41
-9
lines changed

multimaster--1.0.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ AS 'MODULE_PATHNAME','mtm_get_snapshot'
2424
LANGUAGE C;
2525

2626

27-
CREATE TYPE mtm.node_state AS (id integer, disabled bool, disconnected bool, catchUp bool, slotLag bigint, avgTransDelay bigint, connStr text);
27+
CREATE TYPE mtm.node_state AS (id integer, disabled bool, disconnected bool, catchUp bool, slotLag bigint, avgTransDelay bigint, lastStatusChange timestamp, connStr text);
2828

2929
CREATE FUNCTION mtm.get_nodes_state() RETURNS SETOF mtm.node_state
3030
AS 'MODULE_PATHNAME','mtm_get_nodes_state'

multimaster.c

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ int MtmConnectAttempts;
178178
int MtmConnectTimeout;
179179
int MtmKeepaliveTimeout;
180180
int MtmReconnectAttempts;
181+
int MtmNodeDisableDelay;
181182
bool MtmUseRaftable;
182183
MtmConnectionInfo* MtmConnections;
183184

@@ -992,6 +993,7 @@ void MtmRecoveryCompleted(void)
992993
MtmLock(LW_EXCLUSIVE);
993994
Mtm->recoverySlot = 0;
994995
BIT_CLEAR(Mtm->disabledNodeMask, MtmNodeId-1);
996+
Mtm->nodes[MtmNodeId-1].lastStatusChangeTime = time(NULL);
995997
/* Mode will be changed to online once all locagical reciever are connected */
996998
MtmSwitchClusterMode(MTM_CONNECTED);
997999
MtmUnlock();
@@ -1080,6 +1082,7 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
10801082
/* We are lucky: caugth-up without locking cluster! */
10811083
}
10821084
BIT_CLEAR(Mtm->disabledNodeMask, nodeId-1);
1085+
Mtm->nodes[nodeId-1].lastStatusChangeTime = time(NULL);
10831086
Mtm->nNodes += 1;
10841087
caughtUp = true;
10851088
} else if (!BIT_CHECK(Mtm->nodeLockerMask, nodeId-1)
@@ -1222,13 +1225,15 @@ bool MtmRefreshClusterStatus(bool nowait)
12221225
if (mask & 1) {
12231226
Mtm->nNodes -= 1;
12241227
BIT_SET(Mtm->disabledNodeMask, i);
1228+
Mtm->nodes[i].lastStatusChangeTime = time(NULL);
12251229
}
12261230
}
12271231
mask = clique & Mtm->disabledNodeMask; /* new enabled nodes mask */
12281232
for (i = 0; mask != 0; i++, mask >>= 1) {
12291233
if (mask & 1) {
12301234
Mtm->nNodes += 1;
12311235
BIT_CLEAR(Mtm->disabledNodeMask, i);
1236+
Mtm->nodes[i].lastStatusChangeTime = time(NULL);
12321237
}
12331238
}
12341239
MtmCheckQuorum();
@@ -1268,6 +1273,11 @@ void MtmOnNodeDisconnect(int nodeId)
12681273
{
12691274
MtmTransState *ts;
12701275

1276+
if (Mtm->nodes[nodeId-1].lastStatusChangeTime + MtmNodeDisableDelay > time(NULL)) {
1277+
/* Avoid false detection of node failure and prevent node status blinking */
1278+
return;
1279+
}
1280+
12711281
BIT_SET(Mtm->connectivityMask, nodeId-1);
12721282
BIT_SET(Mtm->reconnectMask, nodeId-1);
12731283
RaftableSet(psprintf("node-mask-%d", MtmNodeId), &Mtm->connectivityMask, sizeof Mtm->connectivityMask, false);
@@ -1278,6 +1288,7 @@ void MtmOnNodeDisconnect(int nodeId)
12781288
if (!MtmRefreshClusterStatus(false)) {
12791289
MtmLock(LW_EXCLUSIVE);
12801290
if (!BIT_CHECK(Mtm->disabledNodeMask, nodeId-1)) {
1291+
Mtm->nodes[nodeId-1].lastStatusChangeTime = time(NULL);
12811292
BIT_SET(Mtm->disabledNodeMask, nodeId-1);
12821293
Mtm->nNodes -= 1;
12831294
MtmCheckQuorum();
@@ -1445,6 +1456,7 @@ static void MtmInitialize()
14451456
for (i = 0; i < MtmNodes; i++) {
14461457
Mtm->nodes[i].oldestSnapshot = 0;
14471458
Mtm->nodes[i].transDelay = 0;
1459+
Mtm->nodes[i].lastStatusChangeTime = time(NULL);
14481460
Mtm->nodes[i].con = MtmConnections[i];
14491461
}
14501462
PGSemaphoreCreate(&Mtm->votingSemaphore);
@@ -1565,10 +1577,25 @@ _PG_init(void)
15651577
if (!process_shared_preload_libraries_in_progress)
15661578
return;
15671579

1580+
DefineCustomIntVariable(
1581+
"multimaster.node_disable_delay",
1582+
"Minamal amount of time (sec) between node status change",
1583+
"This delay is used to avoid false detection of node failure and to prevent blinking of node status node",
1584+
&MtmNodeDisableDelay,
1585+
1,
1586+
1,
1587+
INT_MAX,
1588+
PGC_BACKEND,
1589+
0,
1590+
NULL,
1591+
NULL,
1592+
NULL
1593+
);
1594+
15681595
DefineCustomIntVariable(
15691596
"multimaster.min_recovery_lag",
15701597
"Minamal lag of WAL-sender performing recovery after which cluster is locked until recovery is completed",
1571-
"When wal-sender almost catch-up WAL current position we need to stop 'Achilles tortile compeition' and "
1598+
"When wal-sender almost catch-up WAL current position we need to stop 'Achilles tortile competition' and "
15721599
"temporary stop commit of new transactions until node will be completely repared",
15731600
&MtmMinRecoveryLag,
15741601
100000,
@@ -1890,6 +1917,7 @@ void MtmDropNode(int nodeId, bool dropSlot)
18901917
{
18911918
elog(ERROR, "NodeID %d is out of range [1,%d]", nodeId, Mtm->nNodes);
18921919
}
1920+
Mtm->nodes[nodeId-1].lastStatusChangeTime = time(NULL);
18931921
BIT_SET(Mtm->disabledNodeMask, nodeId-1);
18941922
Mtm->nNodes -= 1;
18951923
MtmCheckQuorum();
@@ -1940,13 +1968,15 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
19401968
if (MtmIsRecoverySession) {
19411969
MTM_LOG1("%d: Node %d start recovery of node %d", MyProcPid, MtmNodeId, MtmReplicationNodeId);
19421970
if (!BIT_CHECK(Mtm->disabledNodeMask, MtmReplicationNodeId-1)) {
1971+
Mtm->nodes[MtmReplicationNodeId-1].lastStatusChangeTime = time(NULL);
19431972
BIT_SET(Mtm->disabledNodeMask, MtmReplicationNodeId-1);
19441973
Mtm->nNodes -= 1;
19451974
MtmCheckQuorum();
19461975
}
19471976
} else if (BIT_CHECK(Mtm->disabledNodeMask, MtmReplicationNodeId-1)) {
19481977
if (recoveryCompleted) {
19491978
MTM_LOG1("Node %d consider that recovery of node %d is completed: start normal replication", MtmNodeId, MtmReplicationNodeId);
1979+
Mtm->nodes[MtmReplicationNodeId-1].lastStatusChangeTime = time(NULL);
19501980
BIT_CLEAR(Mtm->disabledNodeMask, MtmReplicationNodeId-1);
19511981
Mtm->nNodes += 1;
19521982
MtmCheckQuorum();
@@ -2057,8 +2087,8 @@ typedef struct
20572087
int nodeId;
20582088
char* connStrPtr;
20592089
TupleDesc desc;
2060-
Datum values[7];
2061-
bool nulls[7];
2090+
Datum values[8];
2091+
bool nulls[8];
20622092
} MtmGetNodeStateCtx;
20632093

20642094
Datum
@@ -2095,11 +2125,12 @@ mtm_get_nodes_state(PG_FUNCTION_ARGS)
20952125
usrfctx->values[4] = Int64GetDatum(lag);
20962126
usrfctx->nulls[4] = lag < 0;
20972127
usrfctx->values[5] = Int64GetDatum(Mtm->transCount ? Mtm->nodes[usrfctx->nodeId-1].transDelay/Mtm->transCount : 0);
2128+
usrfctx->values[6] = TimestampTzGetDatum(time_t_to_timestamptz(Mtm->nodes[usrfctx->nodeId-1].lastStatusChangeTime));
20982129
p = strchr(usrfctx->connStrPtr, ',');
20992130
if (p != NULL) {
21002131
*p++ = '\0';
21012132
}
2102-
usrfctx->values[6] = CStringGetTextDatum(usrfctx->connStrPtr);
2133+
usrfctx->values[7] = CStringGetTextDatum(usrfctx->connStrPtr);
21032134
usrfctx->connStrPtr = p;
21042135
usrfctx->nodeId += 1;
21052136

multimaster.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ typedef struct
119119
{
120120
MtmConnectionInfo con;
121121
time_t transDelay;
122+
time_t lastStatusChangeTime;
122123
csn_t oldestSnapshot; /* Oldest snapshot used by active transactions at this node */
123124
} MtmNodeInfo;
124125

@@ -170,7 +171,6 @@ typedef struct
170171
MtmTransState** transListTail; /* Tail of L1 list of all finished transactionds, used to append new elements.
171172
This list is expected to be in CSN ascending order, by strict order may be violated */
172173
uint64 transCount; /* Counter of transactions perfromed by this node */
173-
time_t nodeTransDelay[MAX_NODES]; /* Time of waiting transaction acknowledgment from node */
174174
BgwPool pool; /* Pool of background workers for applying logical replication patches */
175175
MtmNodeInfo nodes[1]; /* [MtmNodes]: per-node data */
176176
} MtmState;
@@ -190,6 +190,7 @@ extern int MtmConnectAttempts;
190190
extern int MtmConnectTimeout;
191191
extern int MtmReconnectAttempts;
192192
extern int MtmKeepaliveTimeout;
193+
extern int MtmNodeDisableDelay;
193194
extern HTAB* MtmXid2State;
194195

195196
extern MtmConnectionInfo* MtmConnections;

t/001_basic_recovery.pl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@
5858
###############################################################################
5959

6060
diag("starting node 2");
61-
$cluster->{nodes}->[2]->start;
62-
diag("sleeping 30");
63-
sleep(30); # XXX: here we can poll
61+
$nodes[2]->start;
62+
diag("sleeping 10");
63+
sleep(10); # XXX: here we can poll
6464
diag("inserting 3");
6565
$cluster->psql(0, 'postgres', "insert into t values(3, 30);");
6666
diag("selecting");

0 commit comments

Comments
 (0)