Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 63e88b3

Browse files
knizhnikkelvich
authored andcommitted
Add recovery test
1 parent e641c63 commit 63e88b3

File tree

5 files changed

+23
-23
lines changed

5 files changed

+23
-23
lines changed

bgwpool.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ static void BgwPoolMainLoop(Datum arg)
3030
BackgroundWorkerUnblockSignals();
3131
BackgroundWorkerInitializeConnection(pool->dbname, NULL);
3232

33-
elog(WARNING, "Start background worker %d", id);
34-
3533
while(true) {
3634
PGSemaphoreLock(&pool->available);
3735
SpinLockAcquire(&pool->lock);

check-recovery.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
rm -rf /home/knizhnik/postgres_cluster/contrib/mmts/tmp_check/log
2+
TESTDIR='/home/knizhnik/postgres_cluster/contrib/mmts' PATH="/home/knizhnik/postgres_cluster/tmp_install/home/knizhnik/postgres_cluster/dist/bin:$PATH" LD_LIBRARY_PATH="/home/knizhnik/postgres_cluster/tmp_install/home/knizhnik/postgres_cluster/dist/lib:$LD_LIBRARY_PATH" PGPORT='65432' PG_REGRESS='/home/knizhnik/postgres_cluster/contrib/mmts/../../src/test/regress/pg_regress' prove -I ../../src/test/perl/ --verbose t/001_basic_recovery.pl

multimaster.c

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,7 @@ void MtmAbortTransaction(MtmTransState* ts)
988988

989989
void MtmRecoveryCompleted(void)
990990
{
991-
elog(WARNING, "Recovery of node %d is completed", MtmNodeId);
991+
elog(NOTICE, "Recovery of node %d is completed", MtmNodeId);
992992
MtmLock(LW_EXCLUSIVE);
993993
Mtm->recoverySlot = 0;
994994
BIT_CLEAR(Mtm->disabledNodeMask, MtmNodeId-1);
@@ -1071,12 +1071,12 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
10711071
XLogRecPtr walLSN = GetXLogInsertRecPtr();
10721072
if (slotLSN == walLSN && Mtm->nActiveTransactions == 0) {
10731073
if (BIT_CHECK(Mtm->nodeLockerMask, nodeId-1)) {
1074-
elog(WARNING,"Node %d is caught-up", nodeId);
1074+
elog(NOTICE,"Node %d is caught-up", nodeId);
10751075
BIT_CLEAR(Mtm->walSenderLockerMask, MyWalSnd - WalSndCtl->walsnds);
10761076
BIT_CLEAR(Mtm->nodeLockerMask, nodeId-1);
10771077
Mtm->nLockers -= 1;
10781078
} else {
1079-
elog(WARNING,"Node %d is caugth-up without locking cluster", nodeId);
1079+
elog(NOTICE,"%d: dode %d is caugth-up without locking cluster", MyProcPid, nodeId);
10801080
/* We are lucky: caugth-up without locking cluster! */
10811081
}
10821082
BIT_CLEAR(Mtm->disabledNodeMask, nodeId-1);
@@ -1091,7 +1091,7 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
10911091
* We have to maintain two bitmasks: one is marking wal sender, another - correspondent nodes.
10921092
* Is there some better way to establish mapping between nodes ad WAL-seconder?
10931093
*/
1094-
elog(WARNING,"Node %d is almost caught-up: slot position %lx, WAL position %lx, active transactions %d",
1094+
elog(NOTICE,"Node %d is almost caught-up: slot position %lx, WAL position %lx, active transactions %d",
10951095
nodeId, slotLSN, walLSN, Mtm->nActiveTransactions);
10961096
Assert(MyWalSnd != NULL); /* This function is called by WAL-sender, so it should not be NULL */
10971097
BIT_SET(Mtm->nodeLockerMask, nodeId-1);
@@ -1108,7 +1108,7 @@ bool MtmRecoveryCaughtUp(int nodeId, XLogRecPtr slotLSN)
11081108
void MtmSwitchClusterMode(MtmNodeStatus mode)
11091109
{
11101110
Mtm->status = mode;
1111-
elog(WARNING, "Switch to %s mode", MtmNodeStatusMnem[mode]);
1111+
elog(NOTICE, "Switch to %s mode", MtmNodeStatusMnem[mode]);
11121112
/* ??? Something else to do here? */
11131113
}
11141114

@@ -1136,7 +1136,7 @@ MtmCheckClusterLock()
11361136
break;
11371137
} else {
11381138
/* recovered replica catched up with master */
1139-
elog(WARNING, "WAL-sender %d complete recovery", i);
1139+
elog(NOTICE, "WAL-sender %d complete recovery", i);
11401140
BIT_CLEAR(Mtm->walSenderLockerMask, i);
11411141
}
11421142
}
@@ -1155,7 +1155,7 @@ MtmCheckClusterLock()
11551155
} else {
11561156
/* All lockers are synchronized their logs */
11571157
/* Remove lock and mark them as receovered */
1158-
elog(WARNING, "Complete recovery of %d nodes (node mask %lx)", Mtm->nLockers, (long) Mtm->nodeLockerMask);
1158+
elog(NOTICE, "Complete recovery of %d nodes (node mask %lx)", Mtm->nLockers, (long) Mtm->nodeLockerMask);
11591159
Assert(Mtm->walSenderLockerMask == 0);
11601160
Assert((Mtm->nodeLockerMask & Mtm->disabledNodeMask) == Mtm->nodeLockerMask);
11611161
Mtm->disabledNodeMask &= ~Mtm->nodeLockerMask;
@@ -1215,7 +1215,7 @@ bool MtmRefreshClusterStatus(bool nowait)
12151215

12161216
clique = MtmFindMaxClique(matrix, MtmNodes, &clique_size);
12171217
if (clique_size >= MtmNodes/2+1) { /* have quorum */
1218-
elog(WARNING, "Find clique %lx, disabledNodeMask %lx", (long) clique, (long) Mtm->disabledNodeMask);
1218+
elog(NOTICE, "Find clique %lx, disabledNodeMask %lx", (long) clique, (long) Mtm->disabledNodeMask);
12191219
MtmLock(LW_EXCLUSIVE);
12201220
mask = ~clique & (((nodemask_t)1 << MtmNodes)-1) & ~Mtm->disabledNodeMask; /* new disabled nodes mask */
12211221
for (i = 0; mask != 0; i++, mask >>= 1) {
@@ -1243,7 +1243,7 @@ bool MtmRefreshClusterStatus(bool nowait)
12431243
MtmSwitchClusterMode(MTM_RECOVERY);
12441244
}
12451245
} else {
1246-
elog(WARNING, "Clique %lx has no quorum", (long) clique);
1246+
elog(NOTICE, "Clique %lx has no quorum", (long) clique);
12471247
MtmSwitchClusterMode(MTM_IN_MINORITY);
12481248
}
12491249
return true;
@@ -1258,7 +1258,7 @@ void MtmCheckQuorum(void)
12581258
}
12591259
} else {
12601260
if (Mtm->status == MTM_IN_MINORITY) {
1261-
elog(WARNING, "Node is in majority: dissbled mask %lx", (long) Mtm->disabledNodeMask);
1261+
elog(NOTICE, "Node is in majority: dissbled mask %lx", (long) Mtm->disabledNodeMask);
12621262
MtmSwitchClusterMode(MTM_ONLINE);
12631263
}
12641264
}
@@ -1285,7 +1285,7 @@ void MtmOnNodeDisconnect(int nodeId)
12851285
for (ts = Mtm->transListHead; ts != NULL; ts = ts->next) {
12861286
if (!ts->votingCompleted) {
12871287
if (ts->status != TRANSACTION_STATUS_ABORTED) {
1288-
elog(WARNING, "Rollback active transaction %d:%d", ts->gtid.node, ts->gtid.xid);
1288+
elog(NOTICE, "Rollback active transaction %d:%d", ts->gtid.node, ts->gtid.xid);
12891289
MtmAbortTransaction(ts);
12901290
}
12911291
MtmWakeUpBackend(ts);
@@ -1844,7 +1844,7 @@ MtmSlotMode MtmReceiverSlotMode(int nodeId)
18441844
recovery = true;
18451845
if (Mtm->recoverySlot == 0 || Mtm->recoverySlot == nodeId) {
18461846
/* Choose for recovery first available slot */
1847-
elog(WARNING, "Start recovery from node %d", nodeId);
1847+
elog(NOTICE, "Start recovery from node %d", nodeId);
18481848
Mtm->recoverySlot = nodeId;
18491849
return SLOT_OPEN_EXISTED;
18501850
}
@@ -1853,7 +1853,7 @@ MtmSlotMode MtmReceiverSlotMode(int nodeId)
18531853
MtmSleep(STATUS_POLL_DELAY);
18541854
}
18551855
if (recovery) {
1856-
elog(WARNING, "Recreate replication slot for node %d after end of recovery", nodeId);
1856+
elog(NOTICE, "Recreate replication slot for node %d after end of recovery", nodeId);
18571857
} else {
18581858
MTM_INFO("%d: Reuse replication slot for node %d\n", MyProcPid, nodeId);
18591859
}
@@ -1907,7 +1907,7 @@ static void
19071907
MtmOnProcExit(int code, Datum arg)
19081908
{
19091909
if (MtmReplicationNodeId >= 0) {
1910-
elog(WARNING, "WAL-sender to %d is terminated", MtmReplicationNodeId);
1910+
elog(NOTICE, "WAL-sender to %d is terminated", MtmReplicationNodeId);
19111911
MtmOnNodeDisconnect(MtmReplicationNodeId);
19121912
}
19131913
}
@@ -1938,15 +1938,15 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
19381938
}
19391939
MtmLock(LW_EXCLUSIVE);
19401940
if (MtmIsRecoverySession) {
1941-
elog(WARNING, "%d: Node %d start recovery of node %d", MyProcPid, MtmNodeId, MtmReplicationNodeId);
1941+
elog(NOTICE, "%d: Node %d start recovery of node %d", MyProcPid, MtmNodeId, MtmReplicationNodeId);
19421942
if (!BIT_CHECK(Mtm->disabledNodeMask, MtmReplicationNodeId-1)) {
19431943
BIT_SET(Mtm->disabledNodeMask, MtmReplicationNodeId-1);
19441944
Mtm->nNodes -= 1;
19451945
MtmCheckQuorum();
19461946
}
19471947
} else if (BIT_CHECK(Mtm->disabledNodeMask, MtmReplicationNodeId-1)) {
19481948
if (recoveryCompleted) {
1949-
elog(WARNING, "Node %d consider that recovery of node %d is completed: start normal replication", MtmNodeId, MtmReplicationNodeId);
1949+
elog(NOTICE, "Node %d consider that recovery of node %d is completed: start normal replication", MtmNodeId, MtmReplicationNodeId);
19501950
BIT_CLEAR(Mtm->disabledNodeMask, MtmReplicationNodeId-1);
19511951
Mtm->nNodes += 1;
19521952
MtmCheckQuorum();
@@ -1964,7 +1964,7 @@ static void
19641964
MtmReplicationShutdownHook(struct PGLogicalShutdownHookArgs* args)
19651965
{
19661966
if (MtmReplicationNodeId >= 0) {
1967-
elog(WARNING, "Logical replication to node %d is stopped", MtmReplicationNodeId);
1967+
elog(NOTICE, "Logical replication to node %d is stopped", MtmReplicationNodeId);
19681968
MtmOnNodeDisconnect(MtmReplicationNodeId);
19691969
MtmReplicationNodeId = -1; /* defuse on_proc_exit hook */
19701970
}
@@ -2411,7 +2411,6 @@ static bool MtmTwoPhaseCommit(MtmCurrentTrans* x)
24112411
if (x->isDistributed && x->containsDML) {
24122412
MtmGenerateGid(x->gid);
24132413
if (!x->isTransactionBlock) {
2414-
/* elog(WARNING, "Start transaction block for %s", x->gid); */
24152414
BeginTransactionBlock();
24162415
CommitTransactionCommand();
24172416
StartTransactionCommand();

pglogical_proto.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ pglogical_write_commit(StringInfo out, PGLogicalOutputData *data,
150150
return;
151151
}
152152
if (MtmRecoveryCaughtUp(MtmReplicationNodeId, txn->end_lsn)) {
153+
elog(NOTICE, "wal-sender complete recovery of node %d at LSN(commit %lx, end %lx, log %lx) in transaction %s event %d", MtmReplicationNodeId, commit_lsn, txn->end_lsn, GetXLogInsertRecPtr(), txn->gid, flags);
153154
flags |= PGLOGICAL_CAUGHT_UP;
154155
}
155156
}
@@ -418,7 +419,7 @@ pglogical_init_api(PGLogicalProtoType typ)
418419
{
419420
PGLogicalProtoAPI* res = palloc0(sizeof(PGLogicalProtoAPI));
420421
sscanf(MyReplicationSlot->data.name.data, MULTIMASTER_SLOT_PATTERN, &MtmReplicationNodeId);
421-
elog(WARNING, "%d: PRGLOGICAL init API for slot %s node %d", MyProcPid, MyReplicationSlot->data.name.data, MtmReplicationNodeId);
422+
elog(LOG, "%d: PRGLOGICAL init API for slot %s node %d", MyProcPid, MyReplicationSlot->data.name.data, MtmReplicationNodeId);
422423
res->write_rel = pglogical_write_rel;
423424
res->write_begin = pglogical_write_begin;
424425
res->write_commit = pglogical_write_commit;

pglogical_receiver.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,10 +292,10 @@ pglogical_receiver_main(Datum main_arg)
292292
* So we assume that LSNs are the same for local and remote node
293293
*/
294294
originStartPos = Mtm->status == MTM_RECOVERY ? GetXLogInsertRecPtr() : 0;
295-
elog(WARNING, "Start logical receiver at position %lx from node %d", originStartPos, args->remote_node);
295+
elog(NOTICE, "Start logical receiver at position %lx from node %d", originStartPos, args->remote_node);
296296
} else {
297297
originStartPos = replorigin_get_progress(originId, false);
298-
elog(WARNING, "Restart logical receiver at position %lx from node %d", originStartPos, args->remote_node);
298+
elog(NOTICE, "Restart logical receiver at position %lx from node %d", originStartPos, args->remote_node);
299299
}
300300
CommitTransactionCommand();
301301

0 commit comments

Comments
 (0)