@@ -225,6 +225,7 @@ char const* const MtmNodeStatusMnem[] =
225
225
"Recovery" ,
226
226
"Recovered" ,
227
227
"InMinor" ,
228
+ "OutOfClique" ,
228
229
"OutOfService"
229
230
};
230
231
@@ -373,6 +374,7 @@ void MtmLock(LWLockMode mode)
373
374
if (mode == LW_EXCLUSIVE ) {
374
375
Assert (MtmLockCount == 0 );
375
376
Mtm -> lastLockHolder = MyProcPid ;
377
+ Assert (MyProcPid );
376
378
MtmLockCount = 1 ;
377
379
}
378
380
}
@@ -1155,7 +1157,6 @@ bool MtmWatchdog(timestamp_t now)
1155
1157
MTM_LOG1 ("[STATE] Node %i: Disconnect due to heartbeat timeout (%d msec)" ,
1156
1158
i + 1 , (int )USEC_TO_MSEC (now - Mtm -> nodes [i ].lastHeartbeat ));
1157
1159
MtmOnNodeDisconnect (i + 1 );
1158
- MtmDisableNode (i + 1 );
1159
1160
allAlive = false;
1160
1161
}
1161
1162
}
@@ -1176,6 +1177,7 @@ void MtmPrecommitTransaction(char const* gid)
1176
1177
MTM_ELOG (WARNING , "MtmPrecommitTransaction: transaction '%s' is not found" , gid );
1177
1178
} else {
1178
1179
MtmTransState * ts = tm -> state ;
1180
+ // Assert(ts != NULL);
1179
1181
if (ts == NULL ) {
1180
1182
MTM_ELOG (WARNING , "MtmPrecommitTransaction: transaction '%s' is not yet prepared, status %s" , gid , MtmTxnStatusMnem [tm -> status ]);
1181
1183
MtmUnlock ();
@@ -1501,6 +1503,7 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1501
1503
if (!(ts -> status == TRANSACTION_STATUS_UNKNOWN
1502
1504
|| (ts -> status == TRANSACTION_STATUS_IN_PROGRESS && Mtm -> status == MTM_RECOVERY )))
1503
1505
{
1506
+ MtmUnlock ();
1504
1507
MTM_ELOG (ERROR , "Attempt to commit %s transaction %s (%llu)" ,
1505
1508
MtmTxnStatusMnem [ts -> status ], ts -> gid , (long64 )ts -> xid );
1506
1509
}
@@ -2026,15 +2029,19 @@ static int64 MtmGetSlotLag(int nodeId)
2026
2029
*/
2027
2030
/*
 * MtmIsRecoveredNode (as changed by this hunk):
 *   Returns true only when bit (nodeId - 1) is set in Mtm->disabledNodeMask
 *   AND MtmIsRecoverySession is set; otherwise returns false.
 *
 *   Removed ('-') version: returned true whenever the bit was set (raising
 *   ERROR if MtmIsRecoverySession was not set) and, when the bit was clear,
 *   reset MtmIsRecoverySession to false before returning false.
 *
 *   Added ('+') version: no longer writes MtmIsRecoverySession. When the bit
 *   is clear it only Asserts that the flag is already false.
 *   NOTE(review): presumably Assert is compiled out in non-assert builds, in
 *   which case a stale MtmIsRecoverySession is silently tolerated -- confirm.
 */
bool MtmIsRecoveredNode (int nodeId )
2028
2031
{
2029
- if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
2030
- if (!MtmIsRecoverySession ) {
2031
- MTM_ELOG (ERROR , "Node %d is marked as disabled but is not in recovery mode" , nodeId );
2032
- }
2033
- return true;
2034
- } else {
2035
- MtmIsRecoverySession = false; /* recovery is completed */
2036
- return false;
2037
- }
2032
+ if (!BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ))
2033
+ Assert (!MtmIsRecoverySession );
2034
+
2035
+ return BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ) && MtmIsRecoverySession ;
/*
 * The '//' lines below are unreachable (they follow the return) and keep a
 * commented-out copy of the previous logic, with the log level written as
 * WARNING where the removed code used ERROR.
 */
2036
+ // if (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1)) {
2037
+ // if (!MtmIsRecoverySession) {
2038
+ // MTM_ELOG(WARNING, "Node %d is marked as disabled but is not in recovery mode", nodeId);
2039
+ // }
2040
+ // return true;
2041
+ // } else {
2042
+ // MtmIsRecoverySession = false; /* recovery is completed */
2043
+ // return false;
2044
+ // }
2038
2045
}
2039
2046
2040
2047
/*
@@ -2060,7 +2067,7 @@ void MtmCheckRecoveryCaughtUp(int nodeId, lsn_t slotLSN)
2060
2067
*/
2061
2068
MTM_LOG1 ("Node %d is almost caught-up: slot position %llx, WAL position %llx, active transactions %d" ,
2062
2069
nodeId , slotLSN , walLSN , Mtm -> nActiveTransactions );
2063
- BIT_SET (Mtm -> originLockNodeMask , nodeId - 1 );
2070
+ BIT_SET (Mtm -> originLockNodeMask , nodeId - 1 ); // XXXX: log that
2064
2071
} else {
2065
2072
MTM_LOG2 ("Continue recovery of node %d, slot position %llx, WAL position %llx,"
2066
2073
" WAL sender position %llx, lockers %llx, active transactions %d" , nodeId , slotLSN ,
@@ -2082,6 +2089,7 @@ bool MtmRecoveryCaughtUp(int nodeId, lsn_t walEndPtr)
2082
2089
if (MtmIsRecoveredNode (nodeId ) && Mtm -> nActiveTransactions == 0 ) {
2083
2090
MtmStateProcessNeighborEvent (nodeId , MTM_NEIGHBOR_RECOVERY_CAUGHTUP );
2084
2091
caughtUp = true;
2092
+ MtmIsRecoverySession = false;
2085
2093
}
2086
2094
MtmUnlock ();
2087
2095
return caughtUp ;
@@ -2099,6 +2107,7 @@ MtmLockCluster(void)
2099
2107
}
2100
2108
MtmLock (LW_EXCLUSIVE );
2101
2109
if (BIT_CHECK (Mtm -> originLockNodeMask , MtmNodeId - 1 )) {
2110
+ MtmUnlock ();
2102
2111
elog (ERROR , "There is already pending exclusive lock" );
2103
2112
}
2104
2113
BIT_SET (Mtm -> originLockNodeMask , MtmNodeId - 1 );
@@ -2351,6 +2360,7 @@ static void MtmInitialize()
2351
2360
Mtm -> nLiveNodes = 0 ; //MtmNodes;
2352
2361
Mtm -> nAllNodes = MtmNodes ;
2353
2362
Mtm -> disabledNodeMask = 7 ; //XXXX
2363
+ Mtm -> clique = 7 ; // XXXX
2354
2364
Mtm -> stalledNodeMask = 0 ;
2355
2365
Mtm -> stoppedNodeMask = 0 ;
2356
2366
Mtm -> deadNodeMask = 0 ;
@@ -2383,7 +2393,7 @@ static void MtmInitialize()
2383
2393
for (i = 0 ; i < MtmNodes ; i ++ ) {
2384
2394
Mtm -> nodes [i ].oldestSnapshot = 0 ;
2385
2395
Mtm -> nodes [i ].disabledNodeMask = 0 ;
2386
- Mtm -> nodes [i ].connectivityMask = 7 ;
2396
+ Mtm -> nodes [i ].connectivityMask = 7 ; // XXXX
2387
2397
Mtm -> nodes [i ].lockGraphUsed = 0 ;
2388
2398
Mtm -> nodes [i ].lockGraphAllocated = 0 ;
2389
2399
Mtm -> nodes [i ].lockGraphData = NULL ;
@@ -3308,17 +3318,56 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3308
3318
Mtm -> preparedTransactionsLoaded = true;
3309
3319
}
3310
3320
3311
- while (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ) ||
3312
- BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 ))
3321
+ // while (BIT_CHECK(Mtm->disabledNodeMask, MtmNodeId - 1) ||
3322
+ // BIT_CHECK(SELF_CONNECTIVITY_MASK, nodeId - 1) ||
3323
+ // !BIT_CHECK(Mtm->clique, nodeId - 1) ||
3324
+ // !BIT_CHECK(Mtm->clique, MtmNodeId - 1) )
3325
+ // {
3326
+ // if (*shutdown)
3327
+ // {
3328
+ // MtmUnlock();
3329
+ // return REPLMODE_EXIT;
3330
+ // }
3331
+
3332
+ // if ((Mtm->recoverySlot == 0 || Mtm->recoverySlot == nodeId) &&
3333
+ // (!BIT_CHECK(SELF_CONNECTIVITY_MASK, nodeId - 1)))
3334
+ // {
3335
+ // /* Lock on us */
3336
+ // Mtm->recoverySlot = nodeId;
3337
+ // MtmPollStatusOfPreparedTransactions();
3338
+ // MtmUnlock();
3339
+ // return REPLMODE_RECOVERY;
3340
+ // }
3341
+
3342
+ // MTM_LOG1("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx",
3343
+ // nodeId, Mtm->recoverySlot, Mtm->donorNodeId, SELF_CONNECTIVITY_MASK, Mtm->disabledNodeMask);
3344
+
3345
+ // MtmUnlock();
3346
+ // /* delay opening of other slots until recovery is completed */
3347
+ // MtmSleep(STATUS_POLL_DELAY);
3348
+ // MtmLock(LW_EXCLUSIVE);
3349
+ // }
3350
+
3351
+ // MtmUnlock();
3352
+
3353
+ // return REPLMODE_RECOVERED;
3354
+
3355
+ /* Wait until the node is connected and both receiver and sender are in the clique */
3356
+ while (BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 ) ||
3357
+ !BIT_CHECK (Mtm -> clique , nodeId - 1 ) ||
3358
+ !BIT_CHECK (Mtm -> clique , MtmNodeId - 1 ) )
3313
3359
{
3360
+ MtmUnlock ();
3314
3361
if (* shutdown )
3315
- {
3316
- MtmUnlock ();
3317
3362
return REPLMODE_EXIT ;
3318
- }
3363
+ MtmSleep (STATUS_POLL_DELAY );
3364
+ MtmLock (LW_EXCLUSIVE );
3365
+ }
3319
3366
3320
- if ((Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId ) &&
3321
- (!BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 )))
3367
+ if (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ))
3368
+ {
3369
+ /* OK, then let the luckiest walreceiver start recovery */
3370
+ if (Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId )
3322
3371
{
3323
3372
/* Lock on us */
3324
3373
Mtm -> recoverySlot = nodeId ;
@@ -3327,87 +3376,19 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3327
3376
return REPLMODE_RECOVERY ;
3328
3377
}
3329
3378
3330
- MTM_LOG1 ("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx" ,
3331
- nodeId , Mtm -> recoverySlot , Mtm -> donorNodeId , SELF_CONNECTIVITY_MASK , Mtm -> disabledNodeMask );
3332
-
3333
- MtmUnlock ();
3334
- /* delay opening of other slots until recovery is completed */
3335
- MtmSleep (STATUS_POLL_DELAY );
3336
- MtmLock (LW_EXCLUSIVE );
3379
+ /* And force the less lucky walreceivers to wait until recovery is completed */
3380
+ while (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ))
3381
+ {
3382
+ MtmUnlock ();
3383
+ if (* shutdown )
3384
+ return REPLMODE_EXIT ;
3385
+ MtmSleep (STATUS_POLL_DELAY );
3386
+ MtmLock (LW_EXCLUSIVE );
3387
+ }
3337
3388
}
3338
3389
3339
3390
MtmUnlock ();
3340
-
3341
3391
return REPLMODE_RECOVERED ;
3342
-
3343
-
3344
-
3345
-
3346
-
3347
- // while ((Mtm->status != MTM_CONNECTED && Mtm->status != MTM_RECOVERED && Mtm->status != MTM_ONLINE)
3348
- // || BIT_CHECK(Mtm->disabledNodeMask, nodeId-1))
3349
- // // while (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1))
3350
- // {
3351
- // if (*shutdown)
3352
- // {
3353
- // MtmUnlock();
3354
- // return REPLMODE_EXIT;
3355
- // }
3356
- // // /* We are not interested in receiving any deteriorated logical messages from recovered node, so recreate slot */
3357
- // // if (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1)) {
3358
- // // mode = REPLMODE_CREATE_NEW;
3359
- // // }
3360
- // // MTM_LOG2("%d: receiver slot mode %s", MyProcPid, MtmNodeStatusMnem[Mtm->status]);
3361
-
3362
- // if (Mtm->status == MTM_RECOVERY) {
3363
- // mode = REPLMODE_RECOVERED;
3364
- // /* Choose node for recovery if
3365
- // * 1. It is not chosen yet or the same node was chosen before
3366
- // * 2. It is donor node or there is no donor node
3367
- // * 3. Connections with all other live nodes were established
3368
- // */
3369
- // if ((Mtm->recoverySlot == 0 || Mtm->recoverySlot == nodeId)
3370
- // && (Mtm->donorNodeId == MtmNodeId || Mtm->donorNodeId == nodeId)
3371
- // && (SELF_CONNECTIVITY_MASK & ~Mtm->disabledNodeMask) == 0)
3372
- // {
3373
- // /* Choose for recovery first available slot or slot of donor node (if any) */
3374
- // if (Mtm->nAllNodes >= 3) {
3375
- // MTM_ELOG(WARNING, "Process %d starts recovery from node %d restartLSNs={%llx, %llx, %llx}",
3376
- // MyProcPid, nodeId, Mtm->nodes[0].restartLSN, Mtm->nodes[1].restartLSN, Mtm->nodes[2].restartLSN);
3377
- // } else {
3378
- // MTM_ELOG(WARNING, "Process %d starts recovery from node %d", MyProcPid, nodeId);
3379
- // }
3380
- // Mtm->recoverySlot = nodeId;
3381
- // // Mtm->nReceivers = 0;
3382
- // // Mtm->nSenders = 0;
3383
- // // Mtm->recoveryCount += 1;
3384
- // // Mtm->pglogicalReceiverMask = 0;
3385
- // // Mtm->pglogicalSenderMask = 0;
3386
- // MtmPollStatusOfPreparedTransactions();
3387
- // MtmUnlock();
3388
- // return REPLMODE_RECOVERY;
3389
- // }
3390
- // }
3391
- // MTM_LOG1("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx",
3392
- // nodeId, Mtm->recoverySlot, Mtm->donorNodeId, SELF_CONNECTIVITY_MASK, Mtm->disabledNodeMask);
3393
- // MtmUnlock();
3394
- // /* delay opening of other slots until recovery is completed */
3395
- // MtmSleep(STATUS_POLL_DELAY);
3396
- // MtmLock(LW_EXCLUSIVE);
3397
- // }
3398
- // if (Mtm->status == MTM_RECOVERED) {
3399
- // mode = REPLMODE_RECOVERED;
3400
- // }
3401
- // // if (mode == REPLMODE_RECOVERED) {
3402
- // // MTM_LOG1("%d: Restart replication from node %d after end of recovery", MyProcPid, nodeId);
3403
- // // } else if (mode == REPLMODE_CREATE_NEW) {
3404
- // // MTM_LOG1("%d: Start replication from recovered node %d", MyProcPid, nodeId);
3405
- // // } else {
3406
- // // MTM_LOG1("%d: Continue replication from node %d", MyProcPid, nodeId);
3407
- // // }
3408
- // BIT_SET(Mtm->reconnectMask, nodeId-1); /* arbiter should try to reestablish connection with this node */
3409
- // MtmUnlock();
3410
- // return mode;
3411
3392
}
3412
3393
3413
3394
static bool MtmIsBroadcast ()
0 commit comments