@@ -221,6 +221,7 @@ char const* const MtmNodeStatusMnem[] =
221
221
"Recovery" ,
222
222
"Recovered" ,
223
223
"InMinor" ,
224
+ "OutOfClique" ,
224
225
"OutOfService"
225
226
};
226
227
@@ -366,6 +367,7 @@ void MtmLock(LWLockMode mode)
366
367
if (mode == LW_EXCLUSIVE ) {
367
368
Assert (MtmLockCount == 0 );
368
369
Mtm -> lastLockHolder = MyProcPid ;
370
+ Assert (MyProcPid );
369
371
MtmLockCount = 1 ;
370
372
}
371
373
}
@@ -1145,7 +1147,6 @@ bool MtmWatchdog(timestamp_t now)
1145
1147
MTM_LOG1 ("[STATE] Node %i: Disconnect due to heartbeat timeout (%d msec)" ,
1146
1148
i + 1 , (int )USEC_TO_MSEC (now - Mtm -> nodes [i ].lastHeartbeat ));
1147
1149
MtmOnNodeDisconnect (i + 1 );
1148
- MtmDisableNode (i + 1 );
1149
1150
allAlive = false;
1150
1151
}
1151
1152
}
@@ -1166,8 +1167,11 @@ void MtmPrecommitTransaction(char const* gid)
1166
1167
MTM_ELOG (WARNING , "MtmPrecommitTransaction: transaction '%s' is not found" , gid );
1167
1168
} else {
1168
1169
MtmTransState * ts = tm -> state ;
1169
- Assert (ts != NULL );
1170
- if (ts -> status == TRANSACTION_STATUS_IN_PROGRESS ) {
1170
+ // Assert(ts != NULL);
1171
+ if (ts == NULL ) {
1172
+ MTM_ELOG (WARNING , "MtmPrecommitTransaction: transaction '%s' is not yet prepared, status %s" , gid , MtmTxnStatusMnem [tm -> status ]);
1173
+ MtmUnlock ();
1174
+ } else if (ts -> status == TRANSACTION_STATUS_IN_PROGRESS ) {
1171
1175
ts -> status = TRANSACTION_STATUS_UNKNOWN ;
1172
1176
ts -> csn = MtmAssignCSN ();
1173
1177
MtmAdjustSubtransactions (ts );
@@ -1489,6 +1493,7 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
1489
1493
if (!(ts -> status == TRANSACTION_STATUS_UNKNOWN
1490
1494
|| (ts -> status == TRANSACTION_STATUS_IN_PROGRESS && Mtm -> status == MTM_RECOVERY )))
1491
1495
{
1496
+ MtmUnlock ();
1492
1497
MTM_ELOG (ERROR , "Attempt to commit %s transaction %s (%llu)" ,
1493
1498
MtmTxnStatusMnem [ts -> status ], ts -> gid , (long64 )ts -> xid );
1494
1499
}
@@ -2014,15 +2019,19 @@ static int64 MtmGetSlotLag(int nodeId)
2014
2019
*/
2015
2020
bool MtmIsRecoveredNode (int nodeId )
2016
2021
{
2017
- if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
2018
- if (!MtmIsRecoverySession ) {
2019
- MTM_ELOG (ERROR , "Node %d is marked as disabled but is not in recovery mode" , nodeId );
2020
- }
2021
- return true;
2022
- } else {
2023
- MtmIsRecoverySession = false; /* recovery is completed */
2024
- return false;
2025
- }
2022
+ if (!BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ))
2023
+ Assert (!MtmIsRecoverySession );
2024
+
2025
+ return BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ) && MtmIsRecoverySession ;
2026
+ // if (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1)) {
2027
+ // if (!MtmIsRecoverySession) {
2028
+ // MTM_ELOG(WARNING, "Node %d is marked as disabled but is not in recovery mode", nodeId);
2029
+ // }
2030
+ // return true;
2031
+ // } else {
2032
+ // MtmIsRecoverySession = false; /* recovery is completed */
2033
+ // return false;
2034
+ // }
2026
2035
}
2027
2036
2028
2037
/*
@@ -2048,7 +2057,7 @@ void MtmCheckRecoveryCaughtUp(int nodeId, lsn_t slotLSN)
2048
2057
*/
2049
2058
MTM_LOG1 ("Node %d is almost caught-up: slot position %llx, WAL position %llx, active transactions %d" ,
2050
2059
nodeId , slotLSN , walLSN , Mtm -> nActiveTransactions );
2051
- BIT_SET (Mtm -> originLockNodeMask , nodeId - 1 );
2060
+ BIT_SET (Mtm -> originLockNodeMask , nodeId - 1 ); // XXXX: log that
2052
2061
} else {
2053
2062
MTM_LOG2 ("Continue recovery of node %d, slot position %llx, WAL position %llx,"
2054
2063
" WAL sender position %llx, lockers %llx, active transactions %d" , nodeId , slotLSN ,
@@ -2070,6 +2079,7 @@ bool MtmRecoveryCaughtUp(int nodeId, lsn_t walEndPtr)
2070
2079
if (MtmIsRecoveredNode (nodeId ) && Mtm -> nActiveTransactions == 0 ) {
2071
2080
MtmStateProcessNeighborEvent (nodeId , MTM_NEIGHBOR_RECOVERY_CAUGHTUP );
2072
2081
caughtUp = true;
2082
+ MtmIsRecoverySession = false;
2073
2083
}
2074
2084
MtmUnlock ();
2075
2085
return caughtUp ;
@@ -2087,6 +2097,7 @@ MtmLockCluster(void)
2087
2097
}
2088
2098
MtmLock (LW_EXCLUSIVE );
2089
2099
if (BIT_CHECK (Mtm -> originLockNodeMask , MtmNodeId - 1 )) {
2100
+ MtmUnlock ();
2090
2101
elog (ERROR , "There is already pending exclusive lock" );
2091
2102
}
2092
2103
BIT_SET (Mtm -> originLockNodeMask , MtmNodeId - 1 );
@@ -2339,6 +2350,7 @@ static void MtmInitialize()
2339
2350
Mtm -> nLiveNodes = 0 ; //MtmNodes;
2340
2351
Mtm -> nAllNodes = MtmNodes ;
2341
2352
Mtm -> disabledNodeMask = 7 ; //XXXX
2353
+ Mtm -> clique = 7 ; // XXXX
2342
2354
Mtm -> stalledNodeMask = 0 ;
2343
2355
Mtm -> stoppedNodeMask = 0 ;
2344
2356
Mtm -> deadNodeMask = 0 ;
@@ -2371,7 +2383,7 @@ static void MtmInitialize()
2371
2383
for (i = 0 ; i < MtmNodes ; i ++ ) {
2372
2384
Mtm -> nodes [i ].oldestSnapshot = 0 ;
2373
2385
Mtm -> nodes [i ].disabledNodeMask = 0 ;
2374
- Mtm -> nodes [i ].connectivityMask = 7 ;
2386
+ Mtm -> nodes [i ].connectivityMask = 7 ; // XXXX
2375
2387
Mtm -> nodes [i ].lockGraphUsed = 0 ;
2376
2388
Mtm -> nodes [i ].lockGraphAllocated = 0 ;
2377
2389
Mtm -> nodes [i ].lockGraphData = NULL ;
@@ -3214,17 +3226,56 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3214
3226
Mtm -> preparedTransactionsLoaded = true;
3215
3227
}
3216
3228
3217
- while (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ) ||
3218
- BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 ))
3229
+ // while (BIT_CHECK(Mtm->disabledNodeMask, MtmNodeId - 1) ||
3230
+ // BIT_CHECK(SELF_CONNECTIVITY_MASK, nodeId - 1) ||
3231
+ // !BIT_CHECK(Mtm->clique, nodeId - 1) ||
3232
+ // !BIT_CHECK(Mtm->clique, MtmNodeId - 1) )
3233
+ // {
3234
+ // if (*shutdown)
3235
+ // {
3236
+ // MtmUnlock();
3237
+ // return REPLMODE_EXIT;
3238
+ // }
3239
+
3240
+ // if ((Mtm->recoverySlot == 0 || Mtm->recoverySlot == nodeId) &&
3241
+ // (!BIT_CHECK(SELF_CONNECTIVITY_MASK, nodeId - 1)))
3242
+ // {
3243
+ // /* Lock on us */
3244
+ // Mtm->recoverySlot = nodeId;
3245
+ // MtmPollStatusOfPreparedTransactions();
3246
+ // MtmUnlock();
3247
+ // return REPLMODE_RECOVERY;
3248
+ // }
3249
+
3250
+ // MTM_LOG1("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx",
3251
+ // nodeId, Mtm->recoverySlot, Mtm->donorNodeId, SELF_CONNECTIVITY_MASK, Mtm->disabledNodeMask);
3252
+
3253
+ // MtmUnlock();
3254
+ // /* delay opening of other slots until recovery is completed */
3255
+ // MtmSleep(STATUS_POLL_DELAY);
3256
+ // MtmLock(LW_EXCLUSIVE);
3257
+ // }
3258
+
3259
+ // MtmUnlock();
3260
+
3261
+ // return REPLMODE_RECOVERED;
3262
+
3263
+ /* Wait until the node is connected and both receiver and sender are in the clique */
3264
+ while (BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 ) ||
3265
+ !BIT_CHECK (Mtm -> clique , nodeId - 1 ) ||
3266
+ !BIT_CHECK (Mtm -> clique , MtmNodeId - 1 ) )
3219
3267
{
3268
+ MtmUnlock ();
3220
3269
if (* shutdown )
3221
- {
3222
- MtmUnlock ();
3223
3270
return REPLMODE_EXIT ;
3224
- }
3271
+ MtmSleep (STATUS_POLL_DELAY );
3272
+ MtmLock (LW_EXCLUSIVE );
3273
+ }
3225
3274
3226
- if ((Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId ) &&
3227
- (!BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 )))
3275
+ if (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ))
3276
+ {
3277
+ /* Ok, then start recovery by luckiest walreceiver */
3278
+ if (Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId )
3228
3279
{
3229
3280
/* Lock on us */
3230
3281
Mtm -> recoverySlot = nodeId ;
@@ -3233,87 +3284,19 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3233
3284
return REPLMODE_RECOVERY ;
3234
3285
}
3235
3286
3236
- MTM_LOG1 ("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx" ,
3237
- nodeId , Mtm -> recoverySlot , Mtm -> donorNodeId , SELF_CONNECTIVITY_MASK , Mtm -> disabledNodeMask );
3238
-
3239
- MtmUnlock ();
3240
- /* delay opening of other slots until recovery is completed */
3241
- MtmSleep (STATUS_POLL_DELAY );
3242
- MtmLock (LW_EXCLUSIVE );
3287
+ /* And make the less lucky walreceivers wait until recovery is completed */
3288
+ while (BIT_CHECK (Mtm -> disabledNodeMask , MtmNodeId - 1 ))
3289
+ {
3290
+ MtmUnlock ();
3291
+ if (* shutdown )
3292
+ return REPLMODE_EXIT ;
3293
+ MtmSleep (STATUS_POLL_DELAY );
3294
+ MtmLock (LW_EXCLUSIVE );
3295
+ }
3243
3296
}
3244
3297
3245
3298
MtmUnlock ();
3246
-
3247
3299
return REPLMODE_RECOVERED ;
3248
-
3249
-
3250
-
3251
-
3252
-
3253
- // while ((Mtm->status != MTM_CONNECTED && Mtm->status != MTM_RECOVERED && Mtm->status != MTM_ONLINE)
3254
- // || BIT_CHECK(Mtm->disabledNodeMask, nodeId-1))
3255
- // // while (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1))
3256
- // {
3257
- // if (*shutdown)
3258
- // {
3259
- // MtmUnlock();
3260
- // return REPLMODE_EXIT;
3261
- // }
3262
- // // /* We are not interested in receiving any deteriorated logical messages from recovered node, so recreate slot */
3263
- // // if (BIT_CHECK(Mtm->disabledNodeMask, nodeId-1)) {
3264
- // // mode = REPLMODE_CREATE_NEW;
3265
- // // }
3266
- // // MTM_LOG2("%d: receiver slot mode %s", MyProcPid, MtmNodeStatusMnem[Mtm->status]);
3267
-
3268
- // if (Mtm->status == MTM_RECOVERY) {
3269
- // mode = REPLMODE_RECOVERED;
3270
- // /* Choose node for recovery if
3271
- // * 1. It is not chosen yet or the same node was chosen before
3272
- // * 2. It is donor node or there is no donor node
3273
- // * 3. Connections with all other live nodes were established
3274
- // */
3275
- // if ((Mtm->recoverySlot == 0 || Mtm->recoverySlot == nodeId)
3276
- // && (Mtm->donorNodeId == MtmNodeId || Mtm->donorNodeId == nodeId)
3277
- // && (SELF_CONNECTIVITY_MASK & ~Mtm->disabledNodeMask) == 0)
3278
- // {
3279
- // /* Choose for recovery first available slot or slot of donor node (if any) */
3280
- // if (Mtm->nAllNodes >= 3) {
3281
- // MTM_ELOG(WARNING, "Process %d starts recovery from node %d restartLSNs={%llx, %llx, %llx}",
3282
- // MyProcPid, nodeId, Mtm->nodes[0].restartLSN, Mtm->nodes[1].restartLSN, Mtm->nodes[2].restartLSN);
3283
- // } else {
3284
- // MTM_ELOG(WARNING, "Process %d starts recovery from node %d", MyProcPid, nodeId);
3285
- // }
3286
- // Mtm->recoverySlot = nodeId;
3287
- // // Mtm->nReceivers = 0;
3288
- // // Mtm->nSenders = 0;
3289
- // // Mtm->recoveryCount += 1;
3290
- // // Mtm->pglogicalReceiverMask = 0;
3291
- // // Mtm->pglogicalSenderMask = 0;
3292
- // MtmPollStatusOfPreparedTransactions();
3293
- // MtmUnlock();
3294
- // return REPLMODE_RECOVERY;
3295
- // }
3296
- // }
3297
- // MTM_LOG1("Replication to node %d is pending: recovery node=%d, donor node=%d, connectivity mask=%llx, disabled mask=%llx",
3298
- // nodeId, Mtm->recoverySlot, Mtm->donorNodeId, SELF_CONNECTIVITY_MASK, Mtm->disabledNodeMask);
3299
- // MtmUnlock();
3300
- // /* delay opening of other slots until recovery is completed */
3301
- // MtmSleep(STATUS_POLL_DELAY);
3302
- // MtmLock(LW_EXCLUSIVE);
3303
- // }
3304
- // if (Mtm->status == MTM_RECOVERED) {
3305
- // mode = REPLMODE_RECOVERED;
3306
- // }
3307
- // // if (mode == REPLMODE_RECOVERED) {
3308
- // // MTM_LOG1("%d: Restart replication from node %d after end of recovery", MyProcPid, nodeId);
3309
- // // } else if (mode == REPLMODE_CREATE_NEW) {
3310
- // // MTM_LOG1("%d: Start replication from recovered node %d", MyProcPid, nodeId);
3311
- // // } else {
3312
- // // MTM_LOG1("%d: Continue replication from node %d", MyProcPid, nodeId);
3313
- // // }
3314
- // BIT_SET(Mtm->reconnectMask, nodeId-1); /* arbiter should try to reestablish connection with this node */
3315
- // MtmUnlock();
3316
- // return mode;
3317
3300
}
3318
3301
3319
3302
static bool MtmIsBroadcast ()
0 commit comments