@@ -170,7 +170,7 @@ static void MtmShmemStartup(void);
170
170
171
171
static BgwPool * MtmPoolConstructor (void );
172
172
static bool MtmRunUtilityStmt (PGconn * conn , char const * sql , char * * errmsg );
173
- static void MtmBroadcastUtilityStmt (char const * sql , bool ignoreError );
173
+ static void MtmBroadcastUtilityStmt (char const * sql , bool ignoreError , int forceOnNode );
174
174
static void MtmProcessDDLCommand (char const * queryString , bool transactional );
175
175
176
176
static void MtmLockCluster (void );
@@ -978,7 +978,9 @@ MtmBeginTransaction(MtmCurrentTrans* x)
978
978
x -> isTwoPhase = false;
979
979
x -> isTransactionBlock = IsTransactionBlock ();
980
980
/* Application name can be changed using the PGAPPNAME environment variable */
981
- if (x -> isDistributed && Mtm -> status != MTM_ONLINE && strcmp (application_name , MULTIMASTER_ADMIN ) != 0 && !MtmBypass ) {
981
+ if (x -> isDistributed && Mtm -> status != MTM_ONLINE && strcmp (application_name , MULTIMASTER_ADMIN ) != 0
982
+ && strcmp (application_name , MULTIMASTER_BROADCAST_SERVICE ) != 0
983
+ && !MtmBypass ) {
982
984
/* Reject all user transactions while the cluster is offline.
983
985
* Allow execution of transactions by bg-workers to make it possible to perform recovery.
984
986
*/
@@ -2406,7 +2408,7 @@ static void MtmInitialize()
2406
2408
for (i = 0 ; i < MtmNodes ; i ++ ) {
2407
2409
Mtm -> nodes [i ].oldestSnapshot = 0 ;
2408
2410
Mtm -> nodes [i ].disabledNodeMask = 0 ;
2409
- Mtm -> nodes [i ].connectivityMask = 7 ; // XXXX
2411
+ Mtm -> nodes [i ].connectivityMask = ((( nodemask_t ) 1 << MtmNodes ) - 1 );
2410
2412
Mtm -> nodes [i ].lockGraphUsed = 0 ;
2411
2413
Mtm -> nodes [i ].lockGraphAllocated = 0 ;
2412
2414
Mtm -> nodes [i ].lockGraphData = NULL ;
@@ -2419,6 +2421,7 @@ static void MtmInitialize()
2419
2421
Mtm -> nodes [i ].originId = InvalidRepOriginId ;
2420
2422
Mtm -> nodes [i ].timeline = 0 ;
2421
2423
Mtm -> nodes [i ].nHeartbeats = 0 ;
2424
+ Mtm -> nodes [i ].manualRecovery = false;
2422
2425
Mtm -> nodes [i ].slotDeleted = false;
2423
2426
}
2424
2427
Mtm -> nodes [MtmNodeId - 1 ].originId = DoNotReplicateId ;
@@ -3341,9 +3344,8 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
3341
3344
}
3342
3345
3343
3346
/* Wait until the node is connected and both receiver and sender are in the clique */
3344
- while (BIT_CHECK (SELF_CONNECTIVITY_MASK , nodeId - 1 ) ||
3345
- !BIT_CHECK (Mtm -> clique , nodeId - 1 ) ||
3346
- !BIT_CHECK (Mtm -> clique , MtmNodeId - 1 ) )
3347
+ while (BIT_CHECK (EFFECTIVE_CONNECTIVITY_MASK , nodeId - 1 ) ||
3348
+ BIT_CHECK (EFFECTIVE_CONNECTIVITY_MASK , MtmNodeId - 1 ))
3347
3349
{
3348
3350
MtmUnlock ();
3349
3351
if (* shutdown )
@@ -3398,6 +3400,7 @@ void MtmRecoverNode(int nodeId)
3398
3400
MTM_ELOG (ERROR , "NodeID %d is out of range [1,%d]" , nodeId , Mtm -> nAllNodes );
3399
3401
}
3400
3402
MtmLock (LW_EXCLUSIVE );
3403
+ Mtm -> nodes [nodeId - 1 ].manualRecovery = true;
3401
3404
if (BIT_CHECK (Mtm -> stoppedNodeMask , nodeId - 1 ))
3402
3405
{
3403
3406
Assert (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ));
@@ -3408,8 +3411,8 @@ void MtmRecoverNode(int nodeId)
3408
3411
3409
3412
if (!MtmIsBroadcast ())
3410
3413
{
3411
- MtmBroadcastUtilityStmt (psprintf ("select pg_create_logical_replication_slot('" MULTIMASTER_SLOT_PATTERN "', '" MULTIMASTER_NAME "')" , nodeId ), true);
3412
- MtmBroadcastUtilityStmt (psprintf ("select mtm.recover_node(%d)" , nodeId ), true);
3414
+ MtmBroadcastUtilityStmt (psprintf ("select pg_create_logical_replication_slot('" MULTIMASTER_SLOT_PATTERN "', '" MULTIMASTER_NAME "')" , nodeId ), true, 0 );
3415
+ MtmBroadcastUtilityStmt (psprintf ("select mtm.recover_node(%d)" , nodeId ), true, 0 );
3413
3416
}
3414
3417
}
3415
3418
@@ -3439,7 +3442,7 @@ void MtmResumeNode(int nodeId)
3439
3442
3440
3443
if (!MtmIsBroadcast ())
3441
3444
{
3442
- MtmBroadcastUtilityStmt (psprintf ("select mtm.resume_node(%d)" , nodeId ), true);
3445
+ MtmBroadcastUtilityStmt (psprintf ("select mtm.resume_node(%d)" , nodeId ), true, nodeId );
3443
3446
}
3444
3447
}
3445
3448
@@ -3454,20 +3457,19 @@ void MtmStopNode(int nodeId, bool dropSlot)
3454
3457
MTM_ELOG (ERROR , "NodeID %d is out of range [1,%d]" , nodeId , Mtm -> nAllNodes );
3455
3458
}
3456
3459
3457
- MtmLock (LW_EXCLUSIVE );
3460
+ if (!MtmIsBroadcast ())
3461
+ {
3462
+ MtmBroadcastUtilityStmt (psprintf ("select mtm.stop_node(%d,%s)" , nodeId , dropSlot ? "true" : "false" ), true, nodeId );
3463
+ }
3458
3464
3465
+ MtmLock (LW_EXCLUSIVE );
3459
3466
BIT_SET (Mtm -> stoppedNodeMask , nodeId - 1 );
3460
-
3461
3467
if (!BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ))
3462
3468
{
3463
3469
MtmDisableNode (nodeId );
3464
3470
}
3465
3471
MtmUnlock ();
3466
3472
3467
- if (!MtmIsBroadcast ())
3468
- {
3469
- MtmBroadcastUtilityStmt (psprintf ("select mtm.stop_node(%d,%s)" , nodeId , dropSlot ? "true" : "false" ), true);
3470
- }
3471
3473
if (dropSlot )
3472
3474
{
3473
3475
MtmDropSlot (nodeId );
@@ -3541,12 +3543,8 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
3541
3543
}
3542
3544
3543
3545
if (BIT_CHECK (Mtm -> stoppedNodeMask , MtmReplicationNodeId - 1 )) {
3544
- MTM_ELOG (WARNING , "Stopped node %d tries to initiate recovery" , MtmReplicationNodeId );
3545
- do {
3546
- MtmUnlock ();
3547
- MtmSleep (STATUS_POLL_DELAY );
3548
- MtmLock (LW_EXCLUSIVE );
3549
- } while (BIT_CHECK (Mtm -> stoppedNodeMask , MtmReplicationNodeId - 1 ));
3546
+ MtmUnlock ();
3547
+ MTM_ELOG (ERROR , "Stopped node %d tries to connect" , MtmReplicationNodeId );
3550
3548
}
3551
3549
3552
3550
if (MtmIsRecoverySession ) {
@@ -3853,8 +3851,8 @@ mtm_add_node(PG_FUNCTION_ARGS)
3853
3851
}
3854
3852
if (!MtmIsBroadcast ())
3855
3853
{
3856
- MtmBroadcastUtilityStmt (psprintf ("select pg_create_logical_replication_slot('" MULTIMASTER_SLOT_PATTERN "', '" MULTIMASTER_NAME "')" , Mtm -> nAllNodes + 1 ), true);
3857
- MtmBroadcastUtilityStmt (psprintf ("select mtm.add_node('%s')" , connStr ), true);
3854
+ MtmBroadcastUtilityStmt (psprintf ("select pg_create_logical_replication_slot('" MULTIMASTER_SLOT_PATTERN "', '" MULTIMASTER_NAME "')" , Mtm -> nAllNodes + 1 ), true, 0 );
3855
+ MtmBroadcastUtilityStmt (psprintf ("select mtm.add_node('%s')" , connStr ), true, 0 );
3858
3856
}
3859
3857
else
3860
3858
{
@@ -4399,7 +4397,7 @@ MtmNoticeReceiver(void *i, const PGresult *res)
4399
4397
pfree (stripped_notice );
4400
4398
}
4401
4399
4402
- static void MtmBroadcastUtilityStmt (char const * sql , bool ignoreError )
4400
+ static void MtmBroadcastUtilityStmt (char const * sql , bool ignoreError , int forceOnNode )
4403
4401
{
4404
4402
int i = 0 ;
4405
4403
nodemask_t disabledNodeMask = Mtm -> disabledNodeMask ;
@@ -4411,7 +4409,7 @@ static void MtmBroadcastUtilityStmt(char const* sql, bool ignoreError)
4411
4409
4412
4410
for (i = 0 ; i < nNodes ; i ++ )
4413
4411
{
4414
- if (!BIT_CHECK (disabledNodeMask , i ))
4412
+ if (!BIT_CHECK (disabledNodeMask , i ) || ( i + 1 == forceOnNode ) )
4415
4413
{
4416
4414
conns [i ] = PQconnectdb_safe (psprintf ("%s application_name=%s" , Mtm -> nodes [i ].con .connStr , MULTIMASTER_BROADCAST_SERVICE ));
4417
4415
if (PQstatus (conns [i ]) != CONNECTION_OK )
0 commit comments