Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a0af6e5

Browse files
knizhnik authored and kelvich committed
Fix suspend node lock
1 parent bcde377 commit a0af6e5

File tree

2 files changed

+44
-13
lines changed

2 files changed

+44
-13
lines changed

multimaster.c

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ static int MtmLockCount;
255255
static bool MtmMajorNode;
256256
static bool MtmBreakConnection;
257257
static bool MtmSuspended;
258+
static bool MtmInsideTransaction;
258259

259260
static ExecutorStart_hook_type PreviousExecutorStartHook;
260261
static ExecutorFinish_hook_type PreviousExecutorFinishHook;
@@ -278,6 +279,15 @@ static bool MtmAtExitHookRegistered = false;
278279
*/
279280
void MtmReleaseLocks(void)
280281
{
282+
MtmResetTransaction();
283+
if (MtmInsideTransaction)
284+
{
285+
MtmLock(LW_EXCLUSIVE);
286+
Assert(Mtm->nRunningTransactions > 0);
287+
Mtm->nRunningTransactions -= 1;
288+
MtmInsideTransaction = false;
289+
MtmUnlock();
290+
}
281291
if (MtmSuspended) {
282292
MtmResumeNode();
283293
}
@@ -287,6 +297,7 @@ void MtmReleaseLocks(void)
287297
Mtm->lastLockHolder = 0;
288298
LWLockRelease((LWLockId)&Mtm->locks[MTM_STATE_LOCK_ID]);
289299
}
300+
290301
}
291302

292303
/*
@@ -870,14 +881,20 @@ MtmBeginTransaction(MtmCurrentTrans* x)
870881
&& strcmp(application_name, MULTIMASTER_ADMIN) != 0)
871882
{
872883
MtmCheckClusterLock();
873-
}
884+
}
885+
MtmInsideTransaction = true;
886+
Mtm->nRunningTransactions += 1;
887+
874888
x->snapshot = MtmAssignCSN();
889+
MTM_LOG1("Start transaction %lld with snapshot %lld", (long64)x->xid, x->snapshot);
875890

876891
MtmUnlock();
877892

878893
MTM_LOG3("%d: MtmLocalTransaction: %s transaction %u uses local snapshot %llu",
879894
MyProcPid, x->isDistributed ? "distributed" : "local", x->xid, x->snapshot);
880-
}
895+
} else {
896+
Assert(MtmInsideTransaction);
897+
}
881898
}
882899

883900

@@ -1328,15 +1345,20 @@ MtmLogAbortLogicalMessage(int nodeId, char const* gid)
13281345
static void
13291346
MtmEndTransaction(MtmCurrentTrans* x, bool commit)
13301347
{
1331-
MTM_LOG2("%d: End transaction %d, prepared=%d, replicated=%d, distributed=%d, 2pc=%d, gid=%s -> %s",
1332-
MyProcPid, x->xid, x->isPrepared, x->isReplicated, x->isDistributed, x->isTwoPhase, x->gid, commit ? "commit" : "abort");
1333-
if (MtmSuspended) {
1334-
MtmResumeNode();
1335-
}
1348+
MTM_LOG3("%d: End transaction %lld, prepared=%d, replicated=%d, distributed=%d, 2pc=%d, gid=%s -> %s, LSN %lld",
1349+
MyProcPid, (long64)x->xid, x->isPrepared, x->isReplicated, x->isDistributed, x->isTwoPhase, x->gid, commit ? "commit" : "abort", (long64)GetXLogInsertRecPtr());
13361350
commit &= (x->status != TRANSACTION_STATUS_ABORTED);
1351+
1352+
MtmLock(LW_EXCLUSIVE);
1353+
1354+
if (MtmInsideTransaction) {
1355+
Assert(Mtm->nRunningTransactions > 0);
1356+
Mtm->nRunningTransactions -= 1;
1357+
MtmInsideTransaction = false;
1358+
}
1359+
13371360
if (x->isDistributed && (x->isPrepared || x->isReplicated) && !x->isTwoPhase) {
13381361
MtmTransState* ts = NULL;
1339-
MtmLock(LW_EXCLUSIVE);
13401362
if (x->isPrepared) {
13411363
ts = (MtmTransState*)hash_search(MtmXid2State, &x->xid, HASH_FIND, NULL);
13421364
Assert(ts != NULL);
@@ -1419,12 +1441,16 @@ MtmEndTransaction(MtmCurrentTrans* x, bool commit)
14191441
#endif
14201442
}
14211443
Assert(!x->isActive);
1422-
MtmUnlock();
14231444
}
1445+
MtmUnlock();
1446+
14241447
MtmResetTransaction();
14251448
if (!MyReplicationSlot) {
14261449
MtmCheckSlots();
14271450
}
1451+
if (MtmSuspended) {
1452+
MtmResumeNode();
1453+
}
14281454
}
14291455

14301456
/*
@@ -2059,22 +2085,24 @@ static void
20592085
MtmSuspendNode(void)
20602086
{
20612087
timestamp_t delay = MIN_WAIT_TIMEOUT;
2062-
bool insideTransaction = MtmTx.isActive;
20632088
Assert(!MtmSuspended);
20642089
MtmLock(LW_EXCLUSIVE);
20652090
if (Mtm->exclusiveLock) {
20662091
elog(ERROR, "There is already pending exclusive lock");
20672092
}
20682093
Mtm->exclusiveLock = true;
20692094
MtmSuspended = true;
2070-
while (Mtm->nActiveTransactions != insideTransaction) {
2095+
MTM_LOG2("Transaction %lld tries to suspend node at %lld insideTransaction=%d, active transactions=%lld",
2096+
(long64)MtmTx.xid, MtmGetCurrentTime(), insideTransaction, (long64)Mtm->nRunningTransactions);
2097+
while (Mtm->nRunningTransactions != 1) { /* I am one */
20712098
MtmUnlock();
20722099
MtmSleep(delay);
20732100
if (delay*2 <= MAX_WAIT_TIMEOUT) {
20742101
delay *= 2;
20752102
}
20762103
MtmLock(LW_EXCLUSIVE);
20772104
}
2105+
MTM_LOG2("Transaction %lld suspended node at %lld, LSN %lld, active transactions=%lld", (long64)MtmTx.xid, MtmGetCurrentTime(), (long64)GetXLogInsertRecPtr(), (long64)Mtm->nRunningTransactions);
20782106
MtmUnlock();
20792107
}
20802108

@@ -2085,6 +2113,7 @@ static void
20852113
MtmResumeNode(void)
20862114
{
20872115
MtmLock(LW_EXCLUSIVE);
2116+
MTM_LOG2("Transaction %lld resume node at %lld status %s LSN %lld", (long64)MtmTx.xid, MtmGetCurrentTime(), MtmTxnStatusMnem[MtmTx.status], (long64)GetXLogInsertRecPtr());
20882117
Mtm->exclusiveLock = false;
20892118
MtmSuspended = false;
20902119
MtmUnlock();
@@ -2527,6 +2556,7 @@ static void MtmInitialize()
25272556
Mtm->nLockers = 0;
25282557
Mtm->exclusiveLock = false;
25292558
Mtm->nActiveTransactions = 0;
2559+
Mtm->nRunningTransactions = 0;
25302560
Mtm->votingTransactions = NULL;
25312561
Mtm->transListHead = NULL;
25322562
Mtm->transListTail = &Mtm->transListHead;
@@ -3369,7 +3399,7 @@ void MtmFinishPreparedTransaction(MtmTransState* ts, bool commit)
33693399
MtmTx.isActive = true;
33703400
FinishPreparedTransaction(ts->gid, commit);
33713401
if (commit) {
3372-
MTM_LOG2("Distributed transaction %s is committed", ts->gid);
3402+
MTM_LOG2("Distributed transaction %s (%lld) is committed at %lld with LSN=%lld", ts->gid, (long64)ts->xid, MtmGetCurrentTime(), (long64)GetXLogInsertRecPtr());
33733403
}
33743404
if (!insideTransaction) {
33753405
CommitTransactionCommand();
@@ -4556,7 +4586,7 @@ static bool MtmTwoPhaseCommit(MtmCurrentTrans* x)
45564586
MTM_ELOG(ERROR, "Transaction %s (%llu) is aborted by DTM", x->gid, (long64)x->xid);
45574587
} else {
45584588
FinishPreparedTransaction(x->gid, true);
4559-
MTM_LOG2("Distributed transaction %s is committed", x->gid);
4589+
MTM_LOG2("Distributed transaction %s (%lld) is committed at %lld with LSN=%lld", x->gid, (long64)x->xid, MtmGetCurrentTime(), (long64)GetXLogInsertRecPtr());
45604590
}
45614591
}
45624592
}

multimaster.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ typedef struct
295295
int nSenders; /* Number of started WAL senders (used to determine moment when recovery) */
296296
int nLockers; /* Number of lockers */
297297
int nActiveTransactions; /* Number of active 2PC transactions */
298+
int nRunningTransactions; /* Number of all running transactions */
298299
int nConfigChanges; /* Number of cluster configuration changes */
299300
int recoveryCount; /* Number of completed recoveries */
300301
int donorNodeId; /* Cluster node from which this node was populated */

0 commit comments

Comments
 (0)