38
38
#include "access/twophase.h"
39
39
#include "utils/guc.h"
40
40
#include "utils/hsearch.h"
41
+ #include "utils/timeout.h"
41
42
#include "utils/tqual.h"
42
43
#include "utils/array.h"
43
44
#include "utils/builtins.h"
@@ -94,7 +95,7 @@ typedef enum
94
95
#define MTM_MAP_SIZE MTM_HASH_SIZE
95
96
#define MIN_WAIT_TIMEOUT 1000
96
97
#define MAX_WAIT_TIMEOUT 100000
97
- #define MAX_WAIT_LOOPS 100
98
+ #define MAX_WAIT_LOOPS 100 // 1000000
98
99
#define STATUS_POLL_DELAY USECS_PER_SEC
99
100
100
101
void _PG_init (void );
@@ -117,6 +118,7 @@ PG_FUNCTION_INFO_V1(mtm_get_cluster_info);
117
118
PG_FUNCTION_INFO_V1 (mtm_make_table_local );
118
119
PG_FUNCTION_INFO_V1 (mtm_dump_lock_graph );
119
120
PG_FUNCTION_INFO_V1 (mtm_inject_2pc_error );
121
+ PG_FUNCTION_INFO_V1 (mtm_check_deadlock );
120
122
121
123
static Snapshot MtmGetSnapshot (Snapshot snapshot );
122
124
static void MtmInitialize (void );
@@ -274,15 +276,15 @@ void MtmUnlock(void)
274
276
Mtm -> lastLockHolder = 0 ;
275
277
}
276
278
277
- void MtmLockNode (int nodeId )
279
+ void MtmLockNode (int nodeId , LWLockMode mode )
278
280
{
279
- Assert (nodeId > 0 && nodeId <= Mtm -> nAllNodes );
280
- LWLockAcquire ((LWLockId )& Mtm -> locks [nodeId ], LW_EXCLUSIVE );
281
+ Assert (nodeId > 0 && nodeId <= MtmMaxNodes * 2 );
282
+ LWLockAcquire ((LWLockId )& Mtm -> locks [nodeId ], mode );
281
283
}
282
284
283
285
void MtmUnlockNode (int nodeId )
284
286
{
285
- Assert (nodeId > 0 && nodeId <= Mtm -> nAllNodes );
287
+ Assert (nodeId > 0 && nodeId <= MtmMaxNodes * 2 );
286
288
LWLockRelease ((LWLockId )& Mtm -> locks [nodeId ]);
287
289
}
288
290
@@ -437,6 +439,7 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
437
439
static timestamp_t totalSleepTime ;
438
440
static timestamp_t maxSleepTime ;
439
441
#endif
442
+ timestamp_t start = MtmGetSystemTime ();
440
443
timestamp_t delay = MIN_WAIT_TIMEOUT ;
441
444
int i ;
442
445
Assert (xid != InvalidTransactionId );
@@ -460,7 +463,10 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
460
463
if (ts -> csn > MtmTx .snapshot ) {
461
464
MTM_LOG4 ("%d: tuple with xid=%d(csn=%ld) is invisibile in snapshot %ld" ,
462
465
MyProcPid , xid , ts -> csn , MtmTx .snapshot );
463
- MtmUnlock ();
466
+ if (MtmGetSystemTime () - start > USECS_PER_SEC ) {
467
+ elog (WARNING , "Backend %d waits for transaction %x status %ld usecs" , MyProcPid , xid , MtmGetSystemTime () - start );
468
+ }
469
+ MtmUnlock ();
464
470
return true;
465
471
}
466
472
if (ts -> status == TRANSACTION_STATUS_UNKNOWN )
@@ -499,6 +505,9 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
499
505
MTM_LOG4 ("%d: tuple with xid=%d(csn= %ld) is %s in snapshot %ld" ,
500
506
MyProcPid , xid , ts -> csn , invisible ? "rollbacked" : "committed" , MtmTx .snapshot );
501
507
MtmUnlock ();
508
+ if (MtmGetSystemTime () - start > USECS_PER_SEC ) {
509
+ elog (WARNING , "Backend %d waits for %s transaction %x %ld usecs" , MyProcPid , invisible ? "rollbacked" : "committed" , xid , MtmGetSystemTime () - start );
510
+ }
502
511
return invisible ;
503
512
}
504
513
}
@@ -510,7 +519,7 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
510
519
}
511
520
}
512
521
MtmUnlock ();
513
- elog (ERROR , "Failed to get status of XID %d" , xid );
522
+ elog (ERROR , "Failed to get status of XID %d in %ld usec " , xid , MtmGetSystemTime () - start );
514
523
return true;
515
524
}
516
525
@@ -1091,6 +1100,7 @@ void MtmSend2PCMessage(MtmTransState* ts, MtmMessageCode cmd)
1091
1100
msg .sxid = ts -> xid ;
1092
1101
msg .csn = ts -> csn ;
1093
1102
msg .disabledNodeMask = Mtm -> disabledNodeMask ;
1103
+ msg .connectivityMask = Mtm -> connectivityMask ;
1094
1104
msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
1095
1105
memcpy (msg .gid , ts -> gid , MULTIMASTER_MAX_GID_SIZE );
1096
1106
@@ -1118,6 +1128,7 @@ void MtmBroadcastPollMessage(MtmTransState* ts)
1118
1128
MtmArbiterMessage msg ;
1119
1129
msg .code = MSG_POLL_REQUEST ;
1120
1130
msg .disabledNodeMask = Mtm -> disabledNodeMask ;
1131
+ msg .connectivityMask = Mtm -> connectivityMask ;
1121
1132
msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
1122
1133
memcpy (msg .gid , ts -> gid , MULTIMASTER_MAX_GID_SIZE );
1123
1134
@@ -1681,8 +1692,6 @@ void MtmCheckQuorum(void)
1681
1692
1682
1693
void MtmOnNodeDisconnect (int nodeId )
1683
1694
{
1684
- MtmTransState * ts ;
1685
-
1686
1695
if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ))
1687
1696
{
1688
1697
/* Node is already disabled */
@@ -1711,11 +1720,13 @@ void MtmOnNodeDisconnect(int nodeId)
1711
1720
}
1712
1721
1713
1722
MtmSleep (MSEC_TO_USEC (MtmHeartbeatSendTimeout ));
1714
-
1723
+ #if 0
1715
1724
if (!MtmUseRaftable )
1716
1725
{
1717
1726
MtmLock (LW_EXCLUSIVE );
1718
1727
if (!BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
1728
+ MtmTransState * ts ;
1729
+
1719
1730
MtmDisableNode (nodeId );
1720
1731
MtmCheckQuorum ();
1721
1732
/* Interrupt voting for active transaction and abort them */
@@ -1729,7 +1740,10 @@ void MtmOnNodeDisconnect(int nodeId)
1729
1740
}
1730
1741
}
1731
1742
MtmUnlock ();
1732
- } else {
1743
+ }
1744
+ else
1745
+ #endif
1746
+ {
1733
1747
MtmRefreshClusterStatus (false, 0 );
1734
1748
}
1735
1749
}
@@ -1942,6 +1956,11 @@ static void MtmInitialize()
1942
1956
Mtm -> freeQueue = NULL ;
1943
1957
for (i = 0 ; i < MtmNodes ; i ++ ) {
1944
1958
Mtm -> nodes [i ].oldestSnapshot = 0 ;
1959
+ Mtm -> nodes [i ].disabledNodeMask = 0 ;
1960
+ Mtm -> nodes [i ].connectivityMask = 0 ;
1961
+ Mtm -> nodes [i ].lockGraphUsed = 0 ;
1962
+ Mtm -> nodes [i ].lockGraphAllocated = 0 ;
1963
+ Mtm -> nodes [i ].lockGraphData = NULL ;
1945
1964
Mtm -> nodes [i ].transDelay = 0 ;
1946
1965
Mtm -> nodes [i ].lastStatusChangeTime = MtmGetSystemTime ();
1947
1966
Mtm -> nodes [i ].con = MtmConnections [i ];
@@ -2581,7 +2600,7 @@ _PG_init(void)
2581
2600
* resources in mtm_shmem_startup().
2582
2601
*/
2583
2602
RequestAddinShmemSpace (MTM_SHMEM_SIZE + MtmQueueSize );
2584
- RequestNamedLWLockTranche (MULTIMASTER_NAME , 1 + MtmMaxNodes );
2603
+ RequestNamedLWLockTranche (MULTIMASTER_NAME , 1 + MtmMaxNodes * 2 );
2585
2604
2586
2605
BgwPoolStart (MtmWorkers , MtmPoolConstructor );
2587
2606
@@ -3238,7 +3257,7 @@ Datum mtm_dump_lock_graph(PG_FUNCTION_ARGS)
3238
3257
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ )
3239
3258
{
3240
3259
size_t size ;
3241
- char * data = RaftableGet (psprintf ("lock-graph-%d" , i + 1 ), & size , NULL , false);
3260
+ char * data = RaftableGet (psprintf ("lock-graph-%d" , i + 1 ), & size , NULL , false);
3242
3261
if (data ) {
3243
3262
GlobalTransactionId * gtid = (GlobalTransactionId * )data ;
3244
3263
GlobalTransactionId * last = (GlobalTransactionId * )(data + size );
@@ -3630,12 +3649,28 @@ static bool MtmProcessDDLCommand(char const* queryString)
3630
3649
}
3631
3650
3632
3651
MTM_LOG1 ("Sending utility: %s" , queryWithContext );
3633
- LogLogicalMessage ("MTM:GUC " , queryWithContext , strlen (queryWithContext ), true);
3652
+ LogLogicalMessage ("G " , queryWithContext , strlen (queryWithContext )+ 1 , true);
3634
3653
3635
3654
MtmTx .containsDML = true;
3636
3655
return false;
3637
3656
}
3638
3657
3658
+ void MtmUpdateLockGraph (int nodeId , void const * messageBody , int messageSize )
3659
+ {
3660
+ int allocated ;
3661
+ MtmLockNode (nodeId + MtmMaxNodes , LW_EXCLUSIVE );
3662
+ allocated = Mtm -> nodes [nodeId - 1 ].lockGraphAllocated ;
3663
+ if (messageSize > allocated ) {
3664
+ allocated = Max (Max (MULTIMASTER_LOCK_BUF_INIT_SIZE , allocated * 2 ), messageSize );
3665
+ Mtm -> nodes [nodeId - 1 ].lockGraphData = ShmemAlloc (allocated );
3666
+ Mtm -> nodes [nodeId - 1 ].lockGraphAllocated = allocated ;
3667
+ }
3668
+ memcpy (Mtm -> nodes [nodeId - 1 ].lockGraphData , messageBody , messageSize );
3669
+ Mtm -> nodes [nodeId - 1 ].lockGraphUsed = messageSize ;
3670
+ MtmUnlockNode (nodeId + MtmMaxNodes );
3671
+ MTM_LOG1 ("Update deadlock graph for node %d size %d" , nodeId , messageSize );
3672
+ }
3673
+
3639
3674
static void MtmProcessUtility (Node * parsetree , const char * queryString ,
3640
3675
ProcessUtilityContext context , ParamListInfo params ,
3641
3676
DestReceiver * dest , char * completionTag )
@@ -3953,20 +3988,19 @@ MtmSerializeLock(PROCLOCK* proclock, void* arg)
3953
3988
}
3954
3989
3955
3990
static bool
3956
- MtmDetectGlobalDeadLock ( PGPROC * proc )
3991
+ MtmDetectGlobalDeadLockFortXid ( TransactionId xid )
3957
3992
{
3958
- ByteBuffer buf ;
3959
- PGXACT * pgxact = & ProcGlobal -> allPgXact [proc -> pgprocno ];
3960
3993
bool hasDeadlock = false;
3961
-
3962
- if ( TransactionIdIsValid ( pgxact -> xid )) {
3994
+ if ( TransactionIdIsValid ( xid )) {
3995
+ ByteBuffer buf ;
3963
3996
MtmGraph graph ;
3964
3997
GlobalTransactionId gtid ;
3965
3998
int i ;
3966
3999
3967
4000
ByteBufferAlloc (& buf );
3968
4001
EnumerateLocks (MtmSerializeLock , & buf );
3969
4002
RaftableSet (psprintf ("lock-graph-%d" , MtmNodeId ), buf .data , buf .used , false);
4003
+ MtmSleep (MSEC_TO_USEC (DeadlockTimeout ));
3970
4004
MtmGraphInit (& graph );
3971
4005
MtmGraphAdd (& graph , (GlobalTransactionId * )buf .data , buf .used /sizeof (GlobalTransactionId ));
3972
4006
ByteBufferFree (& buf );
@@ -3981,9 +4015,9 @@ MtmDetectGlobalDeadLock(PGPROC* proc)
3981
4015
}
3982
4016
}
3983
4017
}
3984
- MtmGetGtid (pgxact -> xid , & gtid );
4018
+ MtmGetGtid (xid , & gtid );
3985
4019
hasDeadlock = MtmGraphFindLoop (& graph , & gtid );
3986
- elog (WARNING , "Distributed deadlock check for %u:%u = %d" , gtid .node , gtid .xid , hasDeadlock );
4020
+ elog (WARNING , "Distributed deadlock check by backend %d for %u:%u = %d" , MyProcPid , gtid .node , gtid .xid , hasDeadlock );
3987
4021
if (!hasDeadlock ) {
3988
4022
/* There is no deadlock loop in graph, but deadlock can be caused by lack of apply workers: if all of them are busy, then some transactions
3989
4023
* can not be appied just because there are no vacant workers and it cause additional dependency between transactions which is not
@@ -3994,8 +4028,27 @@ MtmDetectGlobalDeadLock(PGPROC* proc)
3994
4028
hasDeadlock = true;
3995
4029
elog (WARNING , "Apply workers were blocked more than %d msec" ,
3996
4030
(int )USEC_TO_MSEC (MtmGetSystemTime () - lastPeekTime ));
4031
+ } else {
4032
+ MTM_LOG1 ("Enable deadlock timeout in backend %d for transaction %d" , MyProcPid , xid );
4033
+ enable_timeout_after (DEADLOCK_TIMEOUT , DeadlockTimeout );
3997
4034
}
3998
4035
}
3999
4036
}
4000
4037
return hasDeadlock ;
4001
4038
}
4039
+
4040
+ static bool
4041
+ MtmDetectGlobalDeadLock (PGPROC * proc )
4042
+ {
4043
+ PGXACT * pgxact = & ProcGlobal -> allPgXact [proc -> pgprocno ];
4044
+
4045
+ MTM_LOG1 ("Detect global deadlock for %d by backend %d" , pgxact -> xid , MyProcPid );
4046
+
4047
+ return MtmDetectGlobalDeadLockFortXid (pgxact -> xid );
4048
+ }
4049
+
4050
+ Datum mtm_check_deadlock (PG_FUNCTION_ARGS )
4051
+ {
4052
+ TransactionId xid = PG_GETARG_INT32 (0 );
4053
+ PG_RETURN_BOOL (MtmDetectGlobalDeadLockFortXid (xid ));
4054
+ }
0 commit comments