38
38
#include "access/twophase.h"
39
39
#include "utils/guc.h"
40
40
#include "utils/hsearch.h"
41
+ #include "utils/timeout.h"
41
42
#include "utils/tqual.h"
42
43
#include "utils/array.h"
43
44
#include "utils/builtins.h"
@@ -93,7 +94,7 @@ typedef enum
93
94
#define MTM_MAP_SIZE MTM_HASH_SIZE
94
95
#define MIN_WAIT_TIMEOUT 1000
95
96
#define MAX_WAIT_TIMEOUT 100000
96
- #define MAX_WAIT_LOOPS 100
97
+ #define MAX_WAIT_LOOPS 100 // 1000000
97
98
#define STATUS_POLL_DELAY USECS_PER_SEC
98
99
99
100
void _PG_init (void );
@@ -116,6 +117,7 @@ PG_FUNCTION_INFO_V1(mtm_get_cluster_info);
116
117
PG_FUNCTION_INFO_V1 (mtm_make_table_local );
117
118
PG_FUNCTION_INFO_V1 (mtm_dump_lock_graph );
118
119
PG_FUNCTION_INFO_V1 (mtm_inject_2pc_error );
120
+ PG_FUNCTION_INFO_V1 (mtm_check_deadlock );
119
121
120
122
static Snapshot MtmGetSnapshot (Snapshot snapshot );
121
123
static void MtmInitialize (void );
@@ -274,15 +276,15 @@ void MtmUnlock(void)
274
276
Mtm -> lastLockHolder = 0 ;
275
277
}
276
278
277
- void MtmLockNode (int nodeId )
279
+ void MtmLockNode (int nodeId , LWLockMode mode )
278
280
{
279
- Assert (nodeId > 0 && nodeId <= Mtm -> nAllNodes );
280
- LWLockAcquire ((LWLockId )& Mtm -> locks [nodeId ], LW_EXCLUSIVE );
281
+ Assert (nodeId > 0 && nodeId <= MtmMaxNodes * 2 );
282
+ LWLockAcquire ((LWLockId )& Mtm -> locks [nodeId ], mode );
281
283
}
282
284
283
285
void MtmUnlockNode (int nodeId )
284
286
{
285
- Assert (nodeId > 0 && nodeId <= Mtm -> nAllNodes );
287
+ Assert (nodeId > 0 && nodeId <= MtmMaxNodes * 2 );
286
288
LWLockRelease ((LWLockId )& Mtm -> locks [nodeId ]);
287
289
}
288
290
@@ -437,6 +439,7 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
437
439
static timestamp_t totalSleepTime ;
438
440
static timestamp_t maxSleepTime ;
439
441
#endif
442
+ timestamp_t start = MtmGetSystemTime ();
440
443
timestamp_t delay = MIN_WAIT_TIMEOUT ;
441
444
int i ;
442
445
Assert (xid != InvalidTransactionId );
@@ -460,7 +463,10 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
460
463
if (ts -> csn > MtmTx .snapshot ) {
461
464
MTM_LOG4 ("%d: tuple with xid=%d(csn=%ld) is invisibile in snapshot %ld" ,
462
465
MyProcPid , xid , ts -> csn , MtmTx .snapshot );
463
- MtmUnlock ();
466
+ if (MtmGetSystemTime () - start > USECS_PER_SEC ) {
467
+ elog (WARNING , "Backend %d waits for transaction %x status %ld usecs" , MyProcPid , xid , MtmGetSystemTime () - start );
468
+ }
469
+ MtmUnlock ();
464
470
return true;
465
471
}
466
472
if (ts -> status == TRANSACTION_STATUS_UNKNOWN )
@@ -499,6 +505,9 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
499
505
MTM_LOG4 ("%d: tuple with xid=%d(csn= %ld) is %s in snapshot %ld" ,
500
506
MyProcPid , xid , ts -> csn , invisible ? "rollbacked" : "committed" , MtmTx .snapshot );
501
507
MtmUnlock ();
508
+ if (MtmGetSystemTime () - start > USECS_PER_SEC ) {
509
+ elog (WARNING , "Backend %d waits for %s transaction %x %ld usecs" , MyProcPid , invisible ? "rollbacked" : "committed" , xid , MtmGetSystemTime () - start );
510
+ }
502
511
return invisible ;
503
512
}
504
513
}
@@ -510,7 +519,7 @@ bool MtmXidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
510
519
}
511
520
}
512
521
MtmUnlock ();
513
- elog (ERROR , "Failed to get status of XID %d" , xid );
522
+ elog (ERROR , "Failed to get status of XID %d in %ld usec " , xid , MtmGetSystemTime () - start );
514
523
return true;
515
524
}
516
525
@@ -1091,6 +1100,7 @@ void MtmSend2PCMessage(MtmTransState* ts, MtmMessageCode cmd)
1091
1100
msg .sxid = ts -> xid ;
1092
1101
msg .csn = ts -> csn ;
1093
1102
msg .disabledNodeMask = Mtm -> disabledNodeMask ;
1103
+ msg .connectivityMask = Mtm -> connectivityMask ;
1094
1104
msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
1095
1105
memcpy (msg .gid , ts -> gid , MULTIMASTER_MAX_GID_SIZE );
1096
1106
@@ -1118,6 +1128,7 @@ void MtmBroadcastPollMessage(MtmTransState* ts)
1118
1128
MtmArbiterMessage msg ;
1119
1129
msg .code = MSG_POLL_REQUEST ;
1120
1130
msg .disabledNodeMask = Mtm -> disabledNodeMask ;
1131
+ msg .connectivityMask = Mtm -> connectivityMask ;
1121
1132
msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
1122
1133
memcpy (msg .gid , ts -> gid , MULTIMASTER_MAX_GID_SIZE );
1123
1134
@@ -1703,8 +1714,6 @@ void MtmCheckQuorum(void)
1703
1714
1704
1715
void MtmOnNodeDisconnect (int nodeId )
1705
1716
{
1706
- MtmTransState * ts ;
1707
-
1708
1717
if (BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 ))
1709
1718
{
1710
1719
/* Node is already disabled */
@@ -1733,11 +1742,13 @@ void MtmOnNodeDisconnect(int nodeId)
1733
1742
}
1734
1743
1735
1744
MtmSleep (MSEC_TO_USEC (MtmHeartbeatSendTimeout ));
1736
-
1745
+ #if 0
1737
1746
if (!MtmUseRaftable )
1738
1747
{
1739
1748
MtmLock (LW_EXCLUSIVE );
1740
1749
if (!BIT_CHECK (Mtm -> disabledNodeMask , nodeId - 1 )) {
1750
+ MtmTransState * ts ;
1751
+
1741
1752
MtmDisableNode (nodeId );
1742
1753
MtmCheckQuorum ();
1743
1754
/* Interrupt voting for active transaction and abort them */
@@ -1751,7 +1762,10 @@ void MtmOnNodeDisconnect(int nodeId)
1751
1762
}
1752
1763
}
1753
1764
MtmUnlock ();
1754
- } else {
1765
+ }
1766
+ else
1767
+ #endif
1768
+ {
1755
1769
MtmRefreshClusterStatus (false, 0 );
1756
1770
}
1757
1771
}
@@ -1964,6 +1978,11 @@ static void MtmInitialize()
1964
1978
Mtm -> freeQueue = NULL ;
1965
1979
for (i = 0 ; i < MtmNodes ; i ++ ) {
1966
1980
Mtm -> nodes [i ].oldestSnapshot = 0 ;
1981
+ Mtm -> nodes [i ].disabledNodeMask = 0 ;
1982
+ Mtm -> nodes [i ].connectivityMask = 0 ;
1983
+ Mtm -> nodes [i ].lockGraphUsed = 0 ;
1984
+ Mtm -> nodes [i ].lockGraphAllocated = 0 ;
1985
+ Mtm -> nodes [i ].lockGraphData = NULL ;
1967
1986
Mtm -> nodes [i ].transDelay = 0 ;
1968
1987
Mtm -> nodes [i ].lastStatusChangeTime = MtmGetSystemTime ();
1969
1988
Mtm -> nodes [i ].con = MtmConnections [i ];
@@ -2603,7 +2622,7 @@ _PG_init(void)
2603
2622
* resources in mtm_shmem_startup().
2604
2623
*/
2605
2624
RequestAddinShmemSpace (MTM_SHMEM_SIZE + MtmQueueSize );
2606
- RequestNamedLWLockTranche (MULTIMASTER_NAME , 1 + MtmMaxNodes );
2625
+ RequestNamedLWLockTranche (MULTIMASTER_NAME , 1 + MtmMaxNodes * 2 );
2607
2626
2608
2627
BgwPoolStart (MtmWorkers , MtmPoolConstructor );
2609
2628
@@ -3260,7 +3279,7 @@ Datum mtm_dump_lock_graph(PG_FUNCTION_ARGS)
3260
3279
for (i = 0 ; i < Mtm -> nAllNodes ; i ++ )
3261
3280
{
3262
3281
size_t size ;
3263
- char * data = RaftableGet (psprintf ("lock-graph-%d" , i + 1 ), & size , NULL , false);
3282
+ char * data = RaftableGet (psprintf ("lock-graph-%d" , i + 1 ), & size , NULL , false);
3264
3283
if (data ) {
3265
3284
GlobalTransactionId * gtid = (GlobalTransactionId * )data ;
3266
3285
GlobalTransactionId * last = (GlobalTransactionId * )(data + size );
@@ -3672,12 +3691,28 @@ static bool MtmProcessDDLCommand(char const* queryString)
3672
3691
}
3673
3692
3674
3693
MTM_LOG1 ("Sending utility: %s" , queryWithContext );
3675
- LogLogicalMessage ("MTM:GUC " , queryWithContext , strlen (queryWithContext ), true);
3694
+ LogLogicalMessage ("G " , queryWithContext , strlen (queryWithContext )+ 1 , true);
3676
3695
3677
3696
MtmTx .containsDML = true;
3678
3697
return false;
3679
3698
}
3680
3699
3700
+ void MtmUpdateLockGraph (int nodeId , void const * messageBody , int messageSize )
3701
+ {
3702
+ int allocated ;
3703
+ MtmLockNode (nodeId + MtmMaxNodes , LW_EXCLUSIVE );
3704
+ allocated = Mtm -> nodes [nodeId - 1 ].lockGraphAllocated ;
3705
+ if (messageSize > allocated ) {
3706
+ allocated = Max (Max (MULTIMASTER_LOCK_BUF_INIT_SIZE , allocated * 2 ), messageSize );
3707
+ Mtm -> nodes [nodeId - 1 ].lockGraphData = ShmemAlloc (allocated );
3708
+ Mtm -> nodes [nodeId - 1 ].lockGraphAllocated = allocated ;
3709
+ }
3710
+ memcpy (Mtm -> nodes [nodeId - 1 ].lockGraphData , messageBody , messageSize );
3711
+ Mtm -> nodes [nodeId - 1 ].lockGraphUsed = messageSize ;
3712
+ MtmUnlockNode (nodeId + MtmMaxNodes );
3713
+ MTM_LOG1 ("Update deadlock graph for node %d size %d" , nodeId , messageSize );
3714
+ }
3715
+
3681
3716
static void MtmProcessUtility (Node * parsetree , const char * queryString ,
3682
3717
ProcessUtilityContext context , ParamListInfo params ,
3683
3718
DestReceiver * dest , char * completionTag )
@@ -3996,20 +4031,19 @@ MtmSerializeLock(PROCLOCK* proclock, void* arg)
3996
4031
}
3997
4032
3998
4033
static bool
3999
- MtmDetectGlobalDeadLock ( PGPROC * proc )
4034
+ MtmDetectGlobalDeadLockFortXid ( TransactionId xid )
4000
4035
{
4001
- ByteBuffer buf ;
4002
- PGXACT * pgxact = & ProcGlobal -> allPgXact [proc -> pgprocno ];
4003
4036
bool hasDeadlock = false;
4004
-
4005
- if ( TransactionIdIsValid ( pgxact -> xid )) {
4037
+ if ( TransactionIdIsValid ( xid )) {
4038
+ ByteBuffer buf ;
4006
4039
MtmGraph graph ;
4007
4040
GlobalTransactionId gtid ;
4008
4041
int i ;
4009
4042
4010
4043
ByteBufferAlloc (& buf );
4011
4044
EnumerateLocks (MtmSerializeLock , & buf );
4012
4045
RaftableSet (psprintf ("lock-graph-%d" , MtmNodeId ), buf .data , buf .used , false);
4046
+ MtmSleep (MSEC_TO_USEC (DeadlockTimeout ));
4013
4047
MtmGraphInit (& graph );
4014
4048
MtmGraphAdd (& graph , (GlobalTransactionId * )buf .data , buf .used /sizeof (GlobalTransactionId ));
4015
4049
ByteBufferFree (& buf );
@@ -4024,9 +4058,9 @@ MtmDetectGlobalDeadLock(PGPROC* proc)
4024
4058
}
4025
4059
}
4026
4060
}
4027
- MtmGetGtid (pgxact -> xid , & gtid );
4061
+ MtmGetGtid (xid , & gtid );
4028
4062
hasDeadlock = MtmGraphFindLoop (& graph , & gtid );
4029
- elog (WARNING , "Distributed deadlock check for %u:%u = %d" , gtid .node , gtid .xid , hasDeadlock );
4063
+ elog (WARNING , "Distributed deadlock check by backend %d for %u:%u = %d" , MyProcPid , gtid .node , gtid .xid , hasDeadlock );
4030
4064
if (!hasDeadlock ) {
4031
4065
/* There is no deadlock loop in graph, but deadlock can be caused by lack of apply workers: if all of them are busy, then some transactions
4032
4066
* can not be applied just because there are no vacant workers and it causes an additional dependency between transactions which is not
@@ -4037,8 +4071,27 @@ MtmDetectGlobalDeadLock(PGPROC* proc)
4037
4071
hasDeadlock = true;
4038
4072
elog (WARNING , "Apply workers were blocked more than %d msec" ,
4039
4073
(int )USEC_TO_MSEC (MtmGetSystemTime () - lastPeekTime ));
4074
+ } else {
4075
+ MTM_LOG1 ("Enable deadlock timeout in backend %d for transaction %d" , MyProcPid , xid );
4076
+ enable_timeout_after (DEADLOCK_TIMEOUT , DeadlockTimeout );
4040
4077
}
4041
4078
}
4042
4079
}
4043
4080
return hasDeadlock ;
4044
4081
}
4082
+
4083
+ static bool
4084
+ MtmDetectGlobalDeadLock (PGPROC * proc )
4085
+ {
4086
+ PGXACT * pgxact = & ProcGlobal -> allPgXact [proc -> pgprocno ];
4087
+
4088
+ MTM_LOG1 ("Detect global deadlock for %d by backend %d" , pgxact -> xid , MyProcPid );
4089
+
4090
+ return MtmDetectGlobalDeadLockFortXid (pgxact -> xid );
4091
+ }
4092
+
4093
+ Datum mtm_check_deadlock (PG_FUNCTION_ARGS )
4094
+ {
4095
+ TransactionId xid = PG_GETARG_INT32 (0 );
4096
+ PG_RETURN_BOOL (MtmDetectGlobalDeadLockFortXid (xid ));
4097
+ }
0 commit comments