@@ -214,6 +214,7 @@ bool MtmVolksWagenMode;
214
214
TransactionId MtmUtilityProcessedInXid ;
215
215
216
216
static char * MtmConnStrs ;
217
+ static char * MtmClusterName ;
217
218
static int MtmQueueSize ;
218
219
static int MtmWorkers ;
219
220
static int MtmVacuumDelay ;
@@ -1867,6 +1868,39 @@ static void MtmRaftableInitialize()
1867
1868
raftable_start (MtmNodeId - 1 );
1868
1869
}
1869
1870
1871
+ static void MtmCheckControlFile (void )
1872
+ {
1873
+ char controlFilePath [MAXPGPATH ];
1874
+ char buf [MULTIMASTER_MAX_CTL_STR_SIZE ];
1875
+ FILE * f ;
1876
+ snprintf (controlFilePath , MAXPGPATH , "%s/global/mmts_control" , DataDir );
1877
+ f = fopen (controlFilePath , "r" );
1878
+ if (f != NULL && fgets (buf , sizeof buf , f )) {
1879
+ char * sep = strchr (buf , ':' );
1880
+ if (sep == NULL ) {
1881
+ elog (FATAL , "File mmts_control doesn't contain cluster name" );
1882
+ }
1883
+ * sep = '\0' ;
1884
+ if (strcmp (buf , MtmClusterName ) != 0 ) {
1885
+ elog (FATAL , "Database belongs to some other cluster %s rather than %s" , buf , MtmClusterName );
1886
+ }
1887
+ if (sscanf (sep + 1 , "%d" , & Mtm -> donorNodeId ) != 1 ) {
1888
+ elog (FATAL , "File mmts_control doesn't contain node id" );
1889
+ }
1890
+ fclose (f );
1891
+ } else {
1892
+ if (f != NULL ) {
1893
+ fclose (f );
1894
+ }
1895
+ f = fopen (controlFilePath , "w" );
1896
+ if (f == NULL ) {
1897
+ elog (FATAL , "Failed to create mmts_control file: %m" );
1898
+ }
1899
+ Mtm -> donorNodeId = -1 ;
1900
+ fprintf (f , "%s:%d\n" , MtmClusterName , Mtm -> donorNodeId );
1901
+ fclose (f );
1902
+ }
1903
+ }
1870
1904
1871
1905
static void MtmInitialize ()
1872
1906
{
@@ -1931,6 +1965,8 @@ static void MtmInitialize()
1931
1965
MtmDoReplication = true;
1932
1966
TM = & MtmTM ;
1933
1967
LWLockRelease (AddinShmemInitLock );
1968
+
1969
+ MtmCheckControlFile ();
1934
1970
}
1935
1971
1936
1972
static void
@@ -2472,6 +2508,19 @@ _PG_init(void)
2472
2508
NULL /* GucShowHook show_hook */
2473
2509
);
2474
2510
2511
+ DefineCustomStringVariable (
2512
+ "multimaster.cluster_name" ,
2513
+ "Name of the cluster" ,
2514
+ NULL ,
2515
+ & MtmClusterName ,
2516
+ "mmts" ,
2517
+ PGC_BACKEND , /* context */
2518
+ 0 , /* flags */
2519
+ NULL , /* GucStringCheckHook check_hook */
2520
+ NULL , /* GucStringAssignHook assign_hook */
2521
+ NULL /* GucShowHook show_hook */
2522
+ );
2523
+
2475
2524
DefineCustomIntVariable (
2476
2525
"multimaster.node_id" ,
2477
2526
"Multimaster node ID" ,
@@ -2609,8 +2658,10 @@ MtmReplicationMode MtmGetReplicationMode(int nodeId, sig_atomic_t volatile* shut
2609
2658
MtmLock (LW_EXCLUSIVE );
2610
2659
if (Mtm -> status == MTM_RECOVERY ) {
2611
2660
recovery = true;
2612
- if (Mtm -> recoverySlot == 0 || Mtm -> recoverySlot == nodeId ) {
2613
- /* Choose for recovery first available slot */
2661
+ if ((Mtm -> recoverySlot == 0 && (Mtm -> donorNodeId < 0 || Mtm -> donorNodeId == nodeId ))
2662
+ || Mtm -> recoverySlot == nodeId )
2663
+ {
2664
+ /* Choose for recovery first available slot or slot of donor node (if any) */
2614
2665
elog (WARNING , "Process %d starts recovery from node %d" , MyProcPid , nodeId );
2615
2666
Mtm -> recoverySlot = nodeId ;
2616
2667
Mtm -> nReceivers = 0 ;
@@ -2698,6 +2749,8 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2698
2749
{
2699
2750
ListCell * param ;
2700
2751
bool recoveryCompleted = false;
2752
+ XLogRecPtr recoveryStartPos = InvalidXLogRecPtr ;
2753
+
2701
2754
MtmIsRecoverySession = false;
2702
2755
Mtm -> nodes [MtmReplicationNodeId - 1 ].senderPid = MyProcPid ;
2703
2756
Mtm -> nodes [MtmReplicationNodeId - 1 ].senderStartTime = MtmGetSystemTime ();
@@ -2717,11 +2770,21 @@ MtmReplicationStartupHook(struct PGLogicalStartupHookArgs* args)
2717
2770
elog (ERROR , "Replication mode is not specified" );
2718
2771
}
2719
2772
break ;
2773
+ } else if (strcmp ("mtm_restart_pos" , elem -> defname ) == 0 ) {
2774
+ if (elem -> arg != NULL && strVal (elem -> arg ) != NULL ) {
2775
+ recoveryStartPos = intVal (elem -> arg );
2776
+ } else {
2777
+ elog (ERROR , "Restart position is not specified" );
2778
+ }
2720
2779
}
2721
2780
}
2722
2781
MtmLock (LW_EXCLUSIVE );
2723
- if (MtmIsRecoverySession ) {
2724
- MTM_LOG1 ("%d: Node %d start recovery of node %d" , MyProcPid , MtmNodeId , MtmReplicationNodeId );
2782
+ if (MtmIsRecoverySession ) {
2783
+ MTM_LOG1 ("%d: Node %d start recovery of node %d at position %lx" , MyProcPid , MtmNodeId , MtmReplicationNodeId , recoveryStartPos );
2784
+ Assert (MyReplicationSlot != NULL );
2785
+ if (recoveryStartPos < MyReplicationSlot -> data .restart_lsn ) {
2786
+ elog (ERROR , "Specified recovery start position %lx is beyond restart lsn %lx" , recoveryStartPos , MyReplicationSlot -> data .restart_lsn );
2787
+ }
2725
2788
if (!BIT_CHECK (Mtm -> disabledNodeMask , MtmReplicationNodeId - 1 )) {
2726
2789
MtmDisableNode (MtmReplicationNodeId );
2727
2790
MtmCheckQuorum ();
0 commit comments