-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathstate.h
154 lines (137 loc) · 5.76 KB
/
state.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#ifndef STATE_H
#define STATE_H
/*
* Generation is a uniquely numbered subset of configured nodes allowed to
* commit transactions. Each xact is stamped with generation it belongs
* to. Transaction must be PREPAREd on *all* generation members before commit;
* this provides recovery -> normal work transition without risk of reordering
* xacts.
*
* The two main properties of generations are
* - At each node all prepares of generation n which might ever be committed
* lie strictly before all such prepares of generation n+1.
* - Node which is MTM_GEN_ONLINE in generation n holds all committable
* xacts of all generations < n.
* See generations2.md and MtmGenerations.tla for details.
*
* A normal (xact-making) generation has at least a majority of nodes as
* members. However, we allow electing a generation with fewer members as a
* sort of mark that its members are recovered enough to be included in the
* following normal generations. This lets a node always add *only itself*
* (but remove anyone else) when campaigning for new generations; thus only the
* node itself decides when it is recovered enough to force others to wait for
* it, which simplifies reasoning about who should be the next gen members.
*
* Another reason for the existence of minority gens is the use of generations
* to directly abort transactions when we know they can't ever be prepared;
* this allows a node to participate in normal transaction resolution iff it
* has the PREPARE. For that to work, we must be sure that a live connectivity
* clique forming a majority eventually forms its generation regardless of the
* recovery process. See handle_1a for details.
*/
/*
* A single generation: its number plus two node sets. EQUAL_GENS treats two
* generations as equal only when all three fields match.
*/
typedef struct MtmGeneration
{
uint64 num; /* logical clock aka term number aka ballot */
/*
* Nodes that are members of this generation.
* NOTE(review): presumably a bitmask with one bit per node, like
* nodemask_t -- confirm.
*/
uint64 members; /* xxx extract nodemask.h and use it here */
/*
* Generation has fixed set of configured nodes, which helps consistent
* xact resolving with dynamic add/rm of nodes.
*/
uint64 configured; /* xxx extract nodemask.h and use it here */
} MtmGeneration;
/*
* Generation number standing for "no valid generation" (going by the name).
* NOTE(review): presumably real generation numbers are always greater than
* 0 -- confirm.
*/
#define MtmInvalidGenNum 0
/*
* True iff two generations are identical: same number, same member set and
* same configured set. Works on anything with num/members/configured fields.
* Each argument appears several times in the expansion, so do not pass
* expressions with side effects.
*/
#define EQUAL_GENS(gen_a, gen_b) \
	(((gen_a).num == (gen_b).num) && \
	 ((gen_a).members == (gen_b).members) && \
	 ((gen_a).configured == (gen_b).configured))
/*
* Tells whether a generation is a "referee" one, judging only by the sizes of
* its node sets instead of keeping a separate flag. This is enough because
* the referee is enabled only with 2 nodes, and a single member gen is only
* ever proposed as a referee one (it requires the referee vote and allows
* this single node to be online).
*
* The check of the configured set is important: a single node cluster
* shouldn't access the referee; also, with > 2 nodes there is at least a
* theoretical possibility of electing a single-node generation after two
* consecutive minority gen elections.
*/
#define IS_REFEREE_GEN(gen_members, gen_configured) \
	((popcount(gen_configured) == 2) && (popcount(gen_members) == 1))
/*
* Status of the node with respect to one particular generation (see
* MtmGetCurrentStatusInGen).
*/
typedef enum
{
MTM_GEN_DEAD, /* can't ever be online in this gen */
MTM_GEN_RECOVERY, /* must first pull the latest xacts in recovery before */
/* starting to make its own xacts and receive normally */
MTM_GEN_ONLINE /* participating normally */
} MtmStatusInGen;
/*
* Overall status of the node, as returned by MtmGetCurrentStatus.
* NOTE(review): MtmNodeStatusMnem presumably holds a printable name for each
* value, so the two must be kept in sync -- confirm.
*/
typedef enum
{
/*
* We were not excluded to the best of our knowledge, but we don't see all
* peers from current generation, so commits will likely fail.
*/
MTM_ISOLATED,
/*
* We were excluded and definitely need recovery, but not yet sure from
* whom as we don't see majority.
*/
MTM_DISABLED,
/*
* We are catching up, eating changes committed without us participating.
* Other nodes don't wait for us yet, so this doesn't freeze the cluster.
*/
MTM_CATCHUP,
/*
* Generation with us was elected and others started waiting for us, but
* we need to eat the latest changes in recovery mode to participate
* normally.
*/
MTM_RECOVERY,
/*
* It's Twelve O'clock and All's Well: nothing listed above applies.
*/
MTM_ONLINE,
} MtmNodeStatus;
/*
* Mnemonic names of node statuses; the array itself is defined elsewhere.
* NOTE(review): presumably indexed by MtmNodeStatus -- confirm.
*/
extern char const *const MtmNodeStatusMnem[];
/*
* Initialization / startup entry points of the state module.
* NOTE(review): going by the names, one of them runs at shared memory
* startup; the required call order and context are not visible in this
* header -- confirm.
*/
extern void MtmStateInit(void);
extern void MtmStateShmemStartup(void);
extern void MtmStateStartup(void);
/* generation management */
/*
* Accessors for the current generation: just its number, or the whole
* MtmGeneration by value.
* NOTE(review): the `locked` flag presumably tells whether the caller already
* holds the lock protecting the generation state -- confirm.
*/
extern uint64 MtmGetCurrentGenNum(void);
extern MtmGeneration MtmGetCurrentGen(bool locked);
/*
* NOTE(review): judging by the names, these react to a generation (and its
* donor set) learned from elsewhere; the switch conditions and the meaning of
* the bool result are not visible in this header -- confirm.
*/
extern void MtmConsiderGenSwitch(MtmGeneration gen, nodemask_t donors);
extern bool MtmHandleParallelSafe(MtmGeneration ps_gen, nodemask_t ps_donors,
bool is_recovery, XLogRecPtr end_lsn);
/*
* Current status of the node: within the current generation (MtmStatusInGen)
* and overall (MtmNodeStatus).
* NOTE(review): the NotLocked variant and the gen_locked / vote_locked flags
* presumably describe which locks the caller does or does not hold --
* confirm.
*/
extern MtmStatusInGen MtmGetCurrentStatusInGen(void);
extern MtmStatusInGen MtmGetCurrentStatusInGenNotLocked(void);
extern MtmNodeStatus MtmGetCurrentStatus(bool gen_locked, bool vote_locked);
/* receiver bits */
/*
* NOTE(review): going by the name, called to report that the receiver for
* node `node_id` has caught up; who calls it and what it triggers is not
* visible in this header -- confirm.
*/
extern void MtmReportReceiverCaughtup(int node_id);
/*
* Special receive mode values:
*   RECEIVE_MODE_NORMAL   - all receivers work normally;
*   RECEIVE_MODE_DISABLED - we should recover, but are not sure from whom
*                           yet.
*
* IS_RECEIVE_MODE_DONOR is true for every value other than these two.
* NOTE(review): presumably such a value identifies the donor node to recover
* from -- confirm.
* The argument appears twice in the expansion, so do not pass expressions
* with side effects.
*/
#define RECEIVE_MODE_NORMAL 0
#define RECEIVE_MODE_DISABLED (~(uint32)0)
#define IS_RECEIVE_MODE_DONOR(mode) \
	(!((mode) == RECEIVE_MODE_NORMAL || \
	   (mode) == RECEIVE_MODE_DISABLED))
/*
* Returns the replication mode for the receiver of node `nodeId`.
* NOTE(review): how MtmReplicationMode relates to the RECEIVE_MODE_* values
* is not visible in this header -- confirm.
*/
extern MtmReplicationMode MtmGetReceiverMode(int nodeId);
/* connectivity */
/*
* Node masks describing connectivity.
* NOTE(review): presumably the WithMe variant additionally includes this
* node, and `locked` tells whether the caller already holds the relevant
* lock -- confirm.
*/
extern nodemask_t MtmGetDmqReceiversMask(void);
extern nodemask_t MtmGetConnectedMask(bool locked);
extern nodemask_t MtmGetConnectedMaskWithMe(bool locked);
/*
* Dmq receiver-side hooks: connect, heartbeat, disconnect.
* NOTE(review): the pointer returned by the connect hook is presumably what
* is later passed as `extra` to the heartbeat hook -- confirm.
*/
extern void *MtmOnDmqReceiverConnect(char *node_name);
extern void MtmOnDmqReceiverHeartbeat(char *node_name, StringInfo msg, void *extra);
extern void MtmOnDmqReceiverDisconnect(char *node_name);
/* Dmq sender-side hooks: connect, heartbeat, disconnect. */
extern void MtmOnDmqSenderConnect(char *node_name);
extern void MtmOnDmqSenderHeartbeat(char *node_name, StringInfo buf);
extern void MtmOnDmqSenderDisconnect(char *node_name);
/*
* Acquire / release "PB", either as a preparer or as a holder.
* NOTE(review): "PB" is not expanded anywhere in this header and the meaning
* of the `backend` / `full` flags is not visible here -- TODO document.
*/
extern void AcquirePBByPreparer(bool backend);
extern void AcquirePBByHolder(bool full);
extern void ReleasePB(void);
/* bgws */
/*
* NOTE(review): "bgws" presumably stands for background workers, with the
* *Main functions and MtmMonitor (all taking a single Datum) being their
* entry points -- confirm.
*/
extern void CampaignerMain(Datum main_arg);
extern void ReplierMain(Datum main_arg);
extern void MtmMonitor(Datum arg);
/*
* NOTE(review): presumably starts the monitor for database `db_id` on behalf
* of user `user_id` -- confirm.
*/
extern void MtmMonitorStart(Oid db_id, Oid user_id);
/*
* not cleaned up yet
*
* NOTE(review): these are flagged above as not cleaned up yet; their
* semantics (e.g. what `ignore_disabled` affects) are not visible in this
* header -- check the definitions before relying on them.
*/
extern void MtmRefreshClusterStatus(void);
extern nodemask_t MtmGetDisabledNodeMask(void);
extern nodemask_t MtmGetEnabledNodeMask(bool ignore_disabled);
extern void CampaignerStop(void);
#endif