21
21
#include "catalog/pg_subscription.h"
22
22
#include "tcop/tcopprot.h"
23
23
#include "postmaster/autovacuum.h"
24
+ #include "libpq/pqformat.h"
24
25
25
26
#include "multimaster.h"
26
27
#include "logger.h"
27
28
#include "ddl.h"
28
29
#include "state.h"
29
30
#include "syncpoint.h"
30
31
32
+ typedef struct /* one reply collected by gather(), tagged with the node it came from */
33
+ {
34
+ StringInfo message ; /* buffer handed to dmq_pop() by gather(); the prepare-phase loop in MtmTwoPhaseCommit reads a status byte from it with pq_getmsgbyte() */
35
+ int node_id ; /* sender_to_node[] entry for the DMQ sender that produced this message */
36
+ } mtm_msg ;
37
+
31
38
static bool force_in_bgworker ;
32
39
33
40
static bool subchange_cb_registered ;
@@ -38,10 +45,7 @@ static MtmConfig *mtm_cfg;
38
45
39
46
MtmCurrentTrans MtmTx ;
40
47
41
- static bool GatherPrepares (TransactionId xid , nodemask_t participantsMask ,
42
- int * failed_at );
43
- static void GatherPrecommits (TransactionId xid , nodemask_t participantsMask ,
44
- MtmMessageCode code );
48
+ static void gather (uint64 participants , mtm_msg * messages , int * msg_count );
45
49
46
50
static void
47
51
pubsub_change_cb (Datum arg , int cacheid , uint32 hashvalue )
@@ -170,12 +174,13 @@ MtmBeginTransaction()
170
174
bool
171
175
MtmTwoPhaseCommit ()
172
176
{
173
- nodemask_t participantsMask ;
174
- bool ret ;
175
- int failed_at = 0 ;
177
+ uint64 participants ;
178
+ bool ret ;
176
179
TransactionId xid ;
177
- char stream [DMQ_NAME_MAXLEN ];
178
- pgid_t gid ;
180
+ char stream [DMQ_NAME_MAXLEN ];
181
+ char gid [GIDSIZE ];
182
+ mtm_msg messages [MTM_MAX_NODES ];
183
+ int n_messages ;
179
184
180
185
if (!MtmTx .contains_persistent_ddl && !MtmTx .contains_dml )
181
186
return false;
@@ -206,7 +211,7 @@ MtmTwoPhaseCommit()
206
211
*
207
212
* It is only used during startup of WalSender(node_id) in recovered mode
208
213
* to create a barrier after which all transactions doing our 3PC are
209
- * guaranted to have seen participantsMask with node_id enabled, so the
214
+ * guaranteed to have seen participants with node_id enabled, so the
210
215
* receiver can apply them in parallel and be sure that precommit will
211
216
* not happen before node_id applies prepare.
212
217
*
@@ -217,8 +222,8 @@ MtmTwoPhaseCommit()
217
222
218
223
LWLockAcquire (MtmCommitBarrier , LW_SHARED );
219
224
220
- participantsMask = MtmGetEnabledNodeMask () &
221
- ~((nodemask_t )1 << (mtm_cfg -> my_node_id - 1 ));
225
+ participants = MtmGetEnabledNodeMask () &
226
+ ~((nodemask_t )1 << (mtm_cfg -> my_node_id - 1 ));
222
227
223
228
ret = PrepareTransactionBlock (gid );
224
229
if (!ret )
@@ -230,119 +235,62 @@ MtmTwoPhaseCommit()
230
235
mtm_log (MtmTxFinish , "TXFINISH: %s prepared" , gid );
231
236
CommitTransactionCommand ();
232
237
233
- ret = GatherPrepares (xid , participantsMask , & failed_at );
234
- if (!ret )
238
+ gather (participants , messages , & n_messages );
239
+ dmq_stream_unsubscribe (stream );
240
+
241
+ for (int i = 0 ; i < n_messages ; i ++ )
235
242
{
236
- dmq_stream_unsubscribe (stream );
237
- FinishPreparedTransaction (gid , false, false);
238
- mtm_log (MtmTxFinish , "TXFINISH: %s aborted" , gid );
239
- mtm_log (ERROR , "Failed to prepare transaction %s at node %d" ,
240
- gid , failed_at );
243
+ MtmMessageCode status = pq_getmsgbyte (messages [i ].message );
244
+
245
+ Assert (status == MSG_PREPARED || status == MSG_ABORTED );
246
+ if (status == MSG_ABORTED )
247
+ {
248
+ FinishPreparedTransaction (gid , false, false);
249
+ mtm_log (MtmTxFinish , "TXFINISH: %s aborted" , gid );
250
+ mtm_log (ERROR , "Failed to prepare transaction %s at node %d" ,
251
+ gid , messages [i ].node_id );
252
+ }
241
253
}
242
254
255
+ dmq_stream_subscribe (gid );
256
+
243
257
SetPreparedTransactionState (gid , MULTIMASTER_PRECOMMITTED );
244
258
mtm_log (MtmTxFinish , "TXFINISH: %s precommitted" , gid );
245
- GatherPrecommits ( xid , participantsMask , MSG_PRECOMMITTED );
259
+ gather ( participants , messages , & n_messages );
246
260
247
261
StartTransactionCommand ();
248
262
FinishPreparedTransaction (gid , true, false);
249
263
mtm_log (MtmTxFinish , "TXFINISH: %s committed" , gid );
250
- GatherPrecommits (xid , participantsMask , MSG_COMMITTED );
264
+ // XXX: make this conditional
265
+ gather (participants , messages , & n_messages );
251
266
252
267
LWLockRelease (MtmCommitBarrier );
253
268
254
- dmq_stream_unsubscribe (stream );
255
- mtm_log (MtmTxTrace , "%s unsubscribed for %s" , gid , stream );
269
+ dmq_stream_unsubscribe (gid );
270
+ mtm_log (MtmTxTrace , "%s unsubscribed for %s" , gid , gid );
256
271
257
272
MaybeLogSyncpoint (false);
258
273
259
274
return true;
260
275
}
261
276
262
- static bool
263
- GatherPrepares (TransactionId xid , nodemask_t participantsMask , int * failed_at )
264
- {
265
- bool prepared = true;
266
-
267
- while (participantsMask != 0 )
268
- {
269
- bool ret ;
270
- DmqSenderId sender_id ;
271
- StringInfoData buffer ;
272
- MtmArbiterMessage * msg ;
273
-
274
- ret = dmq_pop (& sender_id , & buffer , participantsMask );
275
-
276
- if (ret )
277
- {
278
- msg = (MtmArbiterMessage * ) buffer .data ;
279
-
280
- Assert (msg -> node == sender_to_node [sender_id ]);
281
- Assert (msg -> code == MSG_PREPARED || msg -> code == MSG_ABORTED );
282
- Assert (msg -> dxid == xid );
283
- Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
284
-
285
- mtm_log (MtmTxTrace ,
286
- "GatherPrepares: got '%s' for tx" XID_FMT " from node%d" ,
287
- msg -> code == MSG_PREPARED ? "ok" : "failed" ,
288
- xid , sender_to_node [sender_id ]);
289
-
290
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
291
-
292
- if (msg -> code == MSG_ABORTED )
293
- {
294
- prepared = false;
295
- * failed_at = msg -> node ;
296
- }
297
- }
298
- else
299
- {
300
- /*
301
- * If queue is detached then the neignbour node is probably
302
- * disconnected. Let's wait when it became disabled as we can
303
- * became offline by this time.
304
- */
305
- if (!BIT_CHECK (MtmGetEnabledNodeMask (), sender_to_node [sender_id ] - 1 ))
306
- {
307
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
308
- mtm_log (MtmTxTrace ,
309
- "GatherPrepares: dropping node%d from participants of tx" XID_FMT ,
310
- sender_to_node [sender_id ], xid );
311
- }
312
- }
313
- }
314
-
315
- // XXX: assert that majority has responded
316
-
317
- return prepared ;
318
- }
319
-
320
277
static void
321
- GatherPrecommits ( TransactionId xid , nodemask_t participantsMask , MtmMessageCode code )
278
+ gather ( uint64 participants , mtm_msg * messages , int * msg_count )
322
279
{
323
- while (participantsMask != 0 )
280
+ * msg_count = 0 ;
281
+ while (participants != 0 )
324
282
{
325
283
bool ret ;
326
284
DmqSenderId sender_id ;
327
- StringInfoData buffer ;
328
- MtmArbiterMessage * msg ;
329
-
330
- ret = dmq_pop (& sender_id , & buffer , participantsMask );
285
+ StringInfo msg = makeStringInfo ();
331
286
287
+ ret = dmq_pop (& sender_id , msg , participants );
332
288
if (ret )
333
289
{
334
- msg = (MtmArbiterMessage * ) buffer .data ;
335
-
336
- Assert (msg -> node == sender_to_node [sender_id ]);
337
- Assert (msg -> code == code );
338
- Assert (msg -> dxid == xid );
339
- Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
340
-
341
- mtm_log (MtmTxTrace ,
342
- "GatherPrecommits: got 'ok' for tx" XID_FMT " from node%d" ,
343
- xid , sender_to_node [sender_id ]);
344
-
345
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
290
+ messages [* msg_count ].message = msg ;
291
+ messages [* msg_count ].node_id = sender_to_node [sender_id ];
292
+ (* msg_count )++ ;
293
+ BIT_CLEAR (participants , sender_to_node [sender_id ] - 1 );
346
294
}
347
295
else
348
296
{
@@ -353,15 +301,13 @@ GatherPrecommits(TransactionId xid, nodemask_t participantsMask, MtmMessageCode
353
301
*/
354
302
if (!BIT_CHECK (MtmGetEnabledNodeMask (), sender_to_node [sender_id ] - 1 ))
355
303
{
356
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
304
+ BIT_CLEAR (participants , sender_to_node [sender_id ] - 1 );
357
305
mtm_log (MtmTxTrace ,
358
- "GatherPrecommit: dropping node%d from participants of tx" XID_FMT ,
359
- sender_to_node [sender_id ], xid );
306
+ "GatherPrecommit: dropping node%d from tx participants" ,
307
+ sender_to_node [sender_id ]);
360
308
}
361
309
}
362
310
}
363
-
364
- // XXX: assert that majority has responded
365
311
}
366
312
367
313
/*
0 commit comments