@@ -180,32 +180,65 @@ GatherPrepares(MtmCurrentTrans* x, nodemask_t participantsMask, int *failed_at)
180
180
181
181
while (participantsMask != 0 )
182
182
{
183
+ bool ret ;
183
184
DmqSenderId sender_id ;
184
185
StringInfoData buffer ;
185
186
MtmArbiterMessage * msg ;
186
187
187
- dmq_pop (& sender_id , & buffer , participantsMask );
188
- msg = (MtmArbiterMessage * ) buffer .data ;
188
+ ret = dmq_pop (& sender_id , & buffer , participantsMask );
189
189
190
- Assert (msg -> node == sender_to_node [sender_id ]);
191
- Assert (msg -> code == MSG_PREPARED || msg -> code == MSG_ABORTED );
192
- Assert (msg -> dxid == x -> xid );
193
- Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
190
+ if (ret )
191
+ {
192
+ msg = (MtmArbiterMessage * ) buffer .data ;
193
+
194
+ Assert (msg -> node == sender_to_node [sender_id ]);
195
+ Assert (msg -> code == MSG_PREPARED || msg -> code == MSG_ABORTED );
196
+ Assert (msg -> dxid == x -> xid );
197
+ Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
194
198
195
- mtm_log (MtmTxTrace ,
196
- "GatherPrepares: got '%s' for %s from node%d" ,
197
- msg -> code == MSG_PREPARED ? "ok" : "failed" ,
198
- msg -> gid , sender_to_node [sender_id ]);
199
+ mtm_log (MtmTxTrace ,
200
+ "GatherPrepares: got '%s' for tx" XID_FMT " from node%d" ,
201
+ msg -> code == MSG_PREPARED ? "ok" : "failed" ,
202
+ x -> xid , sender_to_node [sender_id ]);
199
203
200
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
204
+ BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
201
205
202
- if (msg -> code == MSG_ABORTED )
206
+ if (msg -> code == MSG_ABORTED )
207
+ {
208
+ prepared = false;
209
+ * failed_at = msg -> node ;
210
+ }
211
+ }
212
+ else
203
213
{
204
- prepared = false;
205
- * failed_at = msg -> node ;
214
+ /*
215
+ * If queue is detached then the neighbour node is probably
216
+ * disconnected. Let's wait until it becomes disabled, as we can
217
+ * become offline by this time.
218
+ */
219
+ MtmLock (LW_SHARED );
220
+ if (BIT_CHECK (Mtm -> disabledNodeMask , sender_to_node [sender_id ] - 1 ))
221
+ {
222
+ if (Mtm -> status != MTM_ONLINE )
223
+ {
224
+ elog (ERROR , "our node was disabled during transaction commit" );
225
+ }
226
+ else
227
+ {
228
+ BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
229
+ mtm_log (MtmTxTrace ,
230
+ "GatherPrepares: dropping node%d from participants of tx" XID_FMT ,
231
+ sender_to_node [sender_id ], x -> xid );
232
+ prepared = false;
233
+ * failed_at = sender_to_node [sender_id ];
234
+ }
235
+ }
236
+ MtmUnlock ();
206
237
}
207
238
}
208
239
240
+ // XXX: assert that majority has responded
241
+
209
242
return prepared ;
210
243
}
211
244
@@ -216,22 +249,53 @@ GatherPrecommits(MtmCurrentTrans* x, nodemask_t participantsMask)
216
249
217
250
while (participantsMask != 0 )
218
251
{
252
+ bool ret ;
219
253
DmqSenderId sender_id ;
220
254
StringInfoData buffer ;
221
255
MtmArbiterMessage * msg ;
222
256
223
- dmq_pop (& sender_id , & buffer , participantsMask );
224
- msg = (MtmArbiterMessage * ) buffer .data ;
257
+ ret = dmq_pop (& sender_id , & buffer , participantsMask );
225
258
226
- Assert (msg -> node == sender_to_node [sender_id ]);
227
- Assert (msg -> code == MSG_PRECOMMITTED );
228
- Assert (msg -> dxid == x -> xid );
229
- Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
259
+ if (ret )
260
+ {
261
+ msg = (MtmArbiterMessage * ) buffer .data ;
262
+
263
+ Assert (msg -> node == sender_to_node [sender_id ]);
264
+ Assert (msg -> code == MSG_PRECOMMITTED );
265
+ Assert (msg -> dxid == x -> xid );
266
+ Assert (BIT_CHECK (participantsMask , sender_to_node [sender_id ] - 1 ));
230
267
231
- mtm_log (MtmTxTrace ,
232
- "GatherPrecommits: got 'ok' for %s from node%d" ,
233
- msg -> gid , sender_to_node [sender_id ]);
268
+ mtm_log (MtmTxTrace ,
269
+ "GatherPrecommits: got 'ok' for tx" XID_FMT " from node%d" ,
270
+ x -> xid , sender_to_node [sender_id ]);
234
271
235
- BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
272
+ BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
273
+ }
274
+ else
275
+ {
276
+ /*
277
+ * If queue is detached then the neighbour node is probably
278
+ * disconnected. Let's wait until it becomes disabled, as we can
279
+ * become offline by this time.
280
+ */
281
+ MtmLock (LW_SHARED );
282
+ if (BIT_CHECK (Mtm -> disabledNodeMask , sender_to_node [sender_id ] - 1 ))
283
+ {
284
+ if (Mtm -> status != MTM_ONLINE )
285
+ {
286
+ elog (ERROR , "our node was disabled during transaction commit" );
287
+ }
288
+ else
289
+ {
290
+ BIT_CLEAR (participantsMask , sender_to_node [sender_id ] - 1 );
291
+ mtm_log (MtmTxTrace ,
292
+ "GatherPrecommit: dropping node%d from participants of tx" XID_FMT ,
293
+ sender_to_node [sender_id ], x -> xid );
294
+ }
295
+ }
296
+ MtmUnlock ();
297
+ }
236
298
}
299
+
300
+ // XXX: assert that majority has responded
237
301
}
0 commit comments