Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a10b29d

Browse files
committed
Updating sync_standby_names in one trxn with pub creation again.
Instead of executing it in separate transaction, just issue SET LOCAL synchronous_commit TO local to avoid hangup.
1 parent 2ca66a9 commit a10b29d

File tree

2 files changed

+29
-57
lines changed

2 files changed

+29
-57
lines changed

shard.sql

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ BEGIN
117117
-- Create publication for new data channel prev replica -> dst, make it sync
118118
EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
119119
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, p_name);
120+
-- This is neccessary since sub is not created, and with sync commit we will
121+
-- hang forever
122+
SET LOCAL synchronous_commit TO local;
123+
PERFORM shardman.ensure_sync_standby(lname);
120124
END $$ LANGUAGE plpgsql STRICT;
121125

122126
-- Executed on node with new part, see mp_rebuild_lr
@@ -139,6 +143,10 @@ BEGIN
139143
EXECUTE format('DROP PUBLICATION IF EXISTS %I', next_lname);
140144
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I',
141145
next_lname, p_name);
146+
-- This is neccessary since sub is not created, and with sync commit we will
147+
-- hang forever
148+
SET LOCAL synchronous_commit TO local;
149+
PERFORM shardman.ensure_sync_standby(next_lname);
142150
END IF;
143151

144152
IF prev_rep IS NOT NULL THEN -- we need to setup channel prev replica -> dst
@@ -257,14 +265,17 @@ DECLARE
257265
lname name := shardman.get_data_lname(part_name, oldtail, newtail);
258266
BEGIN
259267
-- Repslot for new data channel. Must be first, since we "cannot create
260-
-- logical replication slot in transaction that has performed writes"
268+
-- logical replication slot in transaction that has performed writes".
261269
PERFORM shardman.create_repslot(lname);
262270
-- Drop publication & repslot used for copy
263271
PERFORM shardman.drop_repslot_and_pub(cp_logname);
264272
-- Create publication for new data channel
265273
EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
266274
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, part_name);
267-
-- Make this channel sync
275+
-- Make this channel sync.
276+
-- This is neccessary since sub is not created, and with sync commit we will
277+
-- hang forever.
278+
SET LOCAL synchronous_commit TO local;
268279
PERFORM shardman.ensure_sync_standby(lname);
269280
-- Now it is safe to make old tail writable again
270281
PERFORM shardman.readonly_table_off(part_name::regclass);
@@ -638,8 +649,17 @@ BEGIN
638649
RETURN format('shardman_copy_%s_%s_%s', part_name, src, dst);
639650
END $$ LANGUAGE plpgsql STRICT;
640651

652+
/*
653+
* Convention about pub, repslot, sub and application_name used for data
654+
* replication. We recreate sub while switching pub and recreate pub when
655+
* switching sub, so including both in the name. See top comment on why we
656+
* don't reuse pubs and subs.
657+
*/
641658
CREATE FUNCTION get_data_lname(part_name text, pub_node int, sub_node int)
642-
RETURNS text AS 'pg_shardman' LANGUAGE C STRICT;
659+
RETURNS name AS $$
660+
BEGIN
661+
RETURN format('shardman_data_%s_%s_%s', part_name, pub_node, sub_node);
662+
END $$ LANGUAGE plpgsql STRICT;
643663

644664
-- Make sure that standby_name is present in synchronous_standby_names. If not,
645665
-- add it via ALTER SYSTEM and SIGHUP postmaster to reread conf.

src/shard.c

Lines changed: 6 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,7 @@
4444
* wrapper which will continiously try to create subscription if it fails.
4545
* Besides, there is no way to create logical replication slot if current trxn
4646
* had written something, and so it is impossible to do that from trigger on
47-
* update. Finally, it is a pretty bad idea to add entry to
48-
* synchronous_standy_names (obviously non-transactional action) from the
49-
* transaction that wrote something, because if remote end is not up, such
50-
* transaction will hang forever during the commit. The moral is that we
51-
* manage LR only manually.
47+
* update. The moral is that we manage LR only manually.
5248
*
5349
* As always, implementations must be written atomically, so that if anything
5450
* reboots, things are not broken. This requires special attention while
@@ -407,9 +403,6 @@ void
407403
init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
408404
int32 dst_node)
409405
{
410-
char *prev_dst_lname;
411-
char *dst_next_lname;
412-
413406
/* Set up fields neccesary to call init_cp_state */
414407
mps->cp.part_name = part_name;
415408
if (src_node == SHMN_INVALID_NODE_ID)
@@ -479,32 +472,17 @@ init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
479472
mps->cp.dst_node, part_name, mps->cp.src_node,
480473
mps->cp.dst_node, part_name, mps->cp.src_node);
481474

482-
/*
483-
* Note the careful placement of ensure_sync_standby's. They will
484-
* immediately block the database, because we firstly create pub &
485-
* repslots along with calling those, and only then create subs. We
486-
* execute them in separate transactions to allow other changes commit.
487-
*/
488475
if (mps->prev_node != SHMN_INVALID_NODE_ID)
489476
{
490-
prev_dst_lname = get_data_lname_cstr(part_name, mps->prev_node,
491-
mps->cp.dst_node);
492477
mps->prev_sql = psprintf(
493-
"begin; select shardman.part_moved_prev('%s', %d, %d); end;"
494-
" select shardman.ensure_sync_standby('%s');",
495-
part_name, mps->cp.src_node, mps->cp.dst_node,
496-
prev_dst_lname);
478+
"select shardman.part_moved_prev('%s', %d, %d);",
479+
part_name, mps->cp.src_node, mps->cp.dst_node);
497480
}
498481
mps->dst_sql = psprintf(
499-
"begin; select shardman.part_moved_dst('%s', %d, %d); end;",
482+
"select shardman.part_moved_dst('%s', %d, %d);",
500483
part_name, mps->cp.src_node, mps->cp.dst_node);
501484
if (mps->next_node != SHMN_INVALID_NODE_ID)
502485
{
503-
dst_next_lname = get_data_lname_cstr(part_name, mps->cp.dst_node,
504-
mps->next_node);
505-
mps->dst_sql = psprintf(
506-
"%s select shardman.ensure_sync_standby('%s');",
507-
mps->dst_sql, dst_next_lname);
508486
mps->next_sql = psprintf(
509487
"select shardman.part_moved_next('%s', %d, %d);",
510488
part_name, mps->cp.src_node, mps->cp.dst_node);
@@ -538,7 +516,6 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
538516
{
539517
char *sql;
540518
uint64 shard_exists;
541-
char *lname;
542519

543520
/* Check that table with such name is not already exists on dst node */
544521
sql = psprintf(
@@ -580,7 +557,6 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
580557
crs->drop_cp_sub_sql = psprintf(
581558
"select shardman.replica_created_drop_cp_sub('%s', %d, %d);",
582559
part_name, crs->cp.src_node, crs->cp.dst_node);
583-
lname = get_data_lname_cstr(part_name, crs->cp.src_node, crs->cp.dst_node);
584560
/*
585561
* Separate trxn for ensure_sync_standby as in init_mp_state. It is
586562
* interesting that while I got expected behaviour (hanged transaction) in
@@ -589,9 +565,8 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
589565
* settings are not getting reloaded, but not sure why.
590566
*/
591567
crs->create_data_pub_sql = psprintf(
592-
"begin; select shardman.replica_created_create_data_pub('%s', %d, %d); end;"
593-
" select shardman.ensure_sync_standby('%s');",
594-
part_name, crs->cp.src_node, crs->cp.dst_node, lname);
568+
"select shardman.replica_created_create_data_pub('%s', %d, %d);",
569+
part_name, crs->cp.src_node, crs->cp.dst_node);
595570
crs->create_data_sub_sql = psprintf(
596571
"select shardman.replica_created_create_data_sub('%s', %d, %d);",
597572
part_name, crs->cp.src_node, crs->cp.dst_node);
@@ -1381,29 +1356,6 @@ void configure_retry(CopyPartState *cps, int millis)
13811356
cps->exec_res = TASK_WAKEMEUP;
13821357
}
13831358

1384-
/*
1385-
* Convention about pub, repslot, sub and application_name used for data
1386-
* replication. We recreate sub while switching pub and recreate pub when
1387-
* switching sub, so including both in the name. See top comment on why we
1388-
* don't reuse pubs and subs.
1389-
*/
1390-
char *
1391-
get_data_lname_cstr(const char *part_name, int32 pub_node, int32 sub_node)
1392-
{
1393-
return psprintf("shardman_data_%s_%d_%d", part_name, pub_node, sub_node);
1394-
}
1395-
/* SQL interface to it */
1396-
PG_FUNCTION_INFO_V1(get_data_lname);
1397-
Datum
1398-
get_data_lname(PG_FUNCTION_ARGS)
1399-
{
1400-
char *part_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
1401-
int32 pub_node = PG_GETARG_INT32(1);
1402-
int32 sub_node = PG_GETARG_INT32(2);
1403-
PG_RETURN_TEXT_P(cstring_to_text(
1404-
get_data_lname_cstr(part_name, pub_node, sub_node)));
1405-
}
1406-
14071359
/*
14081360
* Get current CLOCK_MONOTONIC time. Fails with PG elog(FATAL) if gettime
14091361
* failed.

0 commit comments

Comments
 (0)