Update synchronous_standby_names in the same trxn as pub creation again.

arssher · arssher · commit a10b29d84de8 · 2017-08-08T19:26:02.000+03:00
Instead of executing it in a separate transaction, just issue SET LOCAL
synchronous_commit TO local to avoid the hang.
diff --git a/shard.sql b/shard.sql
@@ -117,6 +117,10 @@ BEGIN
 	-- Create publication for new data channel prev replica -> dst, make it sync
 	EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
 	EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, p_name);
+	-- This is necessary since the sub is not created yet, and with sync
+	-- commit we would hang forever
+	SET LOCAL synchronous_commit TO local;
+	PERFORM shardman.ensure_sync_standby(lname);
 END $$ LANGUAGE plpgsql STRICT;
 
 -- Executed on node with new part, see mp_rebuild_lr
@@ -139,6 +143,10 @@ BEGIN
 		EXECUTE format('DROP PUBLICATION IF EXISTS %I', next_lname);
 		EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I',
 					   next_lname, p_name);
+		-- This is necessary since the sub is not created yet, and with sync
+		-- commit we would hang forever
+		SET LOCAL synchronous_commit TO local;
+		PERFORM shardman.ensure_sync_standby(next_lname);
 	END IF;
 
 	IF prev_rep IS NOT NULL THEN -- we need to setup channel prev replica -> dst
@@ -257,14 +265,17 @@ DECLARE
 	lname name := shardman.get_data_lname(part_name, oldtail, newtail);
 BEGIN
 	-- Repslot for new data channel. Must be first, since we "cannot create
-	-- logical replication slot in transaction that has performed writes"
+	-- logical replication slot in transaction that has performed writes".
 	PERFORM shardman.create_repslot(lname);
 	-- Drop publication & repslot used for copy
 	PERFORM shardman.drop_repslot_and_pub(cp_logname);
 	-- Create publication for new data channel
 	EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
 	EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, part_name);
-	-- Make this channel sync
+	-- Make this channel sync.
+	-- This is necessary since the sub is not created yet, and with sync
+	-- commit we would hang forever.
+	SET LOCAL synchronous_commit TO local;
 	PERFORM shardman.ensure_sync_standby(lname);
 	-- Now it is safe to make old tail writable again
 	PERFORM shardman.readonly_table_off(part_name::regclass);
@@ -638,8 +649,17 @@ BEGIN
 	RETURN format('shardman_copy_%s_%s_%s', part_name, src, dst);
 END $$ LANGUAGE plpgsql STRICT;
 
+/*
+ * Convention about pub, repslot, sub and application_name used for data
+ * replication. We recreate sub while switching pub and recreate pub when
+ * switching sub, so including both in the name. See top comment on why we
+ * don't reuse pubs and subs.
+ */
 CREATE FUNCTION get_data_lname(part_name text, pub_node int, sub_node int)
-	RETURNS text AS  'pg_shardman' LANGUAGE C STRICT;
+	RETURNS name AS $$
+BEGIN
+	RETURN format('shardman_data_%s_%s_%s', part_name, pub_node, sub_node);
+END $$ LANGUAGE plpgsql STRICT;
 
 -- Make sure that standby_name is present in synchronous_standby_names. If not,
 -- add it via ALTER SYSTEM and SIGHUP postmaster to reread conf.
diff --git a/src/shard.c b/src/shard.c
@@ -44,11 +44,7 @@
  * wrapper which will continiously try to create subscription if it fails.
  * Besides, there is no way to create logical replication slot if current trxn
  * had written something, and so it is impossible to do that from trigger on
- * update. Finally, it is a pretty bad idea to add entry to
- * synchronous_standy_names (obviously non-transactional action) from the
- * transaction that wrote something, because if remote end is not up, such
- * transaction will hang forever during the commit. The moral is that we
- * manage LR only manually.
+ * update. The moral is that we manage LR only manually.
  *
  * As always, implementations must be written atomically, so that if anything
  * reboots, things are not broken. This requires special attention while
@@ -407,9 +403,6 @@ void
 init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
 			  int32 dst_node)
 {
-	char *prev_dst_lname;
-	char *dst_next_lname;
-
 	/* Set up fields neccesary to call init_cp_state */
 	mps->cp.part_name = part_name;
 	if (src_node == SHMN_INVALID_NODE_ID)
@@ -479,32 +472,17 @@ init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
 		mps->cp.dst_node, part_name, mps->cp.src_node,
 		mps->cp.dst_node, part_name, mps->cp.src_node);
 
-	/*
-	 * Note the careful placement of ensure_sync_standby's. They will
-	 * immediately block the database, because we firstly create pub &
-	 * repslots along with calling those, and only then create subs. We
-	 * execute them in separate transactions to allow other changes commit.
-	 */
 	if (mps->prev_node != SHMN_INVALID_NODE_ID)
 	{
-		prev_dst_lname = get_data_lname_cstr(part_name, mps->prev_node,
-										  mps->cp.dst_node);
 		mps->prev_sql = psprintf(
-			"begin; select shardman.part_moved_prev('%s', %d, %d); end;"
-			" select shardman.ensure_sync_standby('%s');",
-			part_name, mps->cp.src_node, mps->cp.dst_node,
-			prev_dst_lname);
+			"select shardman.part_moved_prev('%s', %d, %d);",
+			part_name, mps->cp.src_node, mps->cp.dst_node);
 	}
 	mps->dst_sql = psprintf(
-		"begin; select shardman.part_moved_dst('%s', %d, %d); end;",
+		"select shardman.part_moved_dst('%s', %d, %d);",
 		part_name, mps->cp.src_node, mps->cp.dst_node);
 	if (mps->next_node != SHMN_INVALID_NODE_ID)
 	{
-		dst_next_lname = get_data_lname_cstr(part_name, mps->cp.dst_node,
-											 mps->next_node);
-		mps->dst_sql = psprintf(
-			"%s select shardman.ensure_sync_standby('%s');",
-			mps->dst_sql, dst_next_lname);
 		mps->next_sql = psprintf(
 			"select shardman.part_moved_next('%s', %d, %d);",
 			part_name, mps->cp.src_node, mps->cp.dst_node);
@@ -538,7 +516,6 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
 {
 	char *sql;
 	uint64 shard_exists;
-	char *lname;
 
 	/* Check that table with such name is not already exists on dst node */
 	sql = psprintf(
@@ -580,7 +557,6 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
 	crs->drop_cp_sub_sql = psprintf(
 		"select shardman.replica_created_drop_cp_sub('%s', %d, %d);",
 		part_name, crs->cp.src_node, crs->cp.dst_node);
-	lname = get_data_lname_cstr(part_name, crs->cp.src_node, crs->cp.dst_node);
 	/*
 	 * Separate trxn for ensure_sync_standby as in init_mp_state. It is
 	 * interesting that while I got expected behaviour (hanged transaction) in
@@ -589,9 +565,8 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
 	 * settings are not getting reloaded, but not sure why.
 	 */
 	crs->create_data_pub_sql = psprintf(
-		"begin; select shardman.replica_created_create_data_pub('%s', %d, %d); end;"
-		" select shardman.ensure_sync_standby('%s');",
-		part_name, crs->cp.src_node, crs->cp.dst_node, lname);
+		"select shardman.replica_created_create_data_pub('%s', %d, %d);",
+		part_name, crs->cp.src_node, crs->cp.dst_node);
 	crs->create_data_sub_sql = psprintf(
 		"select shardman.replica_created_create_data_sub('%s', %d, %d);",
 		part_name, crs->cp.src_node, crs->cp.dst_node);
@@ -1381,29 +1356,6 @@ void configure_retry(CopyPartState *cps, int millis)
 	cps->exec_res = TASK_WAKEMEUP;
 }
 
-/*
- * Convention about pub, repslot, sub and application_name used for data
- * replication. We recreate sub while switching pub and recreate pub when
- * switching sub, so including both in the name. See top comment on why we
- * don't reuse pubs and subs.
- */
-char *
-get_data_lname_cstr(const char *part_name, int32 pub_node, int32 sub_node)
-{
-	return psprintf("shardman_data_%s_%d_%d", part_name, pub_node, sub_node);
-}
-/* SQL interface to it */
-PG_FUNCTION_INFO_V1(get_data_lname);
-Datum
-get_data_lname(PG_FUNCTION_ARGS)
-{
-	char *part_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
-	int32 pub_node = PG_GETARG_INT32(1);
-	int32 sub_node = PG_GETARG_INT32(2);
-	PG_RETURN_TEXT_P(cstring_to_text(
-						 get_data_lname_cstr(part_name, pub_node, sub_node)));
-}
-
 /*
  * Get current CLOCK_MONOTONIC time. Fails with PG elog(FATAL) if gettime
  * failed.