Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 21213ed

Browse files
committed
Review for 82f8d62, creating LR slots in separate xacts.
Without this, creating replicas for a large number of partitions leads to a strange visibility bug: the historic snapshot in the walsender doesn't see a publication that was created in the same xact as the replication slot. This patch helps, but we are still not sure of the root cause of the problem, so we need to investigate it further. Besides, for additional safety we also split slot dropping & creation into separate xacts here; we will probably unite them again later.
1 parent 21e0505 commit 21213ed

File tree

2 files changed

+34
-35
lines changed

2 files changed

+34
-35
lines changed

shard.sql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,8 +371,6 @@ DECLARE
371371
cp_logname text := shardman.get_cp_logname(part_name, oldtail, newtail);
372372
lname name := shardman.get_data_lname(part_name, oldtail, newtail);
373373
BEGIN
374-
-- Repslot for new data channel. Must be first, since we "cannot create
375-
-- logical replication slot in transaction that has performed writes".
376374
PERFORM shardman.drop_repslot(lname);
377375
-- Drop publication & repslot used for copy
378376
PERFORM shardman.drop_repslot_and_pub(cp_logname);

src/copypart.c

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,6 @@
7474
* specify since which lsn start replication, tables must be synced anyway
7575
* during these operations, so what the point of reusing old sub?
7676
*
77-
* About fdws on replicas: we have to keep partition of parent table as fdw,
78-
* because otherwise we would not be able to write anything to it. On the
79-
* other hand, keeping the whole list of replicas is a bit excessive and
80-
* slower in case of primary failure: we need actually only primary and
81-
* ourself.
82-
*
8377
* Currently we don't save progress of separate tasks (e.g. for copy part
8478
* whether initial sync started or done, lsn, etc), so we have to start
8579
* everything from the ground if shardlord reboots. This is arguably fine.
@@ -140,7 +134,7 @@ static XLogRecPtr pg_lsn_in_c(const char *lsn);
140134
static struct timespec timespec_now_plus_millis(int millis);
141135
struct timespec timespec_now(void);
142136

143-
static char*
137+
static char*
144138
get_data_lname(char const* part_name, int pub_node, int sub_node)
145139
{
146140
return psprintf("shardman_data_%s_%d_%d", part_name, pub_node, sub_node);
@@ -229,19 +223,23 @@ init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
229223
if (mps->prev_node != SHMN_INVALID_NODE_ID)
230224
{
231225
mps->prev_sql = psprintf(
232-
"select shardman.part_moved_prev('%s', %d, %d); SELECT pg_create_logical_replication_slot('%s', 'pgoutput');",
226+
"select shardman.part_moved_prev('%s', %d, %d);"
227+
" select pg_create_logical_replication_slot('%s', 'pgoutput');",
233228
part_name, mps->cp.src_node, mps->cp.dst_node,
234229
get_data_lname(part_name, shardman_my_id, mps->cp.dst_node));
235230
}
236231
mps->dst_sql = psprintf(
237-
"select shardman.part_moved_dst('%s', %d, %d); SELECT pg_create_logical_replication_slot('%s', 'pgoutput');",
238-
part_name, mps->cp.src_node, mps->cp.dst_node,
239-
get_data_lname(part_name, mps->cp.dst_node, get_next_node(part_name, mps->cp.src_node)));
232+
"select shardman.part_moved_dst('%s', %d, %d);",
233+
part_name, mps->cp.src_node, mps->cp.dst_node);
240234
if (mps->next_node != SHMN_INVALID_NODE_ID)
241235
{
242236
mps->next_sql = psprintf(
243237
"select shardman.part_moved_next('%s', %d, %d);",
244238
part_name, mps->cp.src_node, mps->cp.dst_node);
239+
mps->dst_sql = psprintf(
240+
"%s select pg_create_logical_replication_slot('%s', 'pgoutput');",
241+
mps->dst_sql,
242+
get_data_lname(part_name, mps->cp.dst_node, mps->next_node));
245243
}
246244
}
247245

@@ -280,17 +278,12 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
280278
crs->drop_cp_sub_sql = psprintf(
281279
"select shardman.replica_created_drop_cp_sub('%s', %d, %d);",
282280
part_name, crs->cp.src_node, crs->cp.dst_node);
283-
/*
284-
* Separate trxn for ensure_sync_standby as in init_mp_state. It is
285-
* interesting that while I got expected behaviour (hanged transaction) in
286-
* move_part if ensure_sync_standby was executed in one trxn with create
287-
* repslot and pub, here I didn't. Probably it is committing so fast that
288-
* settings are not getting reloaded, but not sure why.
289-
*/
290-
crs->create_data_pub_sql = psprintf(
291-
"select shardman.replica_created_create_data_pub('%s', %d, %d); SELECT pg_create_logical_replication_slot('%s', 'pgoutput');",
292-
part_name, crs->cp.src_node, crs->cp.dst_node,
293-
get_data_lname(part_name, crs->cp.src_node, crs->cp.dst_node));
281+
282+
crs->create_data_pub_sql =
283+
psprintf("select shardman.replica_created_create_data_pub('%s', %d, %d);"
284+
" select pg_create_logical_replication_slot('%s', 'pgoutput');",
285+
part_name, crs->cp.src_node, crs->cp.dst_node,
286+
get_data_lname(part_name, crs->cp.src_node, crs->cp.dst_node));
294287
crs->create_data_sub_sql = psprintf(
295288
"select shardman.replica_created_create_data_sub('%s', %d, %d);",
296289
part_name, crs->cp.src_node, crs->cp.dst_node);
@@ -350,7 +343,7 @@ init_cp_state(CopyPartState *cps)
350343
"drop publication if exists %s cascade;"
351344
"create publication %s for table %s;"
352345
"select shardman.drop_repslot('%s');"
353-
"SELECT pg_create_logical_replication_slot('%s', 'pgoutput');",
346+
"select pg_create_logical_replication_slot('%s', 'pgoutput');",
354347
cps->logname, cps->logname, cps->part_name, cps->logname, cps->logname
355348
);
356349
cps->relation = get_partition_relation(cps->part_name);
@@ -648,14 +641,21 @@ exec_move_part(MovePartState *mps)
648641
mps->cp.exec_res = TASK_DONE;
649642
}
650643

644+
/*
645+
* Execute given statement in separate transactions. In case of any failure
646+
* return false, destroy connection and configure_retry on given cps.
647+
* This function is used only for internal SQL, where we guarantee no ';'
648+
* in statements.
649+
*/
651650
static bool remote_exec(PGconn** conn, CopyPartState *cps, char* stmts)
652651
{
653652
char* sql = stmts, *sep;
654653
PGresult *res;
655654
while ((sep = strchr(sql, ';')) != NULL) {
656655
*sep = '\0';
657656
res = PQexec(*conn, sql);
658-
if (PQresultStatus(res) != PGRES_TUPLES_OK && PQresultStatus(res) != PGRES_COMMAND_OK)
657+
if (PQresultStatus(res) != PGRES_TUPLES_OK &&
658+
PQresultStatus(res) != PGRES_COMMAND_OK)
659659
{
660660
shmn_elog(LOG, "REMOTE_EXEC: execution of query '%s' failed for paritions %s: %s",
661661
sql, cps->part_name, PQerrorMessage(*conn));
@@ -686,15 +686,15 @@ mp_rebuild_lr(MovePartState *mps)
686686
if (ensure_pqconn(&mps->prev_conn, mps->prev_connstr,
687687
(CopyPartState *) mps) == -1)
688688
return -1;
689-
if (!remote_exec(&mps->prev_conn, (CopyPartState *)mps, mps->prev_sql))
689+
if (!remote_exec(&mps->prev_conn, (CopyPartState *) mps, mps->prev_sql))
690690
return -1;
691691
shmn_elog(DEBUG1, "mp %s: LR conf on prev done", mps->cp.part_name);
692692
}
693693

694694
if (ensure_pqconn_cp((CopyPartState *) mps,
695695
ENSURE_PQCONN_DST) == -1)
696696
return -1;
697-
if (!remote_exec(&mps->cp.dst_conn, (CopyPartState *)mps, mps->dst_sql))
697+
if (!remote_exec(&mps->cp.dst_conn, (CopyPartState *) mps, mps->dst_sql))
698698
return -1;
699699
shmn_elog(DEBUG1, "mp %s: LR conf on dst done", mps->cp.part_name);
700700

@@ -703,7 +703,7 @@ mp_rebuild_lr(MovePartState *mps)
703703
if (ensure_pqconn(&mps->next_conn, mps->next_connstr,
704704
(CopyPartState *) mps) == -1)
705705
return -1;
706-
if (!remote_exec(&mps->next_conn, (CopyPartState *)mps, mps->next_sql))
706+
if (!remote_exec(&mps->next_conn, (CopyPartState *) mps, mps->next_sql))
707707
return -1;
708708
shmn_elog(DEBUG1, "mp %s: LR conf on next done", mps->cp.part_name);
709709
}
@@ -850,12 +850,12 @@ cp_start_tablesync(CopyPartState *cps)
850850
if (check_sub_sync("shardman_meta_sub", &cps->src_conn, lord_lsn,
851851
"meta sub") == -1)
852852
{
853-
goto fail;
853+
goto configure_retry_and_fail;
854854
}
855855
if (check_sub_sync("shardman_meta_sub", &cps->dst_conn, lord_lsn,
856856
"meta sub") == -1)
857857
{
858-
goto fail;
858+
goto configure_retry_and_fail;
859859
}
860860

861861
if (!remote_exec(&cps->dst_conn, cps, cps->dst_drop_sub_sql))
@@ -866,16 +866,17 @@ cp_start_tablesync(CopyPartState *cps)
866866
goto fail;
867867
shmn_elog(DEBUG1, "cp %s: pub and rs recreated on src", cps->part_name);
868868

869-
if (!remote_exec(&cps->dst_conn, cps, cps->dst_create_tab_and_sub_sql))
869+
if (!remote_exec(&cps->dst_conn, cps, cps->dst_create_tab_and_sub_sql))
870870
goto fail;
871871
shmn_elog(DEBUG1, "cp %s: table & sub created on dst, tablesync started",
872872
cps->part_name);
873873

874874
cps->curstep = COPYPART_START_FINALSYNC;
875875
return 0;
876876

877-
fail:
877+
configure_retry_and_fail:
878878
configure_retry(cps, shardman_cmd_retry_naptime);
879+
fail:
879880
return -1;
880881
}
881882

@@ -1078,8 +1079,8 @@ ensure_pqconn(PGconn **conn, const char *connstr,
10781079
*conn = PQconnectdb(connstr);
10791080
if (PQstatus(*conn) != CONNECTION_OK)
10801081
{
1081-
shmn_elog(NOTICE, "Connection to node failed: %s",
1082-
PQerrorMessage(*conn));
1082+
shmn_elog(NOTICE, "Connection to node %s failed: %s",
1083+
connstr, PQerrorMessage(*conn));
10831084
reset_pqconn(conn);
10841085
configure_retry(cps, shardman_cmd_retry_naptime);
10851086
return -1;

0 commit comments

Comments
 (0)