Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6bf6563

Browse files
committed
Alter sync_standby_names separately, using local sync commit always.
Using synchronous_commit = local by sharlord prevents unneccesary hanging/waiting for sync replicas. Separate 'alter synchronous_standby_names' probably will help us avoid strange bug when walsender sets xmin too early on pg_reload_conf receival.
1 parent 29ebc48 commit 6bf6563

File tree

3 files changed

+56
-41
lines changed

3 files changed

+56
-41
lines changed

shard.sql

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,6 @@ BEGIN
119119
-- Create publication for new data channel prev replica -> dst, make it sync
120120
EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
121121
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, p_name);
122-
-- This is neccessary since sub is not created, and with sync commit we will
123-
-- hang forever
124-
SET LOCAL synchronous_commit TO local;
125-
PERFORM shardman.ensure_sync_standby(lname);
126122
END $$ LANGUAGE plpgsql STRICT;
127123

128124
-- Executed on node with new part, see mp_rebuild_lr
@@ -140,15 +136,10 @@ BEGIN
140136
ASSERT dst = shardman.my_id(), 'part_moved_dst must be called on dst';
141137
IF next_rep IS NOT NULL THEN -- we need to setup channel dst -> next replica
142138
next_lname := shardman.get_data_lname(p_name, dst, next_rep);
143-
-- This must be first write in the transaction!
144139
PERFORM shardman.drop_repslot(next_lname);
145140
EXECUTE format('DROP PUBLICATION IF EXISTS %I', next_lname);
146141
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I',
147142
next_lname, p_name);
148-
-- This is neccessary since sub is not created, and with sync commit we will
149-
-- hang forever
150-
SET LOCAL synchronous_commit TO local;
151-
PERFORM shardman.ensure_sync_standby(next_lname);
152143
END IF;
153144

154145
IF prev_rep IS NOT NULL THEN -- we need to setup channel prev replica -> dst
@@ -377,13 +368,6 @@ BEGIN
377368
-- Create publication for new data channel
378369
EXECUTE format('DROP PUBLICATION IF EXISTS %I', lname);
379370
EXECUTE format('CREATE PUBLICATION %I FOR TABLE %I', lname, part_name);
380-
-- Make this channel sync.
381-
-- This is neccessary since sub is not created, and with sync commit we will
382-
-- hang forever.
383-
SET LOCAL synchronous_commit TO local;
384-
PERFORM shardman.ensure_sync_standby(lname);
385-
-- Now it is safe to make old tail writable again
386-
PERFORM shardman.readonly_table_off(part_name::regclass);
387371
END $$ LANGUAGE plpgsql;
388372

389373
-- Executed on newtail node, see cr_rebuild_lr

src/copypart.c

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -222,24 +222,32 @@ init_mp_state(MovePartState *mps, const char *part_name, int32 src_node,
222222

223223
if (mps->prev_node != SHMN_INVALID_NODE_ID)
224224
{
225+
char *prev_dst_lname = get_data_lname(part_name, shardman_my_id,
226+
mps->cp.dst_node);
225227
mps->prev_sql = psprintf(
226228
"select shardman.part_moved_prev('%s', %d, %d);"
227229
" select pg_create_logical_replication_slot('%s', 'pgoutput');",
228-
part_name, mps->cp.src_node, mps->cp.dst_node,
229-
get_data_lname(part_name, shardman_my_id, mps->cp.dst_node));
230+
part_name, mps->cp.src_node, mps->cp.dst_node, prev_dst_lname);
231+
232+
mps->sync_standby_prev_sql = psprintf(
233+
"select shardman.ensure_sync_standby('%s');", prev_dst_lname);
230234
}
231235
mps->dst_sql = psprintf(
232236
"select shardman.part_moved_dst('%s', %d, %d);",
233237
part_name, mps->cp.src_node, mps->cp.dst_node);
234238
if (mps->next_node != SHMN_INVALID_NODE_ID)
235239
{
240+
char *dst_next_lname = get_data_lname(part_name, mps->cp.dst_node,
241+
mps->next_node);
236242
mps->next_sql = psprintf(
237243
"select shardman.part_moved_next('%s', %d, %d);",
238244
part_name, mps->cp.src_node, mps->cp.dst_node);
239245
mps->dst_sql = psprintf(
240246
"%s select pg_create_logical_replication_slot('%s', 'pgoutput');",
241-
mps->dst_sql,
242-
get_data_lname(part_name, mps->cp.dst_node, mps->next_node));
247+
mps->dst_sql, dst_next_lname);
248+
249+
mps->sync_standby_dst_sql = psprintf(
250+
"select shardman.ensure_sync_standby('%s');", dst_next_lname);
243251
}
244252
}
245253

@@ -287,6 +295,11 @@ init_cr_state(CreateReplicaState *crs, const char *part_name, int32 dst_node)
287295
crs->create_data_sub_sql = psprintf(
288296
"select shardman.replica_created_create_data_sub('%s', %d, %d);",
289297
part_name, crs->cp.src_node, crs->cp.dst_node);
298+
crs->sync_standby_sql = psprintf(
299+
"select shardman.ensure_sync_standby('%s');"
300+
" select shardman.readonly_table_off('%s'::regclass);",
301+
get_data_lname(part_name, crs->cp.src_node, crs->cp.dst_node),
302+
part_name);
290303
}
291304

292305
/*
@@ -386,18 +399,14 @@ static void finalize_cp_state(CopyPartState *cps)
386399
/* Failed tasks never open pq connections */
387400
if (cps->res != TASK_FAILED)
388401
{
389-
if (cps->src_conn != NULL)
390-
reset_pqconn(&cps->src_conn);
391-
if (cps->dst_conn != NULL)
392-
reset_pqconn(&cps->dst_conn);
402+
reset_pqconn(&cps->src_conn);
403+
reset_pqconn(&cps->dst_conn);
393404
if (cps->type == COPYPARTTASK_MOVE_PRIMARY ||
394405
cps->type == COPYPARTTASK_MOVE_REPLICA)
395406
{
396407
MovePartState *mps = (MovePartState *) cps;
397-
if (mps->prev_conn != NULL)
398-
reset_pqconn(&mps->prev_conn);
399-
if (mps->next_conn != NULL)
400-
reset_pqconn(&mps->next_conn);
408+
reset_pqconn(&mps->prev_conn);
409+
reset_pqconn(&mps->next_conn);
401410
}
402411
}
403412
}
@@ -649,7 +658,8 @@ exec_move_part(MovePartState *mps)
649658
*/
650659
static bool remote_exec(PGconn** conn, CopyPartState *cps, char* stmts)
651660
{
652-
char* sql = stmts, *sep;
661+
char *sql = stmts;
662+
char *sep;
653663
PGresult *res;
654664
while ((sep = strchr(sql, ';')) != NULL) {
655665
*sep = '\0';
@@ -698,6 +708,14 @@ mp_rebuild_lr(MovePartState *mps)
698708
return -1;
699709
shmn_elog(DEBUG1, "mp %s: LR conf on dst done", mps->cp.part_name);
700710

711+
if (mps->prev_node != SHMN_INVALID_NODE_ID)
712+
{
713+
if (!remote_exec(&mps->prev_conn, (CopyPartState *) mps,
714+
mps->sync_standby_prev_sql))
715+
return -1;
716+
shmn_elog(DEBUG1, "mp %s: make sync standby on prev", mps->cp.part_name);
717+
}
718+
701719
if (mps->next_node != SHMN_INVALID_NODE_ID)
702720
{
703721
if (ensure_pqconn(&mps->next_conn, mps->next_connstr,
@@ -706,6 +724,10 @@ mp_rebuild_lr(MovePartState *mps)
706724
if (!remote_exec(&mps->next_conn, (CopyPartState *) mps, mps->next_sql))
707725
return -1;
708726
shmn_elog(DEBUG1, "mp %s: LR conf on next done", mps->cp.part_name);
727+
728+
if (!remote_exec(&mps->cp.dst_conn, (CopyPartState *) mps,
729+
mps->sync_standby_dst_sql))
730+
return -1;
709731
}
710732

711733
return 0;
@@ -742,14 +764,6 @@ exec_create_replica(CreateReplicaState *crs)
742764
* Work to do in general is described below. We execute them in steps written
743765
* in parentheses so that every time we create sub, pub is already exist and
744766
* every time we drop pub, sub is already dropped.
745-
* On old tail node, i.e. src:
746-
* - Drop repslot & pub used for copy (create_data_pub)
747-
* - Create repslot & pub for new data channel (create_data_pub)
748-
* - Make it synchronous; make table writable. (create_data_pub)
749-
* On new tail node, i.e. dst:
750-
* - Make table read-only for all but apply workers (drop_cp_sub)
751-
* - Drop sub used for copy (drop_cp_sub)
752-
* - Create sub for new data channel. (create_data_sub)
753767
*/
754768
int
755769
cr_rebuild_lr(CreateReplicaState *crs)
@@ -758,18 +772,25 @@ cr_rebuild_lr(CreateReplicaState *crs)
758772
ENSURE_PQCONN_SRC | ENSURE_PQCONN_DST) == -1)
759773
return -1;
760774

761-
if (!remote_exec(&crs->cp.dst_conn, (CopyPartState *) crs, crs->drop_cp_sub_sql))
775+
if (!remote_exec(&crs->cp.dst_conn, (CopyPartState *) crs,
776+
crs->drop_cp_sub_sql))
762777
return -1;
763778
shmn_elog(DEBUG1, "cr %s: drop_cp_sub done", crs->cp.part_name);
764779

765780
if (!remote_exec(&crs->cp.src_conn, (CopyPartState *) crs, crs->create_data_pub_sql))
766781
return -1;
767782
shmn_elog(DEBUG1, "cr %s: create_data_pub done", crs->cp.part_name);
768783

769-
if (!remote_exec(&crs->cp.dst_conn, (CopyPartState *) crs, crs->create_data_sub_sql))
784+
if (!remote_exec(&crs->cp.dst_conn, (CopyPartState *) crs,
785+
crs->create_data_sub_sql))
770786
return -1;
771787
shmn_elog(DEBUG1, "cr %s: create_data_sub done", crs->cp.part_name);
772788

789+
if (!remote_exec(&crs->cp.src_conn, (CopyPartState *) crs,
790+
crs->sync_standby_sql))
791+
return -1;
792+
shmn_elog(DEBUG1, "cr %s: sync_standby done", crs->cp.part_name);
793+
773794
return 0;
774795
}
775796

@@ -1075,17 +1096,24 @@ ensure_pqconn(PGconn **conn, const char *connstr,
10751096
}
10761097
if (*conn == NULL)
10771098
{
1099+
char s[] = "set session synchronous_commit to local;";
1100+
10781101
Assert(connstr != NULL);
10791102
*conn = PQconnectdb(connstr);
10801103
if (PQstatus(*conn) != CONNECTION_OK)
10811104
{
1082-
shmn_elog(NOTICE, "Connection to node %s failed: %s",
1083-
connstr, PQerrorMessage(*conn));
1105+
shmn_elog(NOTICE, "Connection to node %s failed: %s", connstr,
1106+
PQerrorMessage(*conn));
10841107
reset_pqconn(conn);
10851108
configure_retry(cps, shardman_cmd_retry_naptime);
10861109
return -1;
10871110
}
10881111
shmn_elog(DEBUG1, "Connection to %s established", connstr);
1112+
1113+
/* All our cmds don't need to wait for sync replication */
1114+
/* remote_exec modifies sql, so it must be writable */
1115+
if (!remote_exec(conn, cps, s))
1116+
return -1;
10891117
}
10901118
return 0;
10911119
}

src/include/copypart.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ typedef struct
9696
char *drop_cp_sub_sql;
9797
char *create_data_pub_sql;
9898
char *create_data_sub_sql;
99+
char *sync_standby_sql;
99100
} CreateReplicaState;
100101

101102
/* State of move partition task */
@@ -111,7 +112,9 @@ typedef struct
111112
/* SQL executed to reconfigure LR channels */
112113
char *prev_sql;
113114
char *dst_sql;
115+
char *sync_standby_prev_sql;
114116
char *next_sql;
117+
char *sync_standby_dst_sql;
115118
} MovePartState;
116119

117120
extern void init_mp_state(MovePartState *mps, const char *part_name,

0 commit comments

Comments
 (0)