Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit dd151e2

Browse files
committed
Initial tablesync started, though start_shardman must be debugged again.
1 parent 25bb8d1 commit dd151e2

File tree

2 files changed

+47
-24
lines changed

2 files changed

+47
-24
lines changed

pg_shardman--0.0.1.sql

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -288,19 +288,6 @@ BEGIN
288288
END;
289289
$$ LANGUAGE plpgsql;
290290

291-
-- These tables will be replicated to worker nodes, notifying them about changes.
292-
-- Called on worker nodes.
293-
CREATE FUNCTION create_meta_sub() RETURNS void AS $$
294-
DECLARE
295-
master_connstring text;
296-
BEGIN
297-
SELECT pg_settings.setting into master_connstring from pg_settings
298-
WHERE NAME = 'shardman.master_connstring';
299-
-- Note that 'CONNECTION $1...' USING master_connstring won't work here
300-
EXECUTE format('CREATE SUBSCRIPTION shardman_meta_sub CONNECTION %L PUBLICATION shardman_meta_pub', master_connstring);
301-
END;
302-
$$ LANGUAGE plpgsql;
303-
304291
-- Recreate logical pgoutput replication slot. Drops existing slot.
305292
CREATE FUNCTION create_repslot(slot_name text) RETURNS void AS $$
306293
BEGIN

src/shard.c

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ typedef enum
4343
/* Current step of 1 master partition move */
4444
typedef enum
4545
{
46-
MOVEMPARTSTEP_START_TABLESYNC
46+
MOVEMPARTSTEP_START_TABLESYNC,
47+
MOVEMPARTSTEP_WAIT_TABLESYNC
4748
} MoveMPartStep;
4849

4950
typedef struct
@@ -314,18 +315,37 @@ init_mmp_state(MoveMPartState *mmps, const char *part_name, int32 dst_node)
314315
mmps->part_name, mmps->src_node, mmps->dst_node);
315316
mmps->dst_drop_sub_sql = psprintf(
316317
"drop subscription if exists %s cascade;", mmps->logname);
318+
/*
319+
* Note that we run stmts in separate txns: repslot can't be created in a
320+
* transaction that performed writes
321+
*/
317322
mmps->src_create_pub_and_rs_sql = psprintf(
318-
"drop publication if exists %s cascade;"
319-
" create publication %s for table %s;"
323+
"begin; drop publication if exists %s cascade;"
324+
" create publication %s for table %s; end;"
320325
" select shardman.create_repslot('%s');",
321326
mmps->logname, mmps->logname, mmps->part_name, mmps->logname
322327
);
323328
mmps->relation = get_partition_relation(part_name);
324329
mmps->dst_create_tab_and_sub_sql = psprintf(
325330
"drop table if exists %s cascade;"
331+
/*
332+
* TODO: we are mimicking pathman's partition creation here. At least
333+
* one difference is that we don't copy foreign keys, so this should
334+
* be fixed. For example, we could directly call pathman's
335+
* create_single_partition_internal func here, though currently it is
336+
* static. We could also just use the old empty partition and not remove
337+
* it, but considering ALTER TABLE (in the very long term) this is the
338+
* wrong approach.
339+
*/
326340
" create table %s (like %s including defaults including indexes"
327-
" including storage);",
328-
mmps->part_name, mmps->part_name, mmps->relation);
341+
" including storage);"
342+
" drop subscription if exists %s cascade;"
343+
" create subscription %s connection '%s' publication %s with"
344+
" (create_slot = false, slot_name = '%s');",
345+
mmps->part_name,
346+
mmps->part_name, mmps->relation,
347+
mmps->logname,
348+
mmps->logname, mmps->src_connstr, mmps->logname, mmps->logname);
329349

330350
mmps->curstep = MOVEMPARTSTEP_START_TABLESYNC;
331351
mmps->res = MOVEMPART_IN_PROGRESS;
@@ -543,12 +563,14 @@ exec_move_mpart(MoveMPartState *mmps)
543563
/* Mark waketm as invalid for safety */
544564
mmps->waketm = (struct timespec) {0};
545565

546-
switch (mmps->curstep)
566+
if (mmps->curstep == MOVEMPARTSTEP_START_TABLESYNC)
547567
{
548-
case MOVEMPARTSTEP_START_TABLESYNC:
549-
if (start_tablesync(mmps) == -1)
550-
return;
568+
if (start_tablesync(mmps) == -1)
569+
return;
570+
else
571+
mmps->curstep = MOVEMPARTSTEP_WAIT_TABLESYNC;
551572
}
573+
552574
shmn_elog(DEBUG1, "Partition %s is moved", mmps->part_name);
553575
mmps->res = MOVEMPART_SUCCESS;
554576
mmps->exec_res = EXECMOVEMPART_DONE;
@@ -579,17 +601,31 @@ start_tablesync(MoveMPartState *mmps)
579601
shmn_elog(DEBUG1, "mmp %s: sub on dst dropped, if any", mmps->part_name);
580602

581603
res = PQexec(mmps->src_conn, mmps->src_create_pub_and_rs_sql);
582-
if (PQresultStatus(res) != PGRES_COMMAND_OK)
604+
if (PQresultStatus(res) != PGRES_TUPLES_OK)
583605
{
584606
shmn_elog(NOTICE, "Failed to create pub and repslot on src: %s",
585607
PQerrorMessage(mmps->src_conn));
586608
reset_pqconn_and_res(&mmps->src_conn, res);
587609
configure_retry(mmps, shardman_cmd_retry_naptime);
588610
return -1;
589611
}
590-
return 0;
591612
PQclear(res);
592613
shmn_elog(DEBUG1, "mmp %s: pub and rs recreated on src", mmps->part_name);
614+
615+
res = PQexec(mmps->dst_conn, mmps->dst_create_tab_and_sub_sql);
616+
if (PQresultStatus(res) != PGRES_COMMAND_OK)
617+
{
618+
shmn_elog(NOTICE, "Failed to recreate table & sub on dst: %s",
619+
PQerrorMessage(mmps->dst_conn));
620+
reset_pqconn_and_res(&mmps->dst_conn, res);
621+
configure_retry(mmps, shardman_cmd_retry_naptime);
622+
return -1;
623+
}
624+
PQclear(res);
625+
shmn_elog(DEBUG1, "mmp %s: table & sub created on dst, tablesync started",
626+
mmps->part_name);
627+
628+
return 0;
593629
}
594630

595631
/*

0 commit comments

Comments
 (0)