
Commit 605a856

rm_node works
1 parent 88ae538 commit 605a856

File tree: 4 files changed, +156 -57 lines

  bin/common.sh
  bin/shardman_init.sh
  pg_shardman--0.0.1.sql
  src/pg_shardman.c

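For orientation, a minimal usage sketch of what this commit enables (not part of the diff; the node id 2 is illustrative, and the shardman schema follows the references used in the code below). On the master, removal is requested through the new SQL interface and then executed asynchronously by the shardmaster background worker:

  -- queue a removal command; the shardmaster bgworker picks it up from shardman.cmd_log
  SELECT shardman.rm_node(2);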

bin/common.sh

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ set -e
 # Params
 
 pgpath=~/postgres/install/vanilla/
+pathmanpath=~/postgres/pg_pathman
 
 master_datadir=~/postgres/data1
 master_port=5432

bin/shardman_init.sh

Lines changed: 4 additions & 0 deletions
@@ -3,6 +3,10 @@
 script_dir=`dirname "$(readlink -f "$0")"`
 source "${script_dir}/common.sh"
 
+echo $PATH
+cd $pathmanpath
+USE_PGXS=1 make install
+
 cd "${script_dir}/.."
 make clean
 make install

pg_shardman--0.0.1.sql

Lines changed: 63 additions & 10 deletions
@@ -13,13 +13,33 @@ BEGIN
 END
 $$;
 
+-- active is the normal mode, others needed only for proper node add and removal
+CREATE TYPE node_status AS ENUM ('active', 'add_in_progress', 'rm_in_progress');
+
 -- list of nodes present in the cluster
 CREATE TABLE nodes (
     id serial PRIMARY KEY,
     connstring text,
-    active bool NOT NULL -- if false, we haven't yet finished adding it
+    status node_status NOT NULL
 );
 
+-- Master is removing us, so reset our state, removing all subscriptions. A
+-- tricky part: we can't DROP SUBSCRIPTION here, because that would mean
+-- shooting ourselves (sending SIGTERM to the replication apply worker) in the
+-- foot. So for now we just disable the subscription; the worker will stop at
+-- the end of the transaction. Later we should delete subscriptions fully.
+CREATE FUNCTION rm_node_worker_side() RETURNS TRIGGER AS $$
+BEGIN
+    PERFORM shardman.pg_shardman_cleanup(false);
+    RETURN NULL;
+END
+$$ language plpgsql;
+CREATE TRIGGER rm_node_worker_side AFTER UPDATE ON shardman.nodes
+    FOR EACH ROW WHEN (OLD.status = 'active' AND NEW.status = 'rm_in_progress')
+    EXECUTE PROCEDURE rm_node_worker_side();
+-- fire trigger only on worker nodes
+ALTER TABLE shardman.nodes ENABLE REPLICA TRIGGER rm_node_worker_side;
+
 -- Currently it is used just to store node id, in general we can keep any local
 -- node metadata here. If is ever used extensively, probably hstore suits better.
 CREATE TABLE local_meta (
@@ -29,16 +49,17 @@ CREATE TABLE local_meta (
 INSERT INTO @extschema@.local_meta VALUES ('node_id', NULL);
 
 -- available commands
-CREATE TYPE cmd AS ENUM ('add_node', 'remove_node');
+CREATE TYPE cmd AS ENUM ('add_node', 'rm_node');
 -- command status
 CREATE TYPE cmd_status AS ENUM ('waiting', 'canceled', 'failed', 'in progress', 'success');
 
 CREATE TABLE cmd_log (
     id bigserial PRIMARY KEY,
     cmd_type cmd NOT NULL,
     status cmd_status DEFAULT 'waiting' NOT NULL,
-    -- only for add_node cmd -- generated id for newly added node. Cleaner
-    -- to keep that is separate table...
+    -- only for add_node cmd -- generated id for newly added node. Exists only
+    -- when node adding is in progress or node is active. Cleaner to keep this
+    -- in separate table...
     node_id int REFERENCES nodes(id)
 );
 
@@ -99,7 +120,8 @@ DECLARE
 BEGIN
     SELECT node_id FROM @extschema@.cmd_log INTO n_id WHERE id = cmd_id;
     IF n_id IS NULL THEN
-        INSERT INTO @extschema@.nodes VALUES (DEFAULT, quote_literal(connstring), false)
+        INSERT INTO @extschema@.nodes
+            VALUES (DEFAULT, quote_literal(connstring), 'add_in_progress')
             RETURNING id INTO n_id;
         UPDATE @extschema@.cmd_log SET node_id = n_id WHERE id = cmd_id;
     END IF;
@@ -115,12 +137,25 @@ BEGIN
     EXECUTE format('SELECT EXISTS (SELECT * FROM pg_replication_slots
                    WHERE slot_name=%L)', slot_name) INTO slot_exists;
     IF NOT slot_exists THEN
-        EXECUTE format('SELECT * FROM pg_create_logical_replication_slot(%L, %L)',
+        EXECUTE format('SELECT pg_create_logical_replication_slot(%L, %L)',
                        slot_name, 'pgoutput');
     END IF;
 END
 $$ LANGUAGE plpgsql;
 
+-- Drop replication slot, if it exists
+CREATE FUNCTION drop_repslot(slot_name text) RETURNS void AS $$
+DECLARE
+    slot_exists bool;
+BEGIN
+    EXECUTE format('SELECT EXISTS (SELECT * FROM pg_replication_slots
+                   WHERE slot_name=%L)', slot_name) INTO slot_exists;
+    IF slot_exists THEN
+        EXECUTE format('SELECT pg_drop_replication_slot(%L)', slot_name);
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+
 -- Remove all our logical replication stuff in case of drop extension.
 -- Dropping extension cleanup is not that easy:
 -- - pg offers event triggers sql_drop, dd_command_end and ddl_command_start
@@ -133,10 +168,12 @@ $$ LANGUAGE plpgsql;
 -- is deleting.
 -- - because of that I resort to C function which examines parse tree and if
 -- it is our extension is deleting, it calls plpgsql cleanup func
-CREATE OR REPLACE FUNCTION pg_shardman_cleanup() RETURNS void AS $$
+CREATE OR REPLACE FUNCTION pg_shardman_cleanup(drop_subs bool DEFAULT true)
+    RETURNS void AS $$
 DECLARE
     pub record;
     sub record;
+    rs record;
 BEGIN
     FOR pub IN SELECT pubname FROM pg_publication WHERE pubname LIKE 'shardman_%' LOOP
         EXECUTE format('DROP PUBLICATION %I', pub.pubname);
@@ -145,7 +182,13 @@ BEGIN
         -- we are managing rep slots manually, so we need to detach it beforehand
         EXECUTE format('ALTER SUBSCRIPTION %I DISABLE', sub.subname);
         EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE)', sub.subname);
-        EXECUTE format('DROP SUBSCRIPTION %I', sub.subname);
+        IF drop_subs THEN
+            EXECUTE format('DROP SUBSCRIPTION %I', sub.subname);
+        END IF;
+    END LOOP;
+    FOR rs IN SELECT slot_name FROM pg_replication_slots
+        WHERE slot_name LIKE 'shardman_%' AND slot_type = 'logical' LOOP
+        EXECUTE format('SELECT pg_drop_replication_slot(%L)', rs.slot_name);
     END LOOP;
 END;
 $$ LANGUAGE plpgsql;
@@ -173,8 +216,7 @@ $$ LANGUAGE sql;
 
 -- Interface functions
 
--- TODO: during the initial connection, ensure that nodes id (if any) is not
--- present in the cluster
+-- Add a node. Its state will be reset, all shardman data lost.
 CREATE FUNCTION add_node(connstring text) RETURNS void AS $$
 DECLARE
     c_id int;
@@ -184,3 +226,14 @@ BEGIN
     INSERT INTO @extschema@.cmd_opts VALUES (DEFAULT, c_id, connstring);
 END
 $$ LANGUAGE plpgsql;
+
+-- Remove node. Its state will be reset, all shardman data lost.
+CREATE FUNCTION rm_node(node_id int) RETURNS void AS $$
+DECLARE
+    c_id int;
+BEGIN
+    INSERT INTO @extschema@.cmd_log VALUES (DEFAULT, 'rm_node')
+        RETURNING id INTO c_id;
+    INSERT INTO @extschema@.cmd_opts VALUES (DEFAULT, c_id, node_id);
+END
+$$ LANGUAGE plpgsql;
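
As the comment on rm_node_worker_side explains, the worker being removed only disables its shardman subscriptions rather than dropping them (pg_shardman_cleanup(false)). A hedged way to check this on the removed worker, assuming its subscriptions follow the 'shardman_%' naming that the cleanup function matches:

  -- on the worker node after removal: subscriptions remain but are disabled
  SELECT subname, subenabled
  FROM pg_subscription
  WHERE subname LIKE 'shardman_%';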

src/pg_shardman.c

Lines changed: 88 additions & 47 deletions
@@ -49,10 +49,13 @@ static void shardmaster_sigterm(SIGNAL_ARGS);
 static void shardmaster_sigusr1(SIGNAL_ARGS);
 static void check_for_sigterm(void);
 static void pg_shardman_installed_local(void);
+
 static void add_node(Cmd *cmd);
 static int insert_node(const char *connstring, int64 cmd_id);
 static bool node_in_cluster(int id);
-static void activate_node(int64 cmd_id, int node_id);
+
+static void rm_node(Cmd *cmd);
+static bool is_node_active(int node_id);
 
 /* flags set by signal handlers */
 static volatile sig_atomic_t got_sigterm = false;
@@ -135,8 +138,8 @@ _PG_init()
     shardmaster_worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
         BGWORKER_BACKEND_DATABASE_CONNECTION;
     shardmaster_worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
-    /* shardmaster_worker.bgw_restart_time = 10; */
-    shardmaster_worker.bgw_restart_time = BGW_NEVER_RESTART;
+    shardmaster_worker.bgw_restart_time = 10;
+    /* shardmaster_worker.bgw_restart_time = BGW_NEVER_RESTART; */
     sprintf(shardmaster_worker.bgw_library_name, "pg_shardman");
     sprintf(shardmaster_worker.bgw_function_name, "shardmaster_main");
     shardmaster_worker.bgw_notify_pid = 0;
@@ -181,6 +184,8 @@ shardmaster_main(Datum main_arg)
             shmn_elog(LOG, "%s", *opts);
         if (strcmp(cmd->cmd_type, "add_node") == 0)
             add_node(cmd);
+        else if (strcmp(cmd->cmd_type, "rm_node") == 0)
+            rm_node(cmd);
         else
             shmn_elog(FATAL, "Unknown cmd type %s", cmd->cmd_type);
     }
@@ -371,7 +376,7 @@ pg_shardman_installed_local(void)
     {
         installed = false;
         shmn_elog(WARNING, "pg_shardman library is preloaded, but extenstion"
-                  "is not created");
+                  " is not created");
     }
     PopActiveSnapshot();
     CommitTransactionCommand();
@@ -419,14 +424,18 @@ check_for_sigterm(void)
 
 /*
  * Adding node consists of
- * - verifying that the node is not present in the cluster at the moment
- * - extension recreation
- * - repl slot recreation
- * - subscription creation
- * - setting node id
- * - adding node to 'nodes' table
+ * - verifying the node is not 'active' in the cluster, i.e. 'nodes' table
+ * - adding node to the 'nodes' as not active, get its new id
+ * - reinstalling extension
+ * - recreating repslot
+ * - recreating subscription
+ * - setting node id on the node itself
+ * - marking node as active and cmd as success
+ * We do all this stuff to make all actions idempotent, to be able to retry
+ * them in case of any failure.
  */
-static void add_node(Cmd *cmd)
+void
+add_node(Cmd *cmd)
 {
     PGconn *conn = NULL;
     const char *connstring = cmd->opts[0];
@@ -473,7 +482,7 @@ static void add_node(Cmd *cmd)
 
     if (!PQgetisnull(res, 0, 0))
     {
-        /* Node is in cluster. Is it active in our cluster? */
+        /* Node is in cluster. Was it there before we started adding? */
        node_id = atoi(PQgetvalue(res, 0, 0));
        PQclear(res);
        if (node_in_cluster(node_id))
@@ -529,12 +538,24 @@ static void add_node(Cmd *cmd)
                   PQerrorMessage(conn));
         goto attempt_failed;
     }
-    pg_shardman_installed = PQntuples(res) == 1 && !PQgetisnull(res, 0, 0);
+
     PQclear(res);
+    PQfinish(conn);
+
+    /*
+     * Mark add_node cmd as success and node as active, we must do that in
+     * one txn.
+     */
+    sql = psprintf(
+        "update shardman.nodes set status = 'active' where id = %d;"
+        "update shardman.cmd_log set status = 'success' where id = %ld;",
+        node_id, cmd->id);
+    void_spi(sql);
+    pfree(sql);
 
     /* done */
-    PQfinish(conn);
-    activate_node(cmd->id, node_id);
+    elog(INFO, "Node %s successfully added, it is assigned id %d",
+         connstring, node_id);
     return;
 
 attempt_failed: /* clean resources, sleep, check sigusr1 and try again */
@@ -544,6 +565,7 @@ static void add_node(Cmd *cmd)
     PQfinish(conn);
 
     shmn_elog(LOG, "Attempt to execute add_node failed, sleeping and retrying");
+    /* TODO: sleep using waitlatch? */
     pg_usleep(shardman_cmd_retry_naptime * 1000L);
 }
 
@@ -581,53 +603,72 @@ insert_node(const char *connstring, int64 cmd_id)
 }
 
 /*
- * Returns true, if node 'id' is active in our cluster, false otherwise.
+ * Returns true, if node 'id' is in the cluster and not in add_in_progress state
  */
 static bool
 node_in_cluster(int id)
 {
-    int e;
-    const char *sql = "select id from shardman.nodes where active;";
-    bool res = false;
-    HeapTuple tuple;
-    TupleDesc rowdesc;
-    uint64 i;
-    bool isnull;
+    char *sql = psprintf(
+        "select id from shardman.nodes where id = %d and status != 'add_in_progress';",
+        id);
+    bool res;
 
     SPI_PROLOG;
-    e = SPI_execute(sql, true, 0);
-    if (e < 0)
+    if (SPI_execute(sql, true, 0) < 0)
         shmn_elog(FATAL, "Stmt failed: %s", sql);
+    pfree(sql);
+    res = SPI_processed == 1;
 
-    rowdesc = SPI_tuptable->tupdesc;
-    for (i = 0; i < SPI_processed; i++)
-    {
-        tuple = SPI_tuptable->vals[i];
-        if (id == DatumGetInt32(SPI_getbinval(tuple, rowdesc,
-                                              SPI_fnumber(rowdesc, "id"),
-                                              &isnull)))
-            res = true;
-    }
     SPI_EPILOG;
-
     return res;
 }
 
 /*
- * Mark add_node cmd as success and node as active, we must do that in one txn
+ * Remove node, losing all data on it. We
+ * - ensure that there is an active node with the given id in the cluster
+ * - mark node as rm_in_progress and commit so this reaches node via LR
+ * - wait a bit to let it unsubscribe
+ * - drop replication slot, remove node row and mark cmd as success
+ * Everything is idempotent. Note that we are not allowed to remove repl slot
+ * when the walsender connection is alive, that's why we sleep here.
  */
-void activate_node(int64 cmd_id, int node_id)
+void
+rm_node(Cmd *cmd)
 {
-    int e;
-    char *sql = psprintf(
-        "update shardman.nodes set active = true where id = %d;"
-        "update shardman.cmd_log set status = 'success' where id = %ld;",
-        node_id, cmd_id);
+    int node_id = atoi(cmd->opts[0]);
+    char *sql;
 
-    SPI_PROLOG;
-    e = SPI_exec(sql, 0);
+    if (!node_in_cluster(node_id))
+    {
+        shmn_elog(WARNING, "node %d not in cluster, won't rm it.", node_id);
+        update_cmd_status(cmd->id, "failed");
+        return;
+    }
+
+    sql = psprintf(
+        "update shardman.nodes set status = 'rm_in_progress' where id = %d;",
+        node_id);
+    void_spi(sql);
    pfree(sql);
-    if (e < 0)
-        shmn_elog(FATAL, "Stmt failed: %s", sql);
-    SPI_EPILOG;
+
+    /* Let node drop the subscription */
+    pg_usleep(2 * 1000000L);
+
+    /*
+     * It is extremely unlikely that the node still keeps the walsender process
+     * connected but ignored our node status update, so this should succeed.
+     * If not, bgw exits, but postmaster will restart us to try again.
+     * TODO: at this stage, user can't cancel command at all, this should be
+     * fixed.
+     */
+    sql = psprintf(
+        "select shardman.drop_repslot('shardman_meta_sub_%d');"
+        /* keep the cmd_log fk constraint quiet */
+        "update shardman.cmd_log set node_id = null where node_id = %d;"
+        "delete from shardman.nodes where id = %d;"
+        "update shardman.cmd_log set status = 'success' where id = %ld;",
+        node_id, node_id, node_id, cmd->id);
+    void_spi(sql);
+    pfree(sql);
+    elog(INFO, "Node %d successfully removed", node_id);
 }
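
A hedged sketch of how the removal could be observed on the master afterwards, based on the cmd_log table and the 'shardman_meta_sub_<id>' slot naming used above (column lists and final states are as defined in this commit; exact timing depends on the sleeps in rm_node):

  -- the rm_node command should end up with status 'success'
  -- (or 'failed' if the node was not an active cluster member)
  SELECT id, cmd_type, status, node_id FROM shardman.cmd_log ORDER BY id DESC;

  -- the node's row is gone and its logical replication slot has been dropped
  SELECT id, connstring, status FROM shardman.nodes;
  SELECT slot_name, active FROM pg_replication_slots WHERE slot_name LIKE 'shardman_%';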
