Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e0762a9

Browse files
committed
Remove partitions in rm_node with force option
1 parent 72be591 commit e0762a9

File tree

3 files changed

+126
-1
lines changed

3 files changed

+126
-1
lines changed

init.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,16 @@ END
7272
$$ LANGUAGE plpgsql;
7373

7474
-- Remove node. Its state will be reset, all shardman data lost.
75-
CREATE FUNCTION rm_node(node_id int) RETURNS int AS $$
75+
CREATE FUNCTION rm_node(node_id int, force bool default false) RETURNS int AS $$
DECLARE
    cmd_id int; -- id of the freshly logged command, returned to the caller
BEGIN
    -- Log the command itself and remember the id it was given.
    INSERT INTO @extschema@.cmd_log
        VALUES (DEFAULT, 'rm_node')
        RETURNING id INTO cmd_id;

    -- The node id is always recorded as an option of the command.
    INSERT INTO @extschema@.cmd_opts
        VALUES (DEFAULT, cmd_id, node_id);

    -- The literal option 'force' is recorded only when explicitly requested.
    IF force THEN
        INSERT INTO @extschema@.cmd_opts
            VALUES (DEFAULT, cmd_id, 'force');
    END IF;

    RETURN cmd_id;
END
$$ LANGUAGE plpgsql;

shard.sql

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,81 @@ CREATE TRIGGER part_moved AFTER UPDATE ON shardman.partitions
251251
-- fire trigger only on worker nodes
252252
ALTER TABLE shardman.partitions ENABLE REPLICA TRIGGER part_moved;
253253

254+
255+
-- Partition removed: drop old LR channels.
256+
-- Trigger function for the part_removed trigger on shardman.partitions.
-- OLD is the removed partitions row: OLD.owner is the node that held the
-- removed copy ("src"), OLD.prv / OLD.nxt are its neighbours in the replica
-- chain (NULL when absent). Depending on which of those nodes we are running
-- on (shardman.my_id()), the function drops the subscription or the
-- publication/replication slot of the channels prev -> src and src -> next and
-- relinks the neighbour's own row. When the removed copy had no prv (it was
-- the primary), the row owned by OLD.nxt is treated as the new primary.
-- Always returns NULL.
CREATE FUNCTION part_removed() RETURNS TRIGGER AS $$
DECLARE
    me int := shardman.my_id();
    prev_src_lname text; -- channel prev -> src; set only when OLD.prv exists
    src_next_lname text; -- channel src -> next; set only when OLD.nxt exists
    new_primary shardman.partitions;
    -- pg_sleep() takes its argument in seconds, not milliseconds
    drop_slot_delay int := 2; -- two seconds
BEGIN
    RAISE DEBUG '[SHMN] part_removed trigger called for part %, owner %',
        OLD.part_name, OLD.owner;

    IF OLD.prv IS NOT NULL THEN
        prev_src_lname := shardman.get_data_lname(OLD.part_name, OLD.prv, OLD.owner);
    ELSE
        -- Primary was removed: the copy on the next node becomes the primary.
        -- NOTE(review): if OLD.nxt is NULL as well, no row matches and
        -- new_primary stays NULL, yet it is still passed to the helpers
        -- below -- confirm they cope with that.
        SELECT * INTO new_primary FROM shardman.partitions
            WHERE owner = OLD.nxt AND part_name = OLD.part_name;
    END IF;
    IF OLD.nxt IS NOT NULL THEN
        src_next_lname := shardman.get_data_lname(OLD.part_name, OLD.owner, OLD.nxt);
    END IF;

    IF me = OLD.owner THEN -- src node
        IF OLD.prv IS NULL THEN
            -- If primary part was removed, replace on src node its partition
            -- with foreign one
            PERFORM shardman.replace_usual_part_with_foreign(new_primary);
        ELSE
            -- On the other hand, if prev replica existed, drop sub for old
            -- channel prev -> src
            PERFORM shardman.eliminate_sub(prev_src_lname);
        END IF;
        IF OLD.nxt IS NOT NULL THEN
            -- If next replica existed, drop pub for old channel src -> next.
            -- Wait some time to let the other node remove its subscription first.
            PERFORM pg_sleep(drop_slot_delay);
            PERFORM shardman.drop_repslot_and_pub(src_next_lname);
            PERFORM shardman.remove_sync_standby(src_next_lname);
        END IF;
        -- Drop old table anyway
        -- ???? Can we really do it now? We will have FDW pointing to removed table...
        EXECUTE format('DROP TABLE IF EXISTS %I', OLD.part_name);
    ELSEIF me = OLD.prv THEN -- node with prev replica
        -- Wait some time to let the other node remove its subscription first
        PERFORM pg_sleep(drop_slot_delay);
        -- Drop pub for old channel prev -> src
        PERFORM shardman.drop_repslot_and_pub(prev_src_lname);
        PERFORM shardman.remove_sync_standby(prev_src_lname);
        -- Relink our copy past the removed one. A plain UPDATE is required
        -- here: PERFORM only accepts a SELECT-style query.
        UPDATE shardman.partitions SET nxt = OLD.nxt
            WHERE owner = me AND part_name = OLD.part_name;
    ELSEIF me = OLD.nxt THEN -- node with next replica
        -- Drop sub for old channel src -> next
        PERFORM shardman.eliminate_sub(src_next_lname);
        -- Relink our copy past the removed one (plain UPDATE, see above)
        UPDATE shardman.partitions SET prv = OLD.prv
            WHERE owner = me AND part_name = OLD.part_name;
    END IF;

    -- If primary was removed
    IF OLD.prv IS NULL THEN
        -- And update fdw almost everywhere
        PERFORM shardman.update_fdw_server(new_primary);
    END IF;

    RETURN NULL;
END
$$ LANGUAGE plpgsql;
319+
320+
-- Run part_removed() once for every row deleted from shardman.partitions.
-- (The row-removal trigger event is DELETE; REMOVE is not a trigger event.)
CREATE TRIGGER part_removed AFTER DELETE ON shardman.partitions
    FOR EACH ROW
    EXECUTE PROCEDURE part_removed();
-- fire trigger only on worker nodes
-- (ENABLE REPLICA: the trigger fires only while session_replication_role
-- is 'replica')
ALTER TABLE shardman.partitions ENABLE REPLICA TRIGGER part_removed;
325+
326+
327+
328+
254329
-- Executed on newtail node, see cr_rebuild_lr
255330
CREATE FUNCTION replica_created_drop_cp_sub(
256331
part_name name, oldtail int, newtail int) RETURNS void AS $$

src/pg_shardman.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,16 @@ node_in_cluster(int id)
760760
return res;
761761
}
762762

763+
/*
 * Delete from shardman.partitions the row of partition 'part_name' owned by
 * node 'node_id'. The statement is executed through void_spi().
 *
 * The partition name is quoted as an SQL literal before being spliced into
 * the statement, so names containing quotes or backslashes cannot break or
 * alter the query.
 */
static void
rm_partition(int node_id, char const* part_name)
{
	/*
	 * Prototype as in utils/builtins.h; repeated locally so this function
	 * does not depend on which headers the file already includes.
	 */
	extern char *quote_literal_cstr(const char *rawstr);
	char	   *quoted_name = quote_literal_cstr(part_name);
	char	   *sql;

	/* quoted_name already carries its own surrounding quotes */
	sql = psprintf("delete from shardman.partitions where owner=%d and part_name=%s",
				   node_id, quoted_name);
	void_spi(sql);
	pfree(sql);
	pfree(quoted_name);
}
772+
763773
/*
764774
* Remove node, losing all data on it. We
765775
* - ensure that there is active node with given id in the cluster
@@ -774,6 +784,43 @@ rm_node(Cmd *cmd)
774784
{
775785
int32 node_id = atoi(cmd->opts[0]);
776786
char *sql;
787+
char **opts;
788+
bool force = false;
789+
int i, e;
790+
791+
for (opts = cmd->opts; *opts; opts++)
792+
{
793+
if (strcmp(*opts, "force") == 0)
794+
{
795+
force = true;
796+
break;
797+
}
798+
}
799+
800+
SPI_PROLOG;
801+
sql = psprintf("select part_name from shardman.partitions where owner=%d", node_id);
802+
e = SPI_execute(sql, true, 0);
803+
if (e < 0)
804+
shmn_elog(FATAL, "Stmt failed: %s", sql);
805+
pfree(sql);
806+
if (SPI_processed > 0)
807+
{
808+
TupleDesc rowdesc = SPI_tuptable->tupdesc;
809+
if (!force)
810+
{
811+
812+
ereport(ERROR, (errmsg("Can not remove node with existed partitions"),
813+
errhint("Add \"force\" option to remove node with existed partitions.")));
814+
}
815+
/* Remove partitions belonging to this node */
816+
for (i = 0; i < SPI_processed; i++)
817+
{
818+
HeapTuple tuple = SPI_tuptable->vals[i];
819+
char const* partition = SPI_getvalue(tuple, rowdesc, 1);
820+
rm_partition(node_id, partition);
821+
}
822+
}
823+
SPI_EPILOG;
777824

778825
elog(INFO, "Removing node %d ", node_id);
779826
if (!node_in_cluster(node_id))

0 commit comments

Comments
 (0)