-- Complain if this script is sourced in psql rather than run via
-- CREATE EXTENSION: print a hint and stop processing the file.
\echo Use "CREATE EXTENSION pg_shardman" to load this file. \quit
13
13
-- pg_shardman defines several GUCs and has to inform pathman that shardman's
-- COPY FROM is wanted, so the library must be loaded at server start.
-- (A GUC that the user sets explicitly is visible in SHOW even when the
-- library is not loaded.)
DO $$
BEGIN
    -- strpos() is a plain substring test, so any preloaded library whose name
    -- merely contains 'pg_shardman' passes as well. Calling a no-op function
    -- from the library would probably be a more reliable check.
    IF strpos(current_setting('shared_preload_libraries'), 'pg_shardman') = 0 THEN
        RAISE EXCEPTION 'pg_shardman must be loaded via shared_preload_libraries. Refusing to proceed.';
    END IF;
END
$$;
26
+
14
27
-- Shardman tables
15
28
16
29
-- List of nodes present in the cluster
17
30
CREATE TABLE nodes (
18
31
id serial PRIMARY KEY ,
19
- system_id bigint NOT NULL ,
32
+ system_id bigint NOT NULL UNIQUE ,
20
33
super_connection_string text UNIQUE NOT NULL ,
21
34
connection_string text UNIQUE NOT NULL ,
22
35
replication_group text NOT NULL -- group of nodes within which shard replicas are allocated
@@ -62,7 +75,6 @@ CREATE FUNCTION add_node(super_conn_string text, conn_string text = NULL,
62
75
repl_group text = ' default' ) RETURNS int AS $$
63
76
DECLARE
64
77
new_node_id int ;
65
- system_id bigint ;
66
78
node shardman .nodes ;
67
79
part shardman .partitions ;
68
80
t shardman .tables ;
@@ -93,21 +105,31 @@ DECLARE
93
105
BEGIN
94
106
IF NOT shardman .is_shardlord ()
95
107
THEN
96
- RETURN shardman .broadcast (format(' 0:SELECT shardman.add_node(%L, %L, %L)' , super_conn_string, conn_string, repl_group))::int ;
108
+ RETURN shardman .broadcast (
109
+ format(' 0:SELECT shardman.add_node(%L, %L, %L)' ,
110
+ super_conn_string, conn_string, repl_group))::int ;
97
111
END IF;
98
112
99
113
-- Insert new node in nodes table
100
- INSERT INTO shardman .nodes (system_id, super_connection_string, connection_string, replication_group)
114
+ INSERT INTO shardman .nodes (system_id, super_connection_string,
115
+ connection_string, replication_group)
101
116
VALUES (0 , super_conn_string, conn_string_effective, repl_group)
102
- RETURNING id INTO new_node_id;
103
-
104
- -- We have to update system_id after insert, because otherwise broadcast will not work
105
- sys_id := shardman .broadcast (format(' %s:SELECT shardman.get_system_identifier();' , new_node_id))::bigint ;
117
+ RETURNING id INTO new_node_id;
118
+
119
+ -- We have to update system_id after insert, because otherwise broadcast
120
+ -- will not work
121
+ sys_id := shardman .broadcast (
122
+ format(' %s:SELECT shardman.get_system_identifier();' ,
123
+ new_node_id))::bigint ;
124
+ IF EXISTS(SELECT 1 FROM shardman .nodes where system_id = sys_id) THEN
125
+ RAISE EXCEPTION ' Node with system id % is already in the cluster' , sys_id;
126
+ END IF;
106
127
UPDATE shardman .nodes SET system_id= sys_id WHERE id= new_node_id;
107
128
108
129
-- Adjust replication channels within replication group.
109
130
-- We need all-to-all replication channels between all group members.
110
- FOR node IN SELECT * FROM shardman .nodes WHERE replication_group = repl_group AND id <> new_node_id
131
+ FOR node IN SELECT * FROM shardman .nodes WHERE replication_group = repl_group
132
+ AND id <> new_node_id
111
133
LOOP
112
134
-- Add to new node publications for all existing nodes and add
113
135
-- publication for new node to all existing nodes
@@ -443,8 +465,10 @@ $$ LANGUAGE plpgsql;
443
465
-- Shard table with hash partitions. Parameters are the same as in pathman.
444
466
-- It also scatter partitions through all nodes.
445
467
-- This function expects that empty table is created at shardlord.
446
- -- So it can be executed only at shardlord and there is no need to redirect this function to shardlord.
447
- CREATE FUNCTION create_hash_partitions (rel regclass, expr text , part_count int , redundancy int = 0 )
468
+ -- It can be executed only at shardlord and there is no need to redirect this
469
+ -- function to shardlord.
470
+ CREATE FUNCTION create_hash_partitions (rel regclass, expr text , part_count int ,
471
+ redundancy int = 0 )
448
472
RETURNS void AS $$
449
473
DECLARE
450
474
create_table text ;
@@ -1857,7 +1881,7 @@ BEGIN
1857
1881
dst_node_id, slot));
1858
1882
IF response::bool THEN
1859
1883
synced := true;
1860
- RAISE DEBUG ' Table % sync completed' , part_name;
1884
+ RAISE DEBUG ' [SHMN] Table % sync completed' , part_name;
1861
1885
CONTINUE;
1862
1886
END IF;
1863
1887
ELSE
@@ -1868,7 +1892,7 @@ BEGIN
1868
1892
END IF;
1869
1893
lag := response::bigint ;
1870
1894
1871
- RAISE DEBUG ' Replication lag %' , lag;
1895
+ RAISE DEBUG ' [SHMN] Replication lag %' , lag;
1872
1896
IF locked THEN
1873
1897
IF lag<= 0 THEN
1874
1898
RETURN;
@@ -2245,3 +2269,101 @@ CREATE VIEW replication_lag(pubnode, subnode, lag) AS
2245
2269
-- be explicitly excluded by filter condition, otherwise error will be reported.
CREATE VIEW replication_state(part_name, node_id, last_seqno) AS
    SELECT
        part_name,
        node_id,
        -- For every row of shardman.replicas, ask node <node_id> (via
        -- shardman.broadcast) for max(seqno) of the table named
        -- <part_name>_change_log and cast the reply to bigint.
        -- NOTE(review): part_name is interpolated with %s, i.e. unquoted;
        -- confirm that partition names never need identifier quoting.
        shardman.broadcast(
            format('%s:SELECT max(seqno) FROM %s_change_log;',
                   node_id, part_name))::bigint
    FROM shardman.replicas;
2272
+
2273
+
-- Drop the replication slot 'slot_name' if it exists; do nothing otherwise.
--
-- About the 'with_fire' option: pg_drop_replication_slot bails out with ERROR
-- if the slot's connection is active, so the slot cannot simply be dropped.
-- The caller must either ensure that the connection is dead (e.g. drop the
-- subscription on the far end) or pass 'true' as 'with_fire', which enables
-- the following dirty hack: the active walsender is killed several times with
-- a short interval. After the first kill the replica immediately tries to
-- reconnect, so the connection resurrects instantly. After the second kill,
-- however, the replica will not try to reconnect until
-- wal_retrieve_retry_interval (5 secs by default) has passed since its first
-- reaction. Of course, this is not reliable and should be redesigned.
--
-- The function is STRICT: if any argument is NULL it is not executed at all.
CREATE FUNCTION drop_repslot(slot_name text, with_fire bool DEFAULT true)
    RETURNS void AS $$
DECLARE
    slot_exists bool;
    kill_ws_times int := 3; -- how many times the walsender is terminated
BEGIN
    RAISE DEBUG '[SHMN] Dropping repslot %', slot_name;
    -- The parameter shares its name with the catalog column, so both sides
    -- are qualified: the column by table alias, the parameter by function
    -- name. This avoids building the query as a string.
    SELECT EXISTS (SELECT 1
                   FROM pg_replication_slots AS rs
                   WHERE rs.slot_name = drop_repslot.slot_name::name)
        INTO slot_exists;
    IF slot_exists THEN
        IF with_fire THEN -- kill walsender several times
            RAISE DEBUG '[SHMN] Killing repslot % with fire', slot_name;
            FOR i IN 1..kill_ws_times LOOP
                RAISE DEBUG '[SHMN] Killing walsender for slot %', slot_name;
                PERFORM shardman.terminate_repslot_walsender(slot_name);
                -- no need to wait after the last kill
                IF i != kill_ws_times THEN
                    PERFORM pg_sleep(0.05);
                END IF;
            END LOOP;
        END IF;
        PERFORM pg_drop_replication_slot(slot_name::name);
    END IF;
END
$$ LANGUAGE plpgsql STRICT;
-- Terminate the backend that currently holds replication slot 'slot_name'
-- (pg_replication_slots.active_pid). Does nothing if there is no such slot or
-- the slot is not in use. STRICT: not executed for a NULL argument.
CREATE FUNCTION terminate_repslot_walsender(slot_name text) RETURNS void AS $$
BEGIN
    -- Column and parameter have the same name, hence the qualification.
    PERFORM pg_terminate_backend(rs.active_pid)
        FROM pg_replication_slots AS rs
        WHERE rs.slot_name = terminate_repslot_walsender.slot_name::name
          AND rs.active_pid IS NOT NULL;
END
$$ LANGUAGE plpgsql STRICT;
2315
+
-- Drop subscription unilaterally: if subscription 'subname' exists in the
-- current database, disable it, detach the replication slot from it and drop
-- it. Detaching the slot first means DROP SUBSCRIPTION does not touch the
-- slot on the publisher. STRICT: not executed for a NULL argument.
CREATE FUNCTION eliminate_sub(subname name)
    RETURNS void AS $$
DECLARE
    sub_exists bool;
BEGIN
    -- pg_subscription is shared between databases, while ALTER/DROP
    -- SUBSCRIPTION only see subscriptions of the current one, so restrict the
    -- lookup accordingly. Column and parameter share a name, hence the
    -- qualification.
    SELECT EXISTS (SELECT 1
                   FROM pg_subscription AS s
                   WHERE s.subname = eliminate_sub.subname
                     AND s.subdbid = (SELECT d.oid
                                      FROM pg_database AS d
                                      WHERE d.datname = current_database()))
        INTO sub_exists;
    IF sub_exists THEN
        -- Utility statements cannot take parameters; %I quotes the identifier.
        EXECUTE format('ALTER SUBSCRIPTION %I DISABLE', subname);
        EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE)', subname);
        EXECUTE format('DROP SUBSCRIPTION %I', subname);
    END IF;
END
$$ LANGUAGE plpgsql STRICT;
2332
+
2333
+
-- Remove all shardman state (LR stuff, synchronous_standby_names): foreign
-- servers and publications named node_*, subscriptions named sub_* and
-- logical replication slots named node_*. If drop_slots_with_fire is true,
-- walsenders are killed before the LR slots are dropped.
-- synchronous_standby_names is reset to empty string after commit; this is a
-- non-transactional action and might be not performed.
CREATE OR REPLACE FUNCTION wipe_state(drop_slots_with_fire bool DEFAULT true)
    RETURNS void AS $$
DECLARE
    srv record;
    pub record;
    sub record;
    rs record;
BEGIN
    -- otherwise we might hang
    SET LOCAL synchronous_commit TO LOCAL;

    -- '_' is a single-character wildcard in LIKE; it is escaped below so that
    -- only names with a literal 'node_' / 'sub_' prefix are matched.
    FOR srv IN SELECT srvname FROM pg_foreign_server
               WHERE srvname LIKE 'node!_%' ESCAPE '!' LOOP
        EXECUTE format('DROP SERVER %I CASCADE', srv.srvname);
    END LOOP;

    FOR pub IN SELECT pubname FROM pg_publication
               WHERE pubname LIKE 'node!_%' ESCAPE '!' LOOP
        EXECUTE format('DROP PUBLICATION %I', pub.pubname);
    END LOOP;

    -- pg_subscription is shared between databases; only subscriptions of the
    -- current database can be dropped from here.
    FOR sub IN SELECT s.subname FROM pg_subscription AS s
               WHERE s.subname LIKE 'sub!_%' ESCAPE '!'
                 AND s.subdbid = (SELECT d.oid
                                  FROM pg_database AS d
                                  WHERE d.datname = current_database()) LOOP
        PERFORM shardman.eliminate_sub(sub.subname);
    END LOOP;

    FOR rs IN SELECT slot_name FROM pg_replication_slots
              WHERE slot_name LIKE 'node!_%' ESCAPE '!'
                AND slot_type = 'logical' LOOP
        PERFORM shardman.drop_repslot(rs.slot_name, drop_slots_with_fire);
    END LOOP;

    -- TODO: remove only shardman's standbys
    PERFORM shardman.reset_synchronous_standby_names_on_commit();
END;
$$ LANGUAGE plpgsql;
-- C function from the pg_shardman library (the link symbol defaults to the
-- SQL function name). Per its name and its use in wipe_state, it arranges for
-- synchronous_standby_names to be reset once the transaction commits.
CREATE FUNCTION reset_synchronous_standby_names_on_commit()
    RETURNS void AS 'pg_shardman' LANGUAGE C STRICT;
0 commit comments