@@ -52,11 +52,11 @@ CREATE TABLE replicas (
52
52
-- the node as superuser, and 'conn_string' can be some other connstring.
53
53
-- The former is used for configuring logical replication, the latter for DDL
54
54
-- and for setting up FDW. This separation serves two purposes:
55
- -- * It allows to access data without requiring superuser priviliges ;
55
+ -- * It allows accessing data without requiring superuser privileges;
56
56
-- * It allows to set up pgbouncer, as replication can't go through it.
57
57
-- If conn_string is null, super_conn_string is used everywhere.
58
58
CREATE FUNCTION add_node (super_conn_string text , conn_string text = NULL ,
59
- repl_group text = ' all ' ) RETURNS void AS $$
59
+ repl_group text = ' default ' ) RETURNS void AS $$
60
60
DECLARE
61
61
new_node_id int ;
62
62
node shardman .nodes ;
@@ -127,7 +127,7 @@ BEGIN
127
127
IF shardman .synchronous_replication () AND
128
128
(SELECT COUNT (* ) FROM shardman .nodes WHERE replication_group = repl_group) > 1
129
129
THEN
130
- -- Take all nodes in replicationg group excluding myself
130
+ -- Take all nodes in replication group excluding myself
131
131
FOR node IN SELECT * FROM shardman .nodes WHERE replication_group = repl_group LOOP
132
132
-- Construct the list of synchronous standbys (= subscriptions to this node)
133
133
sync_standbys :=
@@ -145,7 +145,7 @@ BEGIN
145
145
146
146
-- Add foreign servers for connection to the new node and backward
147
147
-- Construct foreign server options from connection string of new node
148
- SELECT * FROM shardman .conninfo_to_postgres_fdw_opts (conn_string ) INTO new_server_opts, new_um_opts;
148
+ SELECT * FROM shardman .conninfo_to_postgres_fdw_opts (conn_string_effective ) INTO new_server_opts, new_um_opts;
149
149
FOR node IN SELECT * FROM shardman .nodes WHERE id<> new_node_id
150
150
LOOP
151
151
-- Construct foreign server options from connection string of this node
@@ -696,6 +696,65 @@ BEGIN
696
696
END
697
697
$$ LANGUAGE plpgsql;
698
698
699
-- Return the number of rows in shardman.partitions whose node_id equals
-- the given node id.
-- This command can be executed only at shardlord.
--   node: id of the node whose partitions are counted.
CREATE FUNCTION get_node_partitions_count(node int) RETURNS bigint AS $$
    SELECT count(*)
    FROM shardman.partitions AS p
    -- $1 is the "node" argument; the positional form cannot be confused
    -- with a column name.
    WHERE p.node_id = $1;
$$ LANGUAGE sql;
704
+
705
-- Rebalance partitions between nodes. This function tries to evenly
-- redistribute partitions between all nodes of each replication group.
-- It is not able to move partitions between replication groups.
-- It intentionally moves one partition at a time to minimize the influence
-- on system performance.
--   table_pattern: LIKE pattern matched against partitions.relation; only
--                  partitions of matching tables are counted and moved.
--                  The default '%' matches every table.
CREATE FUNCTION rebalance(table_pattern text = '%') RETURNS void AS $$
DECLARE
    dst_node int;
    src_node int;
    min_count bigint;
    max_count bigint;
    mv_part_name text;
    repl_group text;
    done bool;
BEGIN
    -- NOTE(review): presumably this forwards the call to the shardlord when
    -- invoked on another node and returns true in that case -- confirm
    -- against shardman.redirect_to_shardlord.
    IF shardman.redirect_to_shardlord(format('rebalance(%L)', table_pattern))
    THEN
        RETURN;
    END IF;

    LOOP
        done := true;
        -- Repeat for all replication groups
        FOR repl_group IN SELECT DISTINCT replication_group FROM shardman.nodes
        LOOP
            -- Select the node in this group with the minimal number of
            -- matching partitions. Start from shardman.nodes and LEFT JOIN
            -- so that nodes holding no matching partition at all (e.g. a
            -- freshly added node) are counted as 0 instead of being ignored.
            SELECT n.id, count(p.node_id) AS n_parts INTO dst_node, min_count
            FROM shardman.nodes n
            LEFT JOIN shardman.partitions p
                ON p.node_id = n.id AND p.relation LIKE table_pattern
            WHERE n.replication_group = repl_group
            GROUP BY n.id
            ORDER BY n_parts ASC, n.id LIMIT 1;

            -- Select the node in this group with the maximal number of
            -- matching partitions. If the group has none, no row is
            -- returned and src_node/max_count become NULL, which makes the
            -- comparison below not true.
            SELECT p.node_id, count(*) AS n_parts INTO src_node, max_count
            FROM shardman.partitions p
            INNER JOIN shardman.nodes n ON p.node_id = n.id
            WHERE n.replication_group = repl_group
                AND p.relation LIKE table_pattern
            GROUP BY p.node_id
            ORDER BY n_parts DESC, p.node_id LIMIT 1;

            -- If the difference in the number of partitions on these nodes
            -- is greater than 1, move a random partition.
            IF max_count - min_count > 1 THEN
                -- Skip partitions that already have a replica on the
                -- destination node.
                SELECT p.part_name INTO mv_part_name
                FROM shardman.partitions p
                WHERE p.node_id = src_node
                    AND p.relation LIKE table_pattern
                    AND NOT EXISTS (SELECT *
                                    FROM shardman.replicas r
                                    WHERE r.node_id = dst_node
                                        AND r.part_name = p.part_name)
                ORDER BY random() LIMIT 1;

                -- No eligible partition: every candidate already has a
                -- replica on dst_node. Do not call mv_partition with NULL
                -- and do not request another pass, otherwise the outer loop
                -- would never terminate.
                IF mv_part_name IS NOT NULL THEN
                    PERFORM shardman.mv_partition(mv_part_name, dst_node);
                    done := false;
                END IF;
            END IF;
        END LOOP;

        EXIT WHEN done;
    END LOOP;
END
$$ LANGUAGE plpgsql;
757
+
699
758
-- -------------------------------------------------------------------
700
759
-- Utility functions
701
760
-- -------------------------------------------------------------------
@@ -723,7 +782,7 @@ CREATE FUNCTION reconstruct_table_attrs(relation regclass)
723
782
RETURNS text AS ' pg_shardman' LANGUAGE C STRICT;
724
783
725
784
-- Broadcast SQL commands to nodes and wait for their completion.
726
- -- cmds is list of SQL commands separated by semi-columns with node
785
+ -- cmds is a list of SQL commands terminated by semicolons with node
727
786
-- prefix: node-id:sql-statement;
728
787
-- To run multiple statements on node, wrap them in {}:
729
788
-- {node-id:statement; statement;}
@@ -732,10 +791,10 @@ RETURNS text AS 'pg_shardman' LANGUAGE C STRICT;
732
791
-- No escaping is performed, so ';', '{' and '}' inside queries are not supported.
733
792
-- By default the function throws an error if execution fails at some of the
734
793
-- nodes, with ignore_errors=true errors are ignored and function returns string
735
- -- with "Error:" prefix containing list of errors separated by semicolons with
794
+ -- with "Error:" prefix containing list of errors terminated by dots with
736
795
-- nodes prefixes.
737
796
-- In case of normal completion this function returns a list with node prefixes
738
- -- separated by semi- columns with single result for select queries or number of
797
+ -- separated by columns with single result for select queries or number of
739
798
-- affected rows for other commands.
740
799
-- If two_phase parameter is true, then each statement is wrapped in a block and
741
800
-- prepared with subsequent commit or rollback of prepared transaction at second
@@ -834,9 +893,9 @@ DECLARE
834
893
BEGIN
835
894
LOOP
836
895
response := shardman .broadcast (format(' %s:SELECT confirmed_flush_lsn - pg_current_wal_lsn() FROM pg_replication_slots WHERE slot_name=%L;' , src_node_id, slot));
837
- lag := trim (trailing ' ; ' from response) ::bigint ;
896
+ lag := response::bigint ;
838
897
839
- RAISE NOTICE ' Replication lag %' , lag;
898
+ RAISE DEBUG ' Replication lag %' , lag;
840
899
IF locked THEN
841
900
IF lag<= 0 THEN
842
901
RETURN;
0 commit comments