14
14
$$;
15
15
16
16
-- active is the normal mode, others needed only for proper node add and removal
17
- CREATE TYPE node_status AS ENUM (' active' , ' add_in_progress' , ' rm_in_progress' );
17
+ CREATE TYPE worker_node_status AS ENUM (' active' , ' add_in_progress' , ' rm_in_progress' );
18
18
19
19
-- list of nodes present in the cluster
20
20
CREATE TABLE nodes (
21
21
id serial PRIMARY KEY ,
22
- connstring text ,
23
- status node_status NOT NULL
22
+ connstring text NOT NULL UNIQUE,
23
+ worker_status worker_node_status,
24
+ -- While currently we don't support master and worker roles on one node,
25
+ -- potentially a node can be a worker, a master or both, so we need to represent three states.
26
+ -- One bool with NULL might be fine, but it seems a bit counter-intuitive.
27
+ worker bool NOT NULL DEFAULT true,
28
+ master bool NOT NULL DEFAULT false
24
29
);
25
30
26
31
-- Master is removing us, so reset our state, removing all subscriptions. A bit
@@ -33,13 +38,65 @@ BEGIN
33
38
PERFORM shardman .pg_shardman_cleanup (false);
34
39
RETURN NULL ;
35
40
END
36
- $$ language plpgsql;
41
+ $$ LANGUAGE plpgsql;
37
42
CREATE TRIGGER rm_node_worker_side AFTER UPDATE ON shardman .nodes
38
- FOR EACH ROW WHEN (OLD .status = ' active' AND NEW .status = ' rm_in_progress' )
43
+ FOR EACH ROW WHEN (OLD .worker_status = ' active' AND NEW .worker_status = ' rm_in_progress' )
39
44
EXECUTE PROCEDURE rm_node_worker_side();
40
45
-- fire trigger only on worker nodes
41
46
ALTER TABLE shardman .nodes ENABLE REPLICA TRIGGER rm_node_worker_side;
42
47
48
-- Sharded tables: one row per table that has been registered for sharding.
CREATE TABLE tables (
    -- table name
    relation text PRIMARY KEY,
    -- partitioning expression handed to create_hash_partitions
    expr text NOT NULL,
    -- number of hash partitions the table is split into
    partitions_count int NOT NULL,
    -- sql to create the table
    create_sql text NOT NULL,
    -- Node on which the table was partitioned at the beginning. Used only
    -- during initial tables inflation to distinguish between the table owner
    -- and other nodes; it would probably be cleaner to keep it in a separate
    -- table.
    initial_node int NOT NULL REFERENCES nodes(id)
);
59
+
60
-- On adding a new table, create it on non-owner nodes using the provided sql
-- and partition it. Nothing is done when the local node id equals
-- tables.initial_node (the node that already owns the table).
CREATE FUNCTION new_table_worker_side() RETURNS TRIGGER AS $$
BEGIN
    IF NEW.initial_node != (SELECT shardman.get_node_id()) THEN
        -- create_sql already holds a complete statement, so run it as is
        EXECUTE NEW.create_sql;
        -- %L quotes every argument as an SQL literal, so the relation name
        -- and the expression cannot alter the shape of the generated call
        EXECUTE format('select create_hash_partitions(%L, %L, %L);',
                       NEW.relation, NEW.expr, NEW.partitions_count);
    END IF;
    -- the result of an AFTER ... FOR EACH ROW trigger is ignored
    RETURN NULL;
END
$$ LANGUAGE plpgsql;
CREATE TRIGGER new_table_worker_side AFTER INSERT ON shardman.tables
    FOR EACH ROW EXECUTE PROCEDURE new_table_worker_side();
-- fire trigger only on worker nodes: a REPLICA trigger runs only while the
-- session applies replicated changes
ALTER TABLE shardman.tables ENABLE REPLICA TRIGGER new_table_worker_side;
76
-- On master side, insert partitions: for every row added to shardman.tables,
-- register partitions_count partitions in shardman.partitions, all of them
-- initially owned by the table's initial_node.
CREATE FUNCTION new_table_master_side() RETURNS TRIGGER AS $$
BEGIN
    INSERT INTO shardman.partitions (part_name, relation, owner)
    -- part names look like tablename_partnum, partnums start from 0, so the
    -- last partition number is partitions_count - 1
    SELECT NEW.relation || '_' || part.num,
           NEW.relation,
           NEW.initial_node
    FROM generate_series(0, NEW.partitions_count - 1) AS part(num);
    -- the result of an AFTER ... FOR EACH ROW trigger is ignored
    RETURN NULL;
END
$$ LANGUAGE plpgsql;
CREATE TRIGGER new_table_master_side AFTER INSERT ON shardman.tables
    FOR EACH ROW EXECUTE PROCEDURE new_table_master_side();
92
+
93
-- Partitions of sharded tables, one row per partition.
CREATE TABLE partitions (
    -- partition name
    part_name text PRIMARY KEY,
    -- sharded table this partition belongs to
    relation text NOT NULL REFERENCES tables(relation),
    -- node on which partition lies
    owner int REFERENCES nodes(id)
);
98
+
99
+
43
100
-- Currently it is used just to store node id, in general we can keep any local
44
101
-- node metadata here. If it is ever used extensively, hstore probably suits better.
45
102
CREATE TABLE local_meta (
@@ -88,12 +145,44 @@ CREATE TABLE cmd_opts (
88
145
89
146
-- Internal functions
90
147
148
-- Called on shardmaster bgw start. Creates the metadata publication, and, if
-- this node has no id yet, adds the node to the nodes table as a master and
-- stores the generated id locally. If the node already has an id, its row is
-- flagged as master.
CREATE FUNCTION master_boot() RETURNS void AS $$
DECLARE
    -- If we have never booted as a master before, we have work to do
    init_master bool DEFAULT false;
    master_connstring text;
    master_id int;
BEGIN
    RAISE INFO 'Booting master';
    PERFORM shardman.create_meta_pub();

    master_id := shardman.get_node_id();
    IF master_id IS NULL THEN
        SELECT s.setting INTO master_connstring
        FROM pg_settings AS s
        WHERE s.name = 'shardman.master_connstring';
        -- Fail with a clear message instead of a NOT NULL violation on
        -- nodes.connstring when the setting is not known to the server.
        IF master_connstring IS NULL THEN
            RAISE EXCEPTION 'shardman.master_connstring is not set';
        END IF;
        -- Explicit column list: the row must not depend on the physical
        -- column order of the nodes table. id comes from its serial default.
        INSERT INTO @extschema@.nodes (connstring, worker_status, worker, master)
        VALUES (master_connstring, NULL, false, true)
        RETURNING id INTO master_id;
        PERFORM shardman.set_node_id(master_id);
        init_master := true;
    ELSE
        -- Initialisation is needed only if the node was not a master before;
        -- the result is NULL (treated as "no") when the row is missing.
        SELECT NOT (SELECT n.master FROM shardman.nodes AS n WHERE n.id = master_id)
        INTO init_master;
        UPDATE shardman.nodes SET master = true WHERE id = master_id;
    END IF;
    IF init_master THEN
        -- TODO: set up lr channels
        NULL;
    END IF;
END $$ LANGUAGE plpgsql;
178
+
91
179
-- Create the shardman_meta_pub publication over the shardman metadata tables.
-- These tables will be replicated to worker nodes, notifying them about changes.
92
180
-- Called on master. Does nothing when a publication with that name already exists.
93
181
CREATE FUNCTION create_meta_pub () RETURNS void AS $$
94
182
BEGIN
95
183
IF NOT EXISTS (SELECT * FROM pg_publication WHERE pubname = ' shardman_meta_pub' ) THEN
96
- CREATE PUBLICATION shardman_meta_pub FOR TABLE shardman .nodes ;
184
+ CREATE PUBLICATION shardman_meta_pub FOR TABLE
185
+ shardman .nodes , shardman .tables ;
97
186
END IF;
98
187
END;
99
188
$$ LANGUAGE plpgsql;
@@ -111,24 +200,6 @@ BEGIN
111
200
END;
112
201
$$ LANGUAGE plpgsql;
113
202
114
- -- If for cmd cmd_id we haven't yet inserted new node, do that; mark it as passive
115
- -- for now, we still need to setup lr and set its id on the node itself
116
- -- Return generated or existing node id
117
- CREATE FUNCTION insert_node (connstring text , cmd_id bigint ) RETURNS int AS $$
118
- DECLARE
119
- n_id int ;
120
- BEGIN
121
- SELECT node_id FROM @extschema@.cmd_log INTO n_id WHERE id = cmd_id;
122
- IF n_id IS NULL THEN
123
- INSERT INTO @extschema@.nodes
124
- VALUES (DEFAULT, quote_literal(connstring), ' add_in_progress' )
125
- RETURNING id INTO n_id;
126
- UPDATE @extschema@.cmd_log SET node_id = n_id WHERE id = cmd_id;
127
- END IF;
128
- RETURN n_id;
129
- END
130
- $$ LANGUAGE plpgsql;
131
-
132
203
-- Create logical pgoutput replication slot, if not exists
133
204
CREATE FUNCTION create_repslot (slot_name text ) RETURNS void AS $$
134
205
DECLARE
@@ -213,7 +284,25 @@ CREATE FUNCTION set_node_id(node_id int) RETURNS void AS $$
213
284
UPDATE @extschema@.local_meta SET v = node_id WHERE k = ' node_id' ;
214
285
$$ LANGUAGE sql;
215
286
216
- CREATE FUNCTION gen_create_table_sql (relation text ) RETURNS text
287
-- If for cmd cmd_id we haven't yet inserted the new node, do that; mark it as
-- 'add_in_progress' for now, since we still need to set up lr and set its id
-- on the node itself. The generated id is recorded in cmd_log so that a
-- repeated call for the same command reuses it.
-- Returns the generated or already existing node id.
CREATE FUNCTION insert_node(connstring text, cmd_id bigint) RETURNS int AS $$
DECLARE
    n_id int;
BEGIN
    SELECT node_id INTO n_id FROM @extschema@.cmd_log WHERE id = cmd_id;
    IF n_id IS NULL THEN
        -- Explicit column list: the remaining columns (id, worker, master)
        -- take their declared defaults regardless of column order.
        INSERT INTO @extschema@.nodes (connstring, worker_status)
        VALUES (insert_node.connstring, 'add_in_progress')
        RETURNING id INTO n_id;
        UPDATE @extschema@.cmd_log SET node_id = n_id WHERE id = cmd_id;
    END IF;
    RETURN n_id;
END
$$ LANGUAGE plpgsql;
304
+
305
-- C-language function: takes a relation name and a connection string and
-- returns text. Judging by its name it produces the CREATE TABLE statement
-- for the relation -- TODO confirm against the C implementation.
+ CREATE FUNCTION gen_create_table_sql (relation text , connstring text ) RETURNS text
217
306
AS ' pg_shardman' LANGUAGE C;
218
307
219
308
-- Interface functions
0 commit comments