Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ae3cc6c

Browse files
committed
Making replicas read-only for all but apply workers.
create_replica in progress.
1 parent 772d974 commit ae3cc6c

File tree

7 files changed

+246
-51
lines changed

7 files changed

+246
-51
lines changed

postgresql.conf.common.template

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ autovacuum = off
88

99
log_min_messages = DEBUG1
1010
# client_min_messages = NOTICE
11-
# client_min_messages = INFO
12-
client_min_messages = PANIC
11+
client_min_messages = WARNING
1312

1413
wal_level = logical

shard.sql

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ DECLARE
2626
pname text;
2727
BEGIN
2828
IF NEW.initial_node != (SELECT shardman.get_node_id()) THEN
29-
RAISE DEBUG '[SHARDMAN] new table trig, pid %', (select pg_backend_pid());
3029
EXECUTE format('DROP TABLE IF EXISTS %I CASCADE;', NEW.relation);
3130
partition_names :=
3231
(SELECT ARRAY(SELECT part_name FROM shardman.gen_part_names(
@@ -210,12 +209,12 @@ END $$ LANGUAGE plpgsql;
210209

211210
-- On adding new partition, create proper foreign server & foreign table and
212211
-- replace tmp (empty) partition with it.
213-
-- TODO: race condition between this trigger and new_table_worker_side
214-
-- definitely deserves attention.
212+
-- TODO: There is a race condition between this trigger and
213+
-- new_table_worker_side trigger during initial tablesync, we should deal with
214+
-- it.
215215
CREATE FUNCTION new_primary() RETURNS TRIGGER AS $$
216216
BEGIN
217217
IF NEW.owner != (SELECT shardman.get_node_id()) THEN
218-
RAISE DEBUG 'SHARDMAN new prim trigger, pid %', (select pg_backend_pid());
219218
PERFORM shardman.replace_usual_part_with_foreign(NEW);
220219
END IF;
221220
RETURN NULL;
@@ -297,28 +296,86 @@ $$ LANGUAGE plpgsql;
297296
CREATE FUNCTION readonly_table_on(relation regclass)
298297
RETURNS void AS $$
299298
BEGIN
300-
-- Create go away trigger to prevent any new ones
299+
-- Create go away trigger to prevent any modifications
301300
PERFORM shardman.readonly_table_off(relation);
302-
EXECUTE format(
303-
'CREATE TRIGGER shardman_readonly BEFORE INSERT OR UPDATE OR DELETE OR
304-
TRUNCATE ON %I FOR EACH STATEMENT EXECUTE PROCEDURE shardman.go_away();',
305-
relation);
306-
EXECUTE format(
307-
'ALTER TABLE %I ENABLE ALWAYS TRIGGER shardman_readonly;', relation);
301+
PERFORM shardman.create_modification_triggers(relation, 'shardman_readonly',
302+
'shardman.go_away()');
308303
END
309304
$$ LANGUAGE plpgsql STRICT;
310305
CREATE FUNCTION go_away() RETURNS TRIGGER AS $$
311306
BEGIN
312307
RAISE EXCEPTION 'The "%" table is read only.', TG_TABLE_NAME
313308
USING HINT = 'Probably table copy is in progress';
314-
RETURN NULL;
315309
END;
316310
$$ LANGUAGE plpgsql;
317311
-- And make it writable again
318312
CREATE FUNCTION readonly_table_off(relation regclass)
319313
RETURNS void AS $$
320314
BEGIN
321315
EXECUTE format('DROP TRIGGER IF EXISTS shardman_readonly ON %s', relation);
316+
EXECUTE format('DROP TRIGGER IF EXISTS shardman_readonly_stmt ON %s', relation);
317+
END $$ LANGUAGE plpgsql STRICT;
318+
319+
-- Make replica read-only, i.e. readonly for all but LR apply workers
320+
CREATE FUNCTION readonly_replica_on(relation regclass)
321+
RETURNS void AS $$
322+
BEGIN
323+
RAISE DEBUG '[SHARDMAN] table % made read-only for all but apply workers', relation;
324+
PERFORM shardman.readonly_replica_off(relation);
325+
PERFORM shardman.create_modification_triggers(
326+
relation, 'shardman_readonly_replica', 'shardman.ror_go_away()');
327+
END $$ LANGUAGE plpgsql STRICT;
328+
-- This function is impudent because it is used as both stmt and row trigger.
329+
-- The idea is that we must never reach RETURN NEW when fired as a stmt trigger,
330+
-- because stmt trigger fires only on TRUNCATE which is impossible in LR.
331+
-- Besides, I checked that nothing bad happens if we return NEW from stmt
332+
-- trigger function anyway.
333+
CREATE FUNCTION ror_go_away() RETURNS TRIGGER AS $$
334+
BEGIN
335+
IF NOT shardman.inside_apply_worker() THEN
336+
RAISE EXCEPTION 'The "%" table is read only for non-apply workers', TG_TABLE_NAME
337+
USING HINT =
338+
'If you see this, most probably node with primary part has failed and' ||
339+
' you need to promote replica. Promotion is not yet implemented, sorry :(';
340+
END IF;
341+
raise warning 'NEW IS %', NEW;
342+
RETURN NEW;
343+
END $$ LANGUAGE plpgsql;
344+
-- And make replica writable again
345+
CREATE FUNCTION readonly_replica_off(relation regclass) RETURNS void AS $$
346+
BEGIN
347+
EXECUTE format('DROP TRIGGER IF EXISTS shardman_readonly_replica ON %s',
348+
relation);
349+
EXECUTE format('DROP TRIGGER IF EXISTS shardman_readonly_replica_stmt ON %s',
350+
relation);
351+
END $$ LANGUAGE plpgsql STRICT;
352+
CREATE FUNCTION inside_apply_worker() RETURNS bool AS 'pg_shardman' LANGUAGE C;
353+
354+
-- Create two triggers firing exec_proc before any modification operation, make
355+
-- them ALWAYS ENABLE. We need two triggers because TRUNCATE doesn't work with
356+
-- FOR EACH ROW, while LR doesn't support STATEMENT triggers (well, there is no
357+
-- statements in WAL) and changes may sneak through it.
358+
-- If you are curious, we use %I to format any identifiers (e.g. quote identifier
359+
-- with "" if it contains spaces) and use %s while formatting regclass, because
360+
-- it quotes everything automatically while casting oid to name.
361+
CREATE FUNCTION create_modification_triggers(
362+
relation regclass, trigname name, exec_proc text) RETURNS void AS $$
363+
DECLARE
364+
stmt_trigname text;
365+
BEGIN
366+
EXECUTE format(
367+
'CREATE TRIGGER %I BEFORE INSERT OR UPDATE OR DELETE
368+
ON %s FOR EACH ROW EXECUTE PROCEDURE %s;',
369+
trigname, relation, exec_proc);
370+
EXECUTE format(
371+
'ALTER TABLE %s ENABLE ALWAYS TRIGGER %I;', relation::text, trigname);
372+
stmt_trigname := format('%s_stmt', trigname);
373+
EXECUTE format(
374+
'CREATE TRIGGER %I BEFORE
375+
TRUNCATE ON %s FOR EACH STATEMENT EXECUTE PROCEDURE %s;',
376+
stmt_trigname, relation, exec_proc);
377+
EXECUTE format(
378+
'ALTER TABLE %s ENABLE ALWAYS TRIGGER %I;', relation::text, stmt_trigname);
322379
END $$ LANGUAGE plpgsql STRICT;
323380

324381
CREATE FUNCTION gen_create_table_sql(relation text, connstring text) RETURNS text

src/include/pg_shardman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ extern void update_cmd_status(int64 id, const char *new_status);
4747
extern void cmd_canceled(Cmd *cmd);
4848
extern char *get_worker_node_connstr(int node_id);
4949
extern int32 get_primary_owner(const char *part_name);
50+
extern int32 get_reptail_owner(const char *part_name, int32 *owner,
51+
int32 *partnum);
5052
extern char *get_partition_relation(const char *part_name);
5153

5254
#endif /* PG_SHARDMAN_H */

src/include/shard.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55

66
extern void create_hash_partitions(Cmd *cmd);
77
extern void move_primary(Cmd *cmd);
8+
extern void create_replica(Cmd *cmd);
89

910
#endif /* SHARD_H */

src/pg_shardman.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ wait_notify()
287287
/* eat all notifications at once */
288288
while ((notify = PQnotifies(conn)) != NULL)
289289
{
290-
shmn_elog(LOG, "NOTIFY %s received from backend PID %d",
290+
shmn_elog(DEBUG1, "NOTIFY %s received from backend PID %d",
291291
notify->relname, notify->be_pid);
292292
PQfreemem(notify);
293293
}
@@ -446,7 +446,7 @@ void
446446
cmd_canceled(Cmd *cmd)
447447
{
448448
got_sigusr1 = false;
449-
shmn_elog(LOG, "Command %ld canceled", cmd->id);
449+
shmn_elog(INFO, "Command %ld canceled", cmd->id);
450450
update_cmd_status(cmd->id, "canceled");
451451
}
452452

@@ -755,14 +755,10 @@ get_primary_owner(const char *part_name)
755755
part_name);
756756

757757
if (SPI_execute(sql, true, 0) < 0)
758-
{
759758
shmn_elog(FATAL, "Stmt failed : %s", sql);
760-
}
761759

762760
if (SPI_processed == 0)
763-
{
764761
owner = -1;
765-
}
766762
else
767763
{
768764
owner = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
@@ -774,6 +770,42 @@ get_primary_owner(const char *part_name)
774770
return owner;
775771
}
776772

773+
/*
774+
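* Find the last replica in the 'part_name' replica chain (the row whose nxt is
775+
* NULL); store the node id it resides on in *owner and its partnum in *partnum.
776+
* Returns 0, or -1 if such partition doesn't exist at all. XXX: the query below selects only 'owner', not partnum.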
777+
*/
778+
int32
779+
get_reptail_owner(const char *part_name, int32 *owner, int32 *partnum)
780+
{
781+
char *sql;
782+
bool isnull;
783+
int result = 0;
784+
785+
SPI_PROLOG;
786+
sql = psprintf( /* allocated in SPI ctxt, freed with ctxt release */
787+
"select owner from shardman.partitions where part_name = '%s'"
788+
" and nxt is NULL;", part_name);
789+
790+
if (SPI_execute(sql, true, 0) < 0)
791+
shmn_elog(FATAL, "Stmt failed : %s", sql);
792+
793+
if (SPI_processed == 0)
794+
result = -1;
795+
else
796+
{
797+
*owner = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
798+
SPI_tuptable->tupdesc,
799+
1, &isnull));
800+
*partnum = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
801+
SPI_tuptable->tupdesc,
802+
2, &isnull));
803+
}
804+
805+
SPI_EPILOG;
806+
return result;
807+
}
808+
777809
/*
778810
* Get relation name of partition part_name. Memory is palloc'ed.
779811
* NULL is returned, if there is no such partition.

0 commit comments

Comments
 (0)