Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e4a953c

Browse files
committed
Sane logging.
Now we store node id in shmem and prepend [SHND node_id] to all log messages if '%z' if specified in log line prefix. Besides, we add [SHMN] to any shardman-related log message -- via macro in C and unfortunately manually in plpgsql.
1 parent beea55c commit e4a953c

File tree

9 files changed

+150
-54
lines changed

9 files changed

+150
-54
lines changed

init.sql

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ BEGIN
1818
-- Yes, malicious user might have another extension containing 'pg_shardman'...
1919
-- Probably better just call no-op func from the library
2020
IF strpos(current_setting('shared_preload_libraries'), 'pg_shardman') = 0 THEN
21-
RAISE EXCEPTION 'pg_shardman must be loaded via shared_preload_libraries. Refusing to
22-
proceed.';
21+
RAISE EXCEPTION 'pg_shardman must be loaded via shared_preload_libraries. Refusing to proceed.';
2322
END IF;
2423
END
2524
$$;
@@ -173,7 +172,7 @@ DECLARE
173172
lord_connstring text;
174173
lord_id int;
175174
BEGIN
176-
raise INFO 'Booting lord';
175+
RAISE INFO '[SHMN] Booting lord';
177176
PERFORM shardman.create_meta_pub();
178177

179178
lord_id := shardman.my_id();
@@ -232,12 +231,12 @@ CREATE FUNCTION drop_repslot(slot_name text, with_fire bool DEFAULT false)
232231
DECLARE
233232
slot_exists bool;
234233
BEGIN
235-
RAISE DEBUG '[SHARDMAN] Dropping repslot %', slot_name;
234+
RAISE DEBUG '[SHMN] Dropping repslot %', slot_name;
236235
EXECUTE format('SELECT EXISTS (SELECT * FROM pg_replication_slots
237236
WHERE slot_name = %L)', slot_name) INTO slot_exists;
238237
IF slot_exists THEN
239238
IF with_fire THEN -- kill walsender twice
240-
RAISE DEBUG '[SHARDMAN] Killing repslot % with fire', slot_name;
239+
RAISE DEBUG '[SHMN] Killing repslot % with fire', slot_name;
241240
PERFORM shardman.terminate_repslot_walsender(slot_name);
242241
PERFORM pg_sleep(1);
243242
PERFORM shardman.terminate_repslot_walsender(slot_name);
@@ -268,8 +267,7 @@ CREATE FUNCTION eliminate_sub(subname name, drop_sub bool DEFAULT true)
268267
DECLARE
269268
sub_exists bool;
270269
BEGIN
271-
RAISE DEBUG '[SHARDMAN %] eliminating sub %, drop_sub %',
272-
shardman.my_id(), subname, drop_sub;
270+
RAISE DEBUG '[SHMN] eliminating sub %, drop_sub %', subname, drop_sub;
273271
EXECUTE format('SELECT EXISTS (SELECT 1 FROM pg_subscription WHERE subname
274272
= %L)', subname) INTO sub_exists;
275273
IF sub_exists THEN
@@ -300,7 +298,7 @@ DECLARE
300298
sub record;
301299
rs record;
302300
BEGIN
303-
RAISE DEBUG '[SHARDMAN %] pg_shardman is dropping, cleaning up', shardman.my_id();
301+
RAISE DEBUG '[SHMN] pg_shardman is dropping, cleaning up';
304302

305303
FOR pub IN SELECT pubname FROM pg_publication WHERE pubname LIKE 'shardman_%' LOOP
306304
EXECUTE format('DROP PUBLICATION %I', pub.pubname);

membership.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ CREATE UNIQUE INDEX unique_node_connstr ON shardman.nodes (connstring)
3636
CREATE FUNCTION rm_node_worker_side() RETURNS TRIGGER AS $$
3737
BEGIN
3838
IF OLD.id = (SELECT shardman.my_id()) THEN
39-
RAISE DEBUG '[SHARDMAN] rm_node_worker_side called';
39+
RAISE DEBUG '[SHMN] rm_node_worker_side called';
4040
PERFORM shardman.pg_shardman_cleanup(false);
4141
END IF;
4242
RETURN NULL;
@@ -105,7 +105,7 @@ DECLARE
105105
connstr text := connstring FROM shardman.nodes WHERE id = shardman.my_id();
106106
BEGIN
107107
IF connstr IS NULL THEN
108-
RAISE EXCEPTION 'Node not in cluster, can''t get its connstring';
108+
RAISE EXCEPTION '[SHMN] Node not in cluster, can''t get its connstring';
109109
END IF;
110110
RETURN connstr;
111111
END $$ LANGUAGE plpgsql;
@@ -118,7 +118,7 @@ DECLARE
118118
worker_status IS NOT NULL;
119119
BEGIN
120120
IF connstr IS NULL THEN
121-
RAISE EXCEPTION 'Worker node with id % not found', node_id;
121+
RAISE EXCEPTION '[SHMN] Worker node with id % not found', node_id;
122122
END IF;
123123
RETURN connstr;
124124
END $$ LANGUAGE plpgsql STRICT;

shard.sql

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ CREATE TABLE partitions (
9595
-- it.
9696
CREATE FUNCTION new_primary() RETURNS TRIGGER AS $$
9797
BEGIN
98-
RAISE DEBUG '[SHARDMAN] new_primary trigger called on node % for part %, owner %',
99-
shardman.my_id(), NEW.part_name, NEW.owner;
98+
RAISE DEBUG '[SHMN] new_primary trigger called for part %, owner %',
99+
NEW.part_name, NEW.owner;
100100
IF NEW.owner != shardman.my_id() THEN
101101
PERFORM shardman.replace_usual_part_with_foreign(NEW);
102102
END IF;
@@ -190,8 +190,8 @@ DECLARE
190190
src_next_lname text;
191191
BEGIN
192192
ASSERT NEW.owner != OLD.owner, 'part_moved handles only moved parts';
193-
RAISE DEBUG '[SHARDMAN %] part_moved trigger called for part %, owner % -> %',
194-
me, NEW.part_name, OLD.owner, NEW.owner;
193+
RAISE DEBUG '[SHMN] part_moved trigger called for part %, owner % -> %',
194+
NEW.part_name, OLD.owner, NEW.owner;
195195
ASSERT NEW.nxt = OLD.nxt OR (NEW.nxt IS NULL AND OLD.nxt IS NULL),
196196
'both part and replica must not be moved in one update';
197197
ASSERT NEW.prv = OLD.prv OR (NEW.prv IS NULL AND OLD.prv IS NULL),
@@ -223,8 +223,7 @@ BEGIN
223223
-- Drop old table anyway;
224224
EXECUTE format('DROP TABLE IF EXISTS %I', NEW.part_name);
225225
ELSEIF me = NEW.owner THEN -- dst node
226-
RAISE DEBUG '[SHARDMAN %] part_moved trigger on dst node',
227-
me;
226+
RAISE DEBUG '[SHMN] part_moved trigger on dst node';
228227
-- Drop subscription used for copy
229228
PERFORM shardman.eliminate_sub(cp_logname);
230229
-- If primary part was moved, replace moved table with foreign one
@@ -371,19 +370,17 @@ DECLARE
371370
me int := shardman.my_id();
372371
my_part shardman.partitions;
373372
BEGIN
374-
RAISE DEBUG '[SHARDMAN %] update_fdw_server called for part %, owner %',
375-
shardman.my_id(), part.part_name, part.owner;
373+
RAISE DEBUG '[SHMN] update_fdw_server called for part %, owner %',
374+
part.part_name, part.owner;
376375

377376
SELECT * FROM shardman.partitions WHERE part_name = part.part_name AND
378377
owner = me INTO my_part;
379378
IF my_part.part_name IS NOT NULL THEN -- we are holding the part
380379
IF my_part.prv IS NULL THEN
381-
RAISE DEBUG '[SHARDMAN %] we are holding primary for part %, not updating fdw server for it',
382-
me, part.part_name;
380+
RAISE DEBUG '[SHMN] we are holding primary for part %, not updating fdw server for it', part.part_name;
383381
RETURN;
384382
ELSE
385-
RAISE DEBUG '[SHARDMAN %] we are holding replica for part %, updating fdw server for it',
386-
me, part.part_name;
383+
RAISE DEBUG '[SHMN] we are holding replica for part %, updating fdw server for it', part.part_name;
387384
END IF;
388385
END IF;
389386

@@ -446,8 +443,7 @@ BEGIN
446443
-- and dangerous: what if table was created and dropped before this
447444
-- change reached us? We might also use it with local table (create
448445
-- foreign server pointing to it, etc), but that's just ugly.
449-
RAISE DEBUG '[SHARDMAN] my id: %, creating ft %',
450-
shardman.my_id(), part.part_name;
446+
RAISE DEBUG '[SHMN] creating ft %', part.part_name;
451447
EXECUTE format('CREATE FOREIGN TABLE %I %s SERVER %I OPTIONS (table_name %L)',
452448
fdw_part_name,
453449
(SELECT
@@ -551,7 +547,7 @@ END
551547
$$ LANGUAGE plpgsql STRICT;
552548
CREATE FUNCTION go_away() RETURNS TRIGGER AS $$
553549
BEGIN
554-
RAISE EXCEPTION 'The "%" table is read only.', TG_TABLE_NAME
550+
RAISE EXCEPTION '[SHMN] The "%" table is read only.', TG_TABLE_NAME
555551
USING HINT = 'Probably table copy is in progress';
556552
END;
557553
$$ LANGUAGE plpgsql;
@@ -567,7 +563,8 @@ END $$ LANGUAGE plpgsql STRICT;
567563
CREATE FUNCTION readonly_replica_on(relation regclass)
568564
RETURNS void AS $$
569565
BEGIN
570-
RAISE DEBUG '[SHARDMAN] table % made read-only for all but apply workers', relation;
566+
RAISE DEBUG '[SHMN] table % made read-only for all but apply workers',
567+
relation;
571568
PERFORM shardman.readonly_replica_off(relation);
572569
PERFORM shardman.create_modification_triggers(
573570
relation, 'shardman_readonly_replica', 'shardman.ror_go_away()');
@@ -580,7 +577,7 @@ END $$ LANGUAGE plpgsql STRICT;
580577
CREATE FUNCTION ror_go_away() RETURNS TRIGGER AS $$
581578
BEGIN
582579
IF NOT shardman.inside_apply_worker() THEN
583-
RAISE EXCEPTION 'The "%" table is read only for non-apply workers', TG_TABLE_NAME
580+
RAISE EXCEPTION '[SHMN] The "%" table is read only for non-apply workers', TG_TABLE_NAME
584581
USING HINT =
585582
'If you see this, most probably node with primary part has failed and' ||
586583
' you need to promote replica. Promotion is not yet implemented, sorry :(';
@@ -685,7 +682,7 @@ DECLARE
685682
newval text := shardman.ensure_sync_standby_c(standby);
686683
BEGIN
687684
IF newval IS NOT NULL THEN
688-
RAISE DEBUG '[SHARDMAN] Adding standby %, new value is %', standby, newval;
685+
RAISE DEBUG '[SHMN] Adding standby %, new value is %', standby, newval;
689686
PERFORM shardman.set_sync_standbys(newval);
690687
END IF;
691688
END $$ LANGUAGE plpgsql STRICT;
@@ -699,7 +696,7 @@ DECLARE
699696
newval text := shardman.remove_sync_standby_c(standby);
700697
BEGIN
701698
IF newval IS NOT NULL THEN
702-
RAISE DEBUG '[SHARDMAN] Removing standby %, new value is %', standby, newval;
699+
RAISE DEBUG '[SHMN] Removing standby %, new value is %', standby, newval;
703700
PERFORM shardman.set_sync_standbys(newval);
704701
END IF;
705702
END $$ LANGUAGE plpgsql STRICT;
@@ -710,7 +707,7 @@ CREATE FUNCTION set_sync_standbys(standby text) RETURNS void AS $$
710707
BEGIN
711708
PERFORM pg_reload_conf();
712709
PERFORM shardman.set_sync_standbys_c(standby);
713-
RAISE DEBUG '[SHARDMAN] sync_standbys set to %', standby;
710+
RAISE DEBUG '[SHMN] sync_standbys set to %', standby;
714711
END $$ LANGUAGE plpgsql STRICT;
715712
CREATE FUNCTION set_sync_standbys_c(standby text) RETURNS void
716713
AS 'pg_shardman' LANGUAGE C STRICT;

src/include/pg_shardman.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,18 @@
1212
#include <signal.h>
1313

1414
#include "miscadmin.h"
15+
#include "storage/lwlock.h"
1516
#include "libpq-fe.h"
1617

17-
#define shmn_elog(level,fmt,...) elog(level, "[SHARDMAN] " fmt, ## __VA_ARGS__)
18+
/*
19+
* By convention, we prefix shardman-related messages with [SHMN].
20+
* Unfortunately, currently it is impossible to have variadic polymorhic
21+
* functions accepting args with different types in plpgsql, so in sql funcs
22+
* we just type it manually.
23+
*/
24+
#define shmn_elog(level,fmt,...) elog(level, "[SHMN] " fmt, ## __VA_ARGS__)
1825

26+
/* Commonly used SPI stuff */
1927
#define SPI_PROLOG do { \
2028
StartTransactionCommand(); \
2129
SPI_connect(); \
@@ -63,7 +71,16 @@ extern char *shardman_shardlord_connstring;
6371
extern int shardman_cmd_retry_naptime;
6472
extern int shardman_poll_interval;
6573

66-
extern int32 shardman_my_node_id;
74+
/*
75+
* State in shared memory. We hold exclusive lock while modifying this, and
76+
* shard lock while reading.
77+
*/
78+
typedef struct ShmnSharedState
79+
{
80+
int32 my_id;
81+
LWLock *lock;
82+
} ShmnSharedState;
83+
extern ShmnSharedState *snss;
6784
#define SHMN_INVALID_NODE_ID -1
6885

6986
typedef struct Cmd
@@ -87,6 +104,9 @@ typedef struct RepCount
87104
} RepCount;
88105

89106
extern void _PG_init(void);
107+
extern void _PG_fini(void);
108+
extern int32 my_id(void);
109+
extern void set_my_id(int32 new_id);
90110
extern void shardlord_main(Datum main_arg);
91111
extern bool signal_pending(void);
92112
extern void check_for_sigterm(void);

src/include/shardman_hooks.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
#ifndef SHARDMAN_HOOKS_H
66
#define SHARDMAN_HOOKS_H
77

8-
extern emit_log_hook_type log_hook_next;
8+
#include "storage/ipc.h"
99

10-
extern void shardman_log_hook(ErrorData *edata);
10+
extern emit_log_hook_type old_log_hook;
11+
extern shmem_startup_hook_type old_shmem_startup_hook;
12+
13+
extern void shardman_log(ErrorData *edata);
14+
extern void shardman_shmem_startup(void);
1115

1216
#endif /* SHARDMAN_HOOKS_H */

src/pg_shardman.c

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,10 @@ int shardman_poll_interval;
6868
* cleanup after receiving SIGTERM.
6969
*/
7070
static PGconn *conn = NULL;
71-
/* This node id. */
7271
int32 shardman_my_node_id = -1;
72+
/* Link to shared memory state */
73+
ShmnSharedState *snss = NULL;
74+
7375

7476
/*
7577
* Entrypoint of the module. Define variables and register background worker.
@@ -86,9 +88,19 @@ _PG_init()
8688
errhint("Add pg_shardman to shared_preload_libraries.")));
8789
}
8890

91+
/*
92+
* Request additional shared resources. (These are no-ops if we're not in
93+
* the postmaster process.) We'll allocate or attach to the shared
94+
* resources in shardman_shmem_startup().
95+
*/
96+
RequestAddinShmemSpace(sizeof(ShmnSharedState));
97+
RequestNamedLWLockTranche("pg_shardman", 1);
98+
8999
/* remember & set hooks */
90-
log_hook_next = emit_log_hook;
91-
emit_log_hook = shardman_log_hook;
100+
old_log_hook = emit_log_hook;
101+
emit_log_hook = shardman_log;
102+
old_shmem_startup_hook = shmem_startup_hook;
103+
shmem_startup_hook = shardman_shmem_startup;
92104

93105
DefineCustomBoolVariable("shardman.shardlord",
94106
"This node is the shardlord?",
@@ -165,6 +177,42 @@ _PG_init()
165177
/* TODO: clean up publications if we were shardlord before */
166178
}
167179

180+
/*
181+
* Module unload callback
182+
*/
183+
void
184+
_PG_fini(void)
185+
{
186+
/* Uninstall hooks. */
187+
emit_log_hook = old_log_hook;
188+
shmem_startup_hook = old_shmem_startup_hook;
189+
}
190+
191+
/* Get this node id stored in shmem */
192+
int32
193+
my_id(void)
194+
{
195+
int32 id;
196+
197+
/* If MyProc is NULL, shmem is not yet inited or we are bootstrapping */
198+
if (MyProc == NULL)
199+
return SHMN_INVALID_NODE_ID;
200+
201+
LWLockAcquire(snss->lock, LW_SHARED);
202+
id = snss->my_id;
203+
LWLockRelease(snss->lock);
204+
return id;
205+
}
206+
207+
/* Get this node id stored in shmem */
208+
void
209+
set_my_id(int32 new_id)
210+
{
211+
LWLockAcquire(snss->lock, LW_EXCLUSIVE);
212+
snss->my_id = new_id;
213+
LWLockRelease(snss->lock);
214+
}
215+
168216
/*
169217
* shardlord bgw starts here
170218
*/
@@ -412,8 +460,8 @@ pg_shardman_installed_local(void)
412460
if (get_extension_oid("pg_shardman", true) == InvalidOid)
413461
{
414462
installed = false;
415-
shmn_elog(WARNING, "pg_shardman library is preloaded on shardlord, but"
416-
" extenstion is not created");
463+
shmn_elog(WARNING,
464+
"Terminating shardlord: pg_shardman lib is preloaded, but ext is not created");
417465
}
418466
PopActiveSnapshot();
419467
CommitTransactionCommand();

src/shard.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ rebalance(Cmd *cmd)
232232
exec_tasks(tasks, num_parts);
233233
SHMN_CHECK_FOR_INTERRUPTS_CMD(cmd);
234234

235-
shmn_elog(INFO, "Relation %s rebalanced:", relation);
235+
shmn_elog(INFO, "Relation %s rebalanced", relation);
236236
update_cmd_status(cmd->id, "done");
237237
}
238238

0 commit comments

Comments
 (0)