Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5748312

Browse files
committed
Distributed deadlock detection
1 parent 52287bf commit 5748312

File tree

1 file changed

+100
-3
lines changed

1 file changed

+100
-3
lines changed

pg_shardman--1.0.sql

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,16 +1146,22 @@ BEGIN
11461146
END
11471147
$$ LANGUAGE plpgsql;
11481148

1149-
-- Get self node identifier
1150-
CREATE FUNCTION get_my_id() RETURNS int AS $$
1149+
-- Map system identifier to node identifier.
-- Sends a lookup against shardman.nodes through shardman.broadcast (the
-- command is addressed with the "0:" prefix) and casts the textual reply
-- to int.
CREATE FUNCTION get_node_by_sysid(sysid bigint) RETURNS int AS $$
BEGIN
    RETURN shardman.broadcast(
        format('0:SELECT id FROM shardman.nodes WHERE system_id=%s;', sysid)
    )::int;
END
$$ LANGUAGE plpgsql;
11581158

1159+
-- Get self node identifier.
-- Resolves this instance's own system identifier to its node id via
-- shardman.get_node_by_sysid.
CREATE FUNCTION get_my_id() RETURNS int AS $$
DECLARE
    my_sysid bigint := shardman.get_system_identifier();
BEGIN
    RETURN shardman.get_node_by_sysid(my_sysid);
END
$$ LANGUAGE plpgsql;
11591165

11601166
-- Check consistency of cluster with metadata and perform recovery
11611167
CREATE FUNCTION recovery() RETURNS void AS $$
@@ -1922,6 +1928,97 @@ $$ LANGUAGE plpgsql;
19221928
-- Returns the system identifier of this instance as bigint.
-- Implemented in C in the 'pg_shardman' shared library.
CREATE FUNCTION get_system_identifier() RETURNS bigint
    LANGUAGE C STRICT
    AS 'pg_shardman';
19241930

1931+
-----------------------------------------------------------------------
1932+
-- Some useful views.
1933+
-----------------------------------------------------------------------
1934+
1935+
-- A process in the cluster, identified by the node it runs on and its
-- backend pid on that node. Used as the vertex type of the lock graph.
CREATE TYPE process AS (
    node int,
    pid int
);
1936+
1937+
-- View to build lock graph which can be used to detect global deadlock.
-- Every row is one edge "wait -> hold" between two shardman.process values
-- (node id, backend pid): the waiting process and the process it waits for.
CREATE VIEW lock_graph(wait, hold) AS
    -- Local edges: a backend on this node waits for a lock that is granted
    -- either to another local backend or to a prepared transaction.
    -- For a prepared transaction (hold.pid IS NULL) the holder is decoded
    -- from the two-phase gid: its 3rd ':'-separated field is read as a
    -- system identifier and its 1st field as a pid.
    SELECT
        ROW(
            shardman.get_my_id(),
            wait.pid
        )::shardman.process,
        ROW(
            CASE
                WHEN hold.pid IS NOT NULL THEN shardman.get_my_id()
                ELSE shardman.get_node_by_sysid(split_part(twopc.gid, ':', 3)::bigint)
            END,
            COALESCE(hold.pid, split_part(twopc.gid, ':', 1)::int)
        )::shardman.process
    FROM pg_locks AS wait
    INNER JOIN pg_locks AS hold
        ON wait.transactionid = hold.transactionid
        -- A tuple lock is identified by (database, relation, page, tuple);
        -- comparing only page and tuple would pair up unrelated tuples that
        -- merely share the same position in different relations.
        OR (
            wait.database = hold.database
            AND wait.relation = hold.relation
            AND wait.page = hold.page
            AND wait.tuple = hold.tuple
        )
    LEFT JOIN pg_prepared_xacts AS twopc
        ON twopc.transaction = hold.transactionid
    WHERE NOT wait.granted
        AND wait.pid IS NOT NULL
        AND hold.granted
        -- Keep only holders we can name: a live backend or a prepared xact.
        AND (hold.pid IS NOT NULL OR twopc.gid IS NOT NULL)
    UNION ALL
    -- Cross-node edges: a local backend whose application_name starts with
    -- 'pgfdw:' is treated as working on behalf of a remote process; the 2nd
    -- ':'-separated field is read as that process's system identifier and
    -- the 3rd as its pid. The remote process waits for the local backend.
    SELECT
        ROW(
            shardman.get_node_by_sysid(split_part(application_name, ':', 2)::bigint),
            split_part(application_name, ':', 3)::int
        )::shardman.process,
        ROW(
            shardman.get_my_id(),
            pid
        )::shardman.process
    FROM pg_stat_activity
    WHERE application_name LIKE 'pgfdw:%';
1955+
1956+
-- Pack lock graph into string.
-- Each edge of shardman.lock_graph is rendered as
-- "<wait node>:<wait pid>-><hold node>:<hold pid>" and the edges are joined
-- with ','. Returns NULL when the view yields no edges (string_agg over an
-- empty set).
CREATE FUNCTION serialize_lock_graph() RETURNS TEXT AS $$
    SELECT string_agg(
        (g.wait).node || ':' || (g.wait).pid || '->' || (g.hold).node || ':' || (g.hold).pid,
        ','
    )
    FROM shardman.lock_graph AS g;
$$ LANGUAGE sql;
1960+
1961+
-- Unpack lock graph from string.
-- Input is a ','-separated list of edges in the form
-- "<wait node>:<wait pid>-><hold node>:<hold pid>"; each edge becomes one
-- row shaped like shardman.lock_graph.
-- Empty pieces (an empty input string, or nothing between two commas) are
-- skipped: without the filter they would reach the ::int casts as '' and
-- raise "invalid input syntax" instead of meaning "no edges".
CREATE FUNCTION deserialize_lock_graph(edges text) RETURNS SETOF shardman.lock_graph AS $$
    SELECT
        ROW(
            split_part(split_part(edge, '->', 1), ':', 1)::int,
            split_part(split_part(edge, '->', 1), ':', 2)::int
        )::shardman.process AS wait,
        ROW(
            split_part(split_part(edge, '->', 2), ':', 1)::int,
            split_part(split_part(edge, '->', 2), ':', 2)::int
        )::shardman.process AS hold
    FROM regexp_split_to_table(edges, ',') edge
    WHERE edge <> '';
$$ LANGUAGE sql;
1969+
1970+
-- Collect lock graphs from all nodes.
-- When called anywhere but the shardlord, the call is forwarded with the
-- "0:" broadcast prefix and the reply is returned as is. On the shardlord,
-- one "<node id>:SELECT shardman.serialize_lock_graph();" command is built
-- per row of shardman.nodes and the whole batch is sent in a single
-- shardman.broadcast call, whose text result is returned.
-- NOTE(review): how broadcast joins the per-node results is not visible
-- here; presumably with ',' so the output can be fed to
-- shardman.deserialize_lock_graph -- confirm.
CREATE FUNCTION global_lock_graph() RETURNS text AS $$
DECLARE
    node_id int;
    poll text := '';
    graph text;
BEGIN
    IF NOT shardman.is_shardlord() THEN
        RETURN shardman.broadcast('0:SELECT shardman.global_lock_graph();');
    END IF;

    -- Append one serialize command per known node to the broadcast batch.
    FOR node_id IN SELECT id FROM shardman.nodes
    LOOP
        poll := format('%s%s:SELECT shardman.serialize_lock_graph();', poll, node_id);
    END LOOP;
    SELECT shardman.broadcast(poll) INTO graph;

    RETURN graph;
END;
$$ LANGUAGE plpgsql;
1990+
1991+
1992+
-- Detect distributed deadlock and return the set of processes involved in the deadlock. If there is no deadlock then this view is empty.
1993+
--
1994+
-- This query is based on the algorithm described by Knuth for detecting a cycle in a linked list. In one column, keep track of the children,
1995+
-- the children's children, the children's children's children, etc. In another column, keep track of the grandchildren, the grandchildren's grandchildren,
1996+
-- the grandchildren's grandchildren's grandchildren, etc.
1997+
--
1998+
-- For the initial selection, the distance between Child and Grandchild columns is 1. Every selection from union all increases the depth of Child by 1, and that of Grandchild by 2.
1999+
-- The distance between them increases by 1.
2000+
--
2001+
-- If there is any loop, since the distance only increases by 1 each time, at some point after Child is in the loop, the distance will be a multiple of the cycle length.
2002+
-- When that happens, the Child and the Grandchild columns are the same. Use that as an additional condition to stop the recursion, and detect it in the rest of your code as an error.
2003+
CREATE VIEW deadlock AS
    WITH RECURSIVE
    -- Edges of the global lock graph: parent waits for child.
    edges AS (
        SELECT
            wait AS parent,
            hold AS child
        FROM shardman.deserialize_lock_graph(shardman.global_lock_graph())
    ),
    -- For every starting edge, advance "child" one edge per iteration and
    -- "grandchild" two edges per iteration; a row stops being extended once
    -- the two pointers meet.
    walk AS (
        SELECT
            slow.parent,
            slow.child,
            fast.child AS grandchild
        FROM edges AS slow
        INNER JOIN edges AS fast
            ON fast.parent = slow.child
        UNION ALL
        SELECT
            walk.parent,
            step.child,
            hop2.child AS grandchild
        FROM walk
        INNER JOIN edges AS step
            ON step.parent = walk.child
        INNER JOIN edges AS hop1
            ON hop1.parent = walk.grandchild
        INNER JOIN edges AS hop2
            ON hop2.parent = hop1.child
        WHERE walk.child <> walk.grandchild
    )
    -- A starting process whose two pointers met has a cycle on its path.
    SELECT DISTINCT parent
    FROM walk
    WHERE child = grandchild;
2020+
2021+
19252022
-- View for monitoring logical replication lag.
19262023
-- Can be used only at shardlord.
19272024
CREATE VIEW replication_lag(pubnode, subnode, lag) AS

0 commit comments

Comments
 (0)