Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c83c025

Browse files
author
Amit Kapila
committed
Fix deadlock for multiple replicating truncates of the same table.
While applying the truncate change, the logical apply worker acquired RowExclusiveLock on the relation being truncated. This allowed two apply workers to truncate the same relation at the same time, which led to a deadlock. The reason was that one of the workers, after updating the pg_class tuple, tried to acquire a SHARE lock on the relation and started to wait for the second worker, which had already acquired RowExclusiveLock on the relation. When the second worker then tried to update the pg_class tuple, it started to wait for the first worker, which led to a deadlock.

Fix it by acquiring AccessExclusiveLock on the relation before applying the truncate change, as we do for a normal truncate operation.

Author: Peter Smith, test case by Haiying Tang
Reviewed-by: Dilip Kumar, Amit Kapila
Backpatch-through: 11
Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com
1 parent c64183f commit c83c025

File tree

2 files changed

+57
-5
lines changed

2 files changed

+57
-5
lines changed

src/backend/replication/logical/worker.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,7 @@ apply_handle_truncate(StringInfo s)
12481248
List *relids = NIL;
12491249
List *relids_logged = NIL;
12501250
ListCell *lc;
1251+
LOCKMODE lockmode = AccessExclusiveLock;
12511252

12521253
ensure_transaction();
12531254

@@ -1258,14 +1259,14 @@ apply_handle_truncate(StringInfo s)
12581259
LogicalRepRelId relid = lfirst_oid(lc);
12591260
LogicalRepRelMapEntry *rel;
12601261

1261-
rel = logicalrep_rel_open(relid, RowExclusiveLock);
1262+
rel = logicalrep_rel_open(relid, lockmode);
12621263
if (!should_apply_changes_for_rel(rel))
12631264
{
12641265
/*
12651266
* The relation can't become interesting in the middle of the
12661267
* transaction so it's safe to unlock it.
12671268
*/
1268-
logicalrep_rel_close(rel, RowExclusiveLock);
1269+
logicalrep_rel_close(rel, lockmode);
12691270
continue;
12701271
}
12711272

@@ -1283,7 +1284,7 @@ apply_handle_truncate(StringInfo s)
12831284
{
12841285
ListCell *child;
12851286
List *children = find_all_inheritors(rel->localreloid,
1286-
RowExclusiveLock,
1287+
lockmode,
12871288
NULL);
12881289

12891290
foreach(child, children)
@@ -1303,7 +1304,7 @@ apply_handle_truncate(StringInfo s)
13031304
*/
13041305
if (RELATION_IS_OTHER_TEMP(childrel))
13051306
{
1306-
table_close(childrel, RowExclusiveLock);
1307+
table_close(childrel, lockmode);
13071308
continue;
13081309
}
13091310

src/test/subscription/t/010_truncate.pl

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use warnings;
44
use PostgresNode;
55
use TestLib;
6-
use Test::More tests => 9;
6+
use Test::More tests => 12;
77

88
# setup
99

@@ -13,6 +13,8 @@
1313

1414
my $node_subscriber = get_new_node('subscriber');
1515
$node_subscriber->init(allows_streaming => 'logical');
16+
$node_subscriber->append_conf('postgresql.conf',
17+
qq(max_logical_replication_workers = 6));
1618
$node_subscriber->start;
1719

1820
my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -158,3 +160,52 @@
158160
$result = $node_subscriber->safe_psql('postgres',
159161
"SELECT count(*), min(a), max(a) FROM tab2");
160162
is($result, qq(3|1|3), 'truncate of multiple tables some not published');
163+
164+
# test that truncate works for logical replication when there are multiple
165+
# subscriptions for a single table
166+
167+
$node_publisher->safe_psql('postgres',
168+
"CREATE TABLE tab5 (a int)");
169+
170+
$node_subscriber->safe_psql('postgres',
171+
"CREATE TABLE tab5 (a int)");
172+
173+
$node_publisher->safe_psql('postgres',
174+
"CREATE PUBLICATION pub5 FOR TABLE tab5");
175+
$node_subscriber->safe_psql('postgres',
176+
"CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5"
177+
);
178+
$node_subscriber->safe_psql('postgres',
179+
"CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5"
180+
);
181+
182+
# wait for initial data sync
183+
$node_subscriber->poll_query_until('postgres', $synced_query)
184+
or die "Timed out while waiting for subscriber to synchronize data";
185+
186+
# insert data to truncate
187+
188+
$node_publisher->safe_psql('postgres',
189+
"INSERT INTO tab5 VALUES (1), (2), (3)");
190+
191+
$node_publisher->wait_for_catchup('sub5_1');
192+
$node_publisher->wait_for_catchup('sub5_2');
193+
194+
$result = $node_subscriber->safe_psql('postgres',
195+
"SELECT count(*), min(a), max(a) FROM tab5");
196+
is($result, qq(6|1|3), 'insert replicated for multiple subscriptions');
197+
198+
$node_publisher->safe_psql('postgres', "TRUNCATE tab5");
199+
200+
$node_publisher->wait_for_catchup('sub5_1');
201+
$node_publisher->wait_for_catchup('sub5_2');
202+
203+
$result = $node_subscriber->safe_psql('postgres',
204+
"SELECT count(*), min(a), max(a) FROM tab5");
205+
is($result, qq(0||),
206+
'truncate replicated for multiple subscriptions');
207+
208+
# check deadlocks
209+
$result = $node_subscriber->safe_psql('postgres',
210+
"SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'");
211+
is($result, qq(0), 'no deadlocks detected');

0 commit comments

Comments
 (0)