Commit ab55d74

Fix multiple crasher bugs in partitioned-table replication logic.
apply_handle_tuple_routing(), having detected and reported that the tuple it needed to update didn't exist, tried to update that tuple anyway, leading to a null-pointer dereference.

logicalrep_partition_open() failed to ensure that the LogicalRepPartMapEntry it built for a partition was fully independent of that for the partition root, leading to trouble if the root entry was later freed or rebuilt.

Meanwhile, on the publisher's side, pgoutput_change() sometimes attempted to apply execute_attr_map_tuple() to a NULL tuple.

The first of these was reported by Sergey Bernikov in bug #17055; I found the other two while developing some test cases for this sadly under-tested code.

Diagnosis and patch for the first issue by Amit Langote; patches for the others by me; new test cases by me. Back-patch to v13 where this logic came in.

Discussion: https://postgr.es/m/17055-9ba800ec8522668b@postgresql.org
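As context for the first crash, here is a deliberately simplified, self-contained C sketch of the control-flow mistake in the partition-routing update path and of the early-return shape the patch adopts. All names below (find_local_tuple, apply_update_buggy, apply_update_fixed, struct tuple) are hypothetical stand-ins for illustration, not PostgreSQL APIs; see the worker.c hunk further down for the real code.

/*
 * Illustrative sketch only, not PostgreSQL code.  find_local_tuple() stands
 * in for FindReplTupleInLocalRel(); the two apply_update_* functions mimic
 * the shape of the partition-routing update path before and after this fix.
 */
#include <stdbool.h>
#include <stdio.h>

struct tuple
{
    int         a;
};

/* Pretend the row to be updated is not present locally. */
static bool
find_local_tuple(struct tuple **localtup)
{
    *localtup = NULL;
    return false;
}

/* Pre-fix shape: reports the problem, then uses the missing tuple anyway. */
static void
apply_update_buggy(void)
{
    struct tuple *localtup;
    bool        found = find_local_tuple(&localtup);

    if (!found)
        fprintf(stderr, "row to be updated not found\n");
    /* BUG: falls through and dereferences localtup even when !found */
    printf("old value: %d\n", localtup->a);
}

/* Post-fix shape: bail out instead of touching a tuple that isn't there. */
static void
apply_update_fixed(void)
{
    struct tuple *localtup;
    bool        found = find_local_tuple(&localtup);

    if (!found)
    {
        fprintf(stderr, "row to be updated not found\n");
        return;
    }
    printf("old value: %d\n", localtup->a);
}

int
main(void)
{
    apply_update_fixed();       /* safe: logs and returns */
    if (0)
        apply_update_buggy();   /* would dereference a NULL pointer */
    return 0;
}
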
1 parent 4efcf47 commit ab55d74

5 files changed: +146/-29 lines changed

src/backend/replication/logical/relation.c  (+10/-3)

@@ -620,7 +620,9 @@ logicalrep_partmap_init(void)
  * logicalrep_partition_open
  *
  * Returned entry reuses most of the values of the root table's entry, save
- * the attribute map, which can be different for the partition.
+ * the attribute map, which can be different for the partition.  However,
+ * we must physically copy all the data, in case the root table's entry
+ * gets freed/rebuilt.
  *
  * Note there's no logicalrep_partition_close, because the caller closes the
  * component relation.
@@ -656,7 +658,7 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
 
     part_entry->partoid = partOid;
 
-    /* Remote relation is used as-is from the root entry. */
+    /* Remote relation is copied as-is from the root entry. */
     entry = &part_entry->relmapentry;
     entry->remoterel.remoteid = remoterel->remoteid;
     entry->remoterel.nspname = pstrdup(remoterel->nspname);
@@ -699,7 +701,12 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
         }
     }
     else
-        entry->attrmap = attrmap;
+    {
+        /* Lacking copy_attmap, do this the hard way. */
+        entry->attrmap = make_attrmap(attrmap->maplen);
+        memcpy(entry->attrmap->attnums, attrmap->attnums,
+               attrmap->maplen * sizeof(AttrNumber));
+    }
 
     entry->updatable = root->updatable;
 
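To show why the hunk above makes a physical copy of the attribute map instead of aliasing the root entry's pointer, here is a small self-contained C sketch of the dangling-pointer hazard and of the copy pattern. AttrMapSketch and copy_map are hypothetical illustrative names, not the real AttrMap/make_attrmap() API.

/*
 * Illustrative sketch only, not PostgreSQL code.  A partition entry that
 * merely aliases the root entry's map is left dangling when the root entry
 * is freed or rebuilt; copying the map (as the patch does with
 * make_attrmap() plus memcpy()) keeps the partition entry self-contained.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct AttrMapSketch    /* hypothetical stand-in for AttrMap */
{
    int         maplen;
    int        *attnums;
} AttrMapSketch;

/* Deep copy, analogous to make_attrmap() + memcpy() in the fix. */
static AttrMapSketch *
copy_map(const AttrMapSketch *src)
{
    AttrMapSketch *dst = malloc(sizeof(AttrMapSketch));

    dst->maplen = src->maplen;
    dst->attnums = malloc(sizeof(int) * src->maplen);
    memcpy(dst->attnums, src->attnums, sizeof(int) * src->maplen);
    return dst;
}

int
main(void)
{
    AttrMapSketch root;
    AttrMapSketch shared;
    AttrMapSketch *copied;

    root.maplen = 3;
    root.attnums = malloc(3 * sizeof(int));
    root.attnums[0] = 1;
    root.attnums[1] = 2;
    root.attnums[2] = 3;

    shared = root;              /* pre-fix behaviour: aliases root.attnums */
    copied = copy_map(&root);   /* post-fix behaviour: independent storage */

    free(root.attnums);         /* simulate the root entry being rebuilt */
    root.attnums = NULL;

    /* shared.attnums now dangles; copied->attnums is still valid. */
    (void) shared;
    printf("copied map: %d %d %d\n",
           copied->attnums[0], copied->attnums[1], copied->attnums[2]);

    free(copied->attnums);
    free(copied);
    return 0;
}
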

src/backend/replication/logical/worker.c  (+27/-21)

@@ -1477,12 +1477,13 @@ apply_handle_update_internal(ApplyExecutionData *edata,
     else
     {
         /*
-         * The tuple to be updated could not be found.
+         * The tuple to be updated could not be found.  Do nothing except for
+         * emitting a log message.
          *
-         * TODO what to do here, change the log level to LOG perhaps?
+         * XXX should this be promoted to ereport(LOG) perhaps?
         */
         elog(DEBUG1,
-             "logical replication did not find row for update "
+             "logical replication did not find row to be updated "
              "in replication target relation \"%s\"",
              RelationGetRelationName(localrel));
     }
@@ -1589,9 +1590,14 @@ apply_handle_delete_internal(ApplyExecutionData *edata,
     }
     else
     {
-        /* The tuple to be deleted could not be found. */
+        /*
+         * The tuple to be deleted could not be found.  Do nothing except for
+         * emitting a log message.
+         *
+         * XXX should this be promoted to ereport(LOG) perhaps?
+         */
         elog(DEBUG1,
-             "logical replication did not find row for delete "
+             "logical replication did not find row to be deleted "
              "in replication target relation \"%s\"",
              RelationGetRelationName(localrel));
     }
@@ -1728,30 +1734,30 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
                 found = FindReplTupleInLocalRel(estate, partrel,
                                                 &part_entry->remoterel,
                                                 remoteslot_part, &localslot);
-
-                oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-                if (found)
-                {
-                    /* Apply the update. */
-                    slot_modify_data(remoteslot_part, localslot,
-                                     part_entry,
-                                     newtup);
-                    MemoryContextSwitchTo(oldctx);
-                }
-                else
+                if (!found)
                 {
                     /*
-                     * The tuple to be updated could not be found.
+                     * The tuple to be updated could not be found.  Do nothing
+                     * except for emitting a log message.
                      *
-                     * TODO what to do here, change the log level to LOG
-                     * perhaps?
+                     * XXX should this be promoted to ereport(LOG) perhaps?
                      */
                     elog(DEBUG1,
-                         "logical replication did not find row for update "
-                         "in replication target relation \"%s\"",
+                         "logical replication did not find row to be updated "
+                         "in replication target relation's partition \"%s\"",
                          RelationGetRelationName(partrel));
+                    return;
                 }
 
+                /*
+                 * Apply the update to the local tuple, putting the result in
+                 * remoteslot_part.
+                 */
+                oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+                slot_modify_data(remoteslot_part, localslot, part_entry,
+                                 newtup);
+                MemoryContextSwitchTo(oldctx);
+
                 /*
                  * Does the updated tuple still satisfy the current
                  * partition's constraint?

src/backend/replication/pgoutput/pgoutput.c  (+5/-2)

@@ -612,8 +612,11 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
             /* Convert tuples if needed. */
             if (relentry->map)
             {
-                oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
-                newtuple = execute_attr_map_tuple(newtuple, relentry->map);
+                if (oldtuple)
+                    oldtuple = execute_attr_map_tuple(oldtuple,
+                                                      relentry->map);
+                newtuple = execute_attr_map_tuple(newtuple,
+                                                  relentry->map);
             }
         }
 
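For context on the hunk above: the old tuple of an UPDATE can legitimately be absent (for example, when the replica identity did not require logging the old row), so only tuples that are actually present should be pushed through the conversion map. Below is a minimal self-contained C sketch of that guard, with convert_tuple and publish_update as hypothetical stand-ins for the real pgoutput code.

/*
 * Illustrative sketch only, not PostgreSQL code.  convert_tuple() stands in
 * for execute_attr_map_tuple(); the point is simply that the old tuple of an
 * UPDATE may be NULL and must not be converted unconditionally.
 */
#include <stddef.h>
#include <stdio.h>

struct tuple
{
    int         ncols;
};

/* Hypothetical stand-in for execute_attr_map_tuple(). */
static struct tuple *
convert_tuple(struct tuple *tup)
{
    return tup;                 /* real code would build a remapped copy */
}

static void
publish_update(struct tuple *oldtuple, struct tuple *newtuple,
               int need_conversion)
{
    if (need_conversion)
    {
        if (oldtuple)           /* the fix: skip a NULL old tuple */
            oldtuple = convert_tuple(oldtuple);
        newtuple = convert_tuple(newtuple);
    }
    printf("old tuple %s, new tuple present\n",
           oldtuple ? "present" : "absent");
    (void) newtuple;
}

int
main(void)
{
    struct tuple newt = {2};

    publish_update(NULL, &newt, 1);     /* no longer a crash recipe */
    return 0;
}
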

src/test/subscription/t/001_rep_changes.pl  (+34/-2)

@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 28;
+use Test::More tests => 31;
 
 # Initialize publisher node
 my $node_publisher = get_new_node('publisher');
@@ -118,7 +118,7 @@
     "INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
 
 $node_publisher->safe_psql('postgres',
-    "INSERT INTO tab_full_pk VALUES (1, 'foo')");
+    "INSERT INTO tab_full_pk VALUES (1, 'foo'), (2, 'baz')");
 
 $node_publisher->safe_psql('postgres',
     "INSERT INTO tab_nothing VALUES (generate_series(1,20))");
@@ -297,6 +297,38 @@
 local|2.2|bar|2),
     'update works with different column order and subscriber local values');
 
+$result = $node_subscriber->safe_psql('postgres',
+    "SELECT * FROM tab_full_pk ORDER BY a");
+is( $result, qq(1|bar
+2|baz),
+    'update works with REPLICA IDENTITY FULL and a primary key');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber->append_conf('postgresql.conf', "log_min_messages = debug1");
+$node_subscriber->reload;
+
+$node_subscriber->safe_psql('postgres', "DELETE FROM tab_full_pk");
+
+$node_publisher->safe_psql('postgres',
+    "UPDATE tab_full_pk SET b = 'quux' WHERE a = 1");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab_full_pk WHERE a = 2");
+
+$node_publisher->wait_for_catchup('tap_sub');
+
+my $logfile = slurp_file($node_subscriber->logfile());
+ok( $logfile =~
+      qr/logical replication did not find row to be updated in replication target relation "tab_full_pk"/,
+    'update target row is missing');
+ok( $logfile =~
+      qr/logical replication did not find row to be deleted in replication target relation "tab_full_pk"/,
+    'delete target row is missing');
+
+$node_subscriber->append_conf('postgresql.conf',
+    "log_min_messages = warning");
+$node_subscriber->reload;
+
 # check behavior with toasted values
 
 $node_publisher->safe_psql('postgres',

src/test/subscription/t/013_partition.pl  (+70/-1)

@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 56;
+use Test::More tests => 62;
 
 # setup
 
@@ -347,6 +347,46 @@ BEGIN
     $node_subscriber2->safe_psql('postgres', "SELECT a FROM tab1 ORDER BY 1");
 is($result, qq(), 'truncate of tab1 replicated');
 
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+    "log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_publisher->safe_psql('postgres',
+    "INSERT INTO tab1 VALUES (1, 'foo'), (4, 'bar'), (10, 'baz')");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->safe_psql('postgres',
+    "UPDATE tab1 SET b = 'quux' WHERE a = 4");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+my $logfile = slurp_file($node_subscriber1->logfile());
+ok( $logfile =~
+      qr/logical replication did not find row to be updated in replication target relation's partition "tab1_2_2"/,
+    'update target row is missing in tab1_2_2');
+ok( $logfile =~
+      qr/logical replication did not find row to be deleted in replication target relation "tab1_1"/,
+    'delete target row is missing in tab1_1');
+ok( $logfile =~
+      qr/logical replication did not find row to be deleted in replication target relation "tab1_2_2"/,
+    'delete target row is missing in tab1_2_2');
+ok( $logfile =~
+      qr/logical replication did not find row to be deleted in replication target relation "tab1_def"/,
+    'delete target row is missing in tab1_def');
+
+$node_subscriber1->append_conf('postgresql.conf',
+    "log_min_messages = warning");
+$node_subscriber1->reload;
+
 # Tests for replication using root table identity and schema
 
 # publisher
@@ -650,3 +690,32 @@ BEGIN
 pub_tab2|3|yyy
 pub_tab2|5|zzz
 xxx_c|6|aaa), 'inserts into tab2 replicated');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+    "log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab2");
+
+$node_publisher->safe_psql('postgres',
+    "UPDATE tab2 SET b = 'quux' WHERE a = 5");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab2 WHERE a = 1");
+
+$node_publisher->wait_for_catchup('sub_viaroot');
+$node_publisher->wait_for_catchup('sub2');
+
+$logfile = slurp_file($node_subscriber1->logfile());
+ok( $logfile =~
+      qr/logical replication did not find row to be updated in replication target relation's partition "tab2_1"/,
+    'update target row is missing in tab2_1');
+ok( $logfile =~
+      qr/logical replication did not find row to be deleted in replication target relation "tab2_1"/,
+    'delete target row is missing in tab2_1');
+
+# No need for this until more tests are added.
+# $node_subscriber1->append_conf('postgresql.conf',
+#     "log_min_messages = warning");
+# $node_subscriber1->reload;
