Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4d9ceb0

Browse files
committed
Fix bogus tuple-slot management in logical replication UPDATE handling.
slot_modify_cstrings seriously abused the TupleTableSlot API by relying on a slot's underlying data to stay valid across ExecClearTuple. Since this abuse was also quite undocumented, it's little surprise that the case got broken during the v12 slot rewrites. As reported in bug #16129 from Ondřej Jirman, this could lead to crashes or data corruption when a logical replication subscriber processes a row update. Problems would only arise if the subscriber's table contained columns of pass-by-ref types that were not being copied from the publisher. Fix by explicitly copying the datum/isnull arrays from the source slot that the old row was in already. This ends up being about the same thing that happened pre-v12, but hopefully in a less opaque and fragile way. We might've caught the problem sooner if there were any test cases dealing with updates involving non-replicated or dropped columns. Now there are. Back-patch to v10 where this code came in. Even though the failure does not manifest before v12, IMO this code is too fragile to leave as-is. In any case we certainly want the additional test coverage. Patch by me; thanks to Tomas Vondra for initial investigation. Discussion: https://postgr.es/m/16129-a0c0f48e71741e5f@postgresql.org
1 parent 8959a5e commit 4d9ceb0

File tree

2 files changed

+70
-17
lines changed

2 files changed

+70
-17
lines changed

src/backend/replication/logical/worker.c

+24-8
Original file line numberDiff line numberDiff line change
@@ -363,24 +363,39 @@ slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
363363
}
364364

365365
/*
366-
* Modify slot with user data provided as C strings.
366+
* Replace selected columns with user data provided as C strings.
367367
* This is somewhat similar to heap_modify_tuple but also calls the type
368-
* input function on the user data as the input is the text representation
369-
* of the types.
368+
* input functions on the user data.
369+
* "slot" is filled with a copy of the tuple in "srcslot", with
370+
* columns selected by the "replaces" array replaced with data values
371+
* from "values".
372+
* Caution: unreplaced pass-by-ref columns in "slot" will point into the
373+
* storage for "srcslot". This is OK for current usage, but someday we may
374+
* need to materialize "slot" at the end to make it independent of "srcslot".
370375
*/
371376
static void
372-
slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
377+
slot_modify_cstrings(TupleTableSlot *slot, TupleTableSlot *srcslot,
378+
LogicalRepRelMapEntry *rel,
373379
char **values, bool *replaces)
374380
{
375381
int natts = slot->tts_tupleDescriptor->natts;
376382
int i;
377383
SlotErrCallbackArg errarg;
378384
ErrorContextCallback errcallback;
379385

380-
slot_getallattrs(slot);
386+
/* We'll fill "slot" with a virtual tuple, so we must start with ... */
381387
ExecClearTuple(slot);
382388

383-
/* Push callback + info on the error context stack */
389+
/*
390+
* Copy all the column data from srcslot, so that we'll have valid values
391+
* for unreplaced columns.
392+
*/
393+
Assert(natts == srcslot->tts_tupleDescriptor->natts);
394+
slot_getallattrs(srcslot);
395+
memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
396+
memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
397+
398+
/* For error reporting, push callback + info on the error context stack */
384399
errarg.rel = rel;
385400
errarg.local_attnum = -1;
386401
errarg.remote_attnum = -1;
@@ -428,6 +443,7 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
428443
/* Pop the error context stack */
429444
error_context_stack = errcallback.previous;
430445

446+
/* And finally, declare that "slot" contains a valid virtual tuple */
431447
ExecStoreVirtualTuple(slot);
432448
}
433449

@@ -740,8 +756,8 @@ apply_handle_update(StringInfo s)
740756
{
741757
/* Process and store remote tuple in the slot */
742758
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
743-
ExecCopySlot(remoteslot, localslot);
744-
slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
759+
slot_modify_cstrings(remoteslot, localslot, rel,
760+
newtup.values, newtup.changed);
745761
MemoryContextSwitchTo(oldctx);
746762

747763
EvalPlanQualSetSlot(&epqstate, remoteslot);

src/test/subscription/t/001_rep_changes.pl

+46-9
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use warnings;
44
use PostgresNode;
55
use TestLib;
6-
use Test::More tests => 17;
6+
use Test::More tests => 20;
77

88
# Initialize publisher node
99
my $node_publisher = get_new_node('publisher');
@@ -28,9 +28,9 @@
2828
$node_publisher->safe_psql('postgres',
2929
"CREATE TABLE tab_rep (a int primary key)");
3030
$node_publisher->safe_psql('postgres',
31-
"CREATE TABLE tab_mixed (a int primary key, b text)");
31+
"CREATE TABLE tab_mixed (a int primary key, b text, c numeric)");
3232
$node_publisher->safe_psql('postgres',
33-
"INSERT INTO tab_mixed (a, b) VALUES (1, 'foo')");
33+
"INSERT INTO tab_mixed (a, b, c) VALUES (1, 'foo', 1.1)");
3434
$node_publisher->safe_psql('postgres',
3535
"CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))"
3636
);
@@ -45,7 +45,8 @@
4545

4646
# different column count and order than on publisher
4747
$node_subscriber->safe_psql('postgres',
48-
"CREATE TABLE tab_mixed (c text, b text, a int primary key)");
48+
"CREATE TABLE tab_mixed (d text default 'local', c numeric, b text, a int primary key)"
49+
);
4950

5051
# replication of the table with included index
5152
$node_subscriber->safe_psql('postgres',
@@ -94,7 +95,7 @@
9495
$node_publisher->safe_psql('postgres', "UPDATE tab_rep SET a = -a");
9596

9697
$node_publisher->safe_psql('postgres',
97-
"INSERT INTO tab_mixed VALUES (2, 'bar')");
98+
"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
9899

99100
$node_publisher->safe_psql('postgres',
100101
"INSERT INTO tab_include SELECT generate_series(1,50)");
@@ -112,10 +113,9 @@
112113
"SELECT count(*), min(a), max(a) FROM tab_rep");
113114
is($result, qq(20|-20|-1), 'check replicated changes on subscriber');
114115

115-
$result =
116-
$node_subscriber->safe_psql('postgres', "SELECT c, b, a FROM tab_mixed");
117-
is( $result, qq(|foo|1
118-
|bar|2), 'check replicated changes with different column order');
116+
$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_mixed");
117+
is( $result, qq(local|1.1|foo|1
118+
local|2.2|bar|2), 'check replicated changes with different column order');
119119

120120
$result = $node_subscriber->safe_psql('postgres',
121121
"SELECT count(*), min(a), max(a) FROM tab_include");
@@ -139,11 +139,14 @@
139139
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
140140
$node_subscriber->safe_psql('postgres',
141141
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
142+
# tab_mixed can use DEFAULT, since it has a primary key
142143

143144
# and do the updates
144145
$node_publisher->safe_psql('postgres', "UPDATE tab_full SET a = a * a");
145146
$node_publisher->safe_psql('postgres',
146147
"UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'");
148+
$node_publisher->safe_psql('postgres',
149+
"UPDATE tab_mixed SET b = 'baz' WHERE a = 1");
147150

148151
$node_publisher->wait_for_catchup('tap_sub');
149152

@@ -159,6 +162,40 @@
159162
bb),
160163
'update works with REPLICA IDENTITY FULL and text datums');
161164

165+
$result = $node_subscriber->safe_psql('postgres',
166+
"SELECT * FROM tab_mixed ORDER BY a");
167+
is( $result, qq(local|1.1|baz|1
168+
local|2.2|bar|2),
169+
'update works with different column order and subscriber local values');
170+
171+
# check behavior with dropped columns
172+
173+
$node_publisher->safe_psql('postgres', "ALTER TABLE tab_mixed DROP COLUMN b");
174+
$node_publisher->safe_psql('postgres',
175+
"UPDATE tab_mixed SET c = 11.11 WHERE a = 1");
176+
177+
$node_publisher->wait_for_catchup('tap_sub');
178+
179+
$result = $node_subscriber->safe_psql('postgres',
180+
"SELECT * FROM tab_mixed ORDER BY a");
181+
is( $result, qq(local|11.11|baz|1
182+
local|2.2|bar|2),
183+
'update works with dropped publisher column');
184+
185+
$node_subscriber->safe_psql('postgres',
186+
"ALTER TABLE tab_mixed DROP COLUMN d");
187+
188+
$node_publisher->safe_psql('postgres',
189+
"UPDATE tab_mixed SET c = 22.22 WHERE a = 2");
190+
191+
$node_publisher->wait_for_catchup('tap_sub');
192+
193+
$result = $node_subscriber->safe_psql('postgres',
194+
"SELECT * FROM tab_mixed ORDER BY a");
195+
is( $result, qq(11.11|baz|1
196+
22.22|bar|2),
197+
'update works with dropped subscriber column');
198+
162199
# check that change of connection string and/or publication list causes
163200
# restart of subscription workers. Not all of these are registered as tests
164201
# as we need to poll for a change but the test suite will fail none the less

0 commit comments

Comments
 (0)