#
# Test situation where a target data directory contains
# WAL records beyond both the last checkpoint and the divergence
# point:
#
# Target WAL (TLI 2):
#
# backup ... Checkpoint A ... INSERT 'rewind this'
#            (TLI 1 -> 2)
#
#            ^ last common                        ^ minRecoveryPoint
#              checkpoint
#
# Source WAL (TLI 3):
#
# backup ... Checkpoint A ... Checkpoint B ... INSERT 'keep this'
#            (TLI 1 -> 2)     (TLI 2 -> 3)
#
#
# The last common checkpoint is Checkpoint A. But there is WAL on TLI 2
# after the last common checkpoint that needs to be rewound. We used to
# have a bug where minRecoveryPoint was ignored, and pg_rewind concluded
# that the target doesn't need rewinding in this scenario, because the
# last checkpoint on the target TLI was an ancestor of the source TLI.
#
#
# This test does not make use of RewindTest as it requires three
# nodes.
#
# Roles of the nodes in this test:
#   node_1 - original primary; later rejoins node_3 and is promoted again.
#            It is the pg_rewind source.
#   node_2 - standby that follows node_3 after the first promotion.
#            It is the pg_rewind target.
#   node_3 - standby that is promoted first; its post-promotion insert
#            is what has to be rewound away on node_2.

use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 3;

use File::Copy;

my $tmp_folder = TestLib::tempdir;

my $node_1 = get_new_node('node_1');
$node_1->init(allows_streaming => 1);
$node_1->append_conf('postgresql.conf', qq(
wal_keep_size='100 MB'
));

$node_1->start;

# Create a couple of test tables
$node_1->safe_psql('postgres', 'CREATE TABLE public.foo (t TEXT)');
$node_1->safe_psql('postgres', 'CREATE TABLE public.bar (t TEXT)');
$node_1->safe_psql('postgres', "INSERT INTO public.bar VALUES ('in both')");


# Take backup
my $backup_name = 'my_backup';
$node_1->backup($backup_name);

# Create streaming standby from backup
my $node_2 = get_new_node('node_2');
$node_2->init_from_backup($node_1, $backup_name,
	has_streaming => 1);
$node_2->start;

# Create a second streaming standby from the same backup
my $node_3 = get_new_node('node_3');
$node_3->init_from_backup($node_1, $backup_name,
	has_streaming => 1);
$node_3->start;

# Wait until node 3 has connected and caught up with node 1 before
# shutting node 1 down. Otherwise node 3 might be promoted without
# having received all of node 1's WAL, and node 1 could then fail to
# rejoin it as a standby.
my $lsn = $node_1->lsn('insert');
$node_1->wait_for_catchup('node_3', 'replay', $lsn);

# Stop node_1

$node_1->stop('fast');

# Promote node_3
$node_3->promote;

# node_1 rejoins node_3

my $node_3_connstr = $node_3->connstr;

$node_1->append_conf('postgresql.conf', qq(
primary_conninfo='$node_3_connstr'
));
$node_1->set_standby_mode();
$node_1->start();

# node_2 follows node_3

$node_2->append_conf('postgresql.conf', qq(
primary_conninfo='$node_3_connstr'
));
$node_2->restart();

# Promote node_1

$node_1->promote;

# We now have a split-brain with two primaries. Insert a row on both to
# demonstratively create a split brain. After the rewind, we should only
# see the insert on 1, as the insert on node 3 is rewound away.
$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('keep this')");

# The row on node 3 goes into 'bar'. That table is unmodified in node 1,
# so it won't get overwritten by replaying the WAL from node 1.
$node_3->safe_psql('postgres', "INSERT INTO public.bar (t) VALUES ('rewind this')");

# Insert more rows in node 1, to bump up the XID counter. Otherwise, if
# rewind doesn't correctly rewind the changes made on the other node,
# we might fail to notice if the inserts are invisible because the XIDs
# are not marked as committed.
$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this')");
$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this too')");

# Wait for node 2 to catch up, i.e. to have replayed the insert made on
# node 3. Fail right away on timeout instead of continuing with a node
# that is not in the state this test is about.
$node_2->poll_query_until('postgres',
	q|SELECT COUNT(*) > 1 FROM public.bar|, 't')
  or die "Timed out while waiting for node_2 to replay the insert from node_3";

# At this point node_2 will shut down without a shutdown checkpoint,
# but with WAL entries beyond the preceding shutdown checkpoint.
$node_2->stop('fast');
$node_3->stop('fast');

my $node_2_pgdata = $node_2->data_dir;
my $node_1_connstr = $node_1->connstr;

# Keep a temporary postgresql.conf or it would be overwritten during the rewind.
my $saved_conf = "$tmp_folder/node_2-postgresql.conf.tmp";
copy("$node_2_pgdata/postgresql.conf", $saved_conf)
  or die "could not save node_2's postgresql.conf: $!";

command_ok(
	[
		'pg_rewind',
		"--source-server=$node_1_connstr",
		"--target-pgdata=$node_2_pgdata"
	],
	'pg_rewind detects rewind needed');

# Now move back postgresql.conf with old settings
move($saved_conf, "$node_2_pgdata/postgresql.conf")
  or die "could not restore node_2's postgresql.conf: $!";

$node_2->start;

# Check contents of the test tables after rewind. The rows inserted in node 3
# before rewind should've been overwritten with the data from node 1.
$node_2->safe_psql('postgres', 'checkpoint');

my $result = $node_2->safe_psql('postgres', 'SELECT * FROM public.foo');
is($result, qq(keep this
and this
and this too), 'table foo after rewind');

$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.bar');
is($result, qq(in both), 'table bar after rewind');