Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 97ddda8

Browse files
committed
Fix data loss in wal_level=minimal crash recovery of CREATE TABLESPACE.
If the system crashed between CREATE TABLESPACE and the next checkpoint, the result could be some files in the tablespace unexpectedly containing no rows. Affected files would be those for which the system did not write WAL; see the wal_skip_threshold documentation. Before v13, a different set of conditions governed the writing of WAL; see v12's <sect2 id="populate-pitr">. (The v12 conditions were broader in some ways and narrower in others.) Users may want to audit non-default tablespaces for unexpected short files. The bug could have truncated an index without affecting the associated table, and reindexing the index would fix that particular problem.

This fixes the bug by making create_tablespace_directories() more like TablespaceCreateDbspace(). create_tablespace_directories() was recursively removing tablespace contents, reasoning that WAL redo would recreate everything removed that way. That assumption holds for other wal_level values. Under wal_level=minimal, the old approach could delete files for which no other copy existed. Back-patch to 9.6 (all supported versions).

Reviewed by Robert Haas and Prabhat Sahu. Reported by Robert Haas.

Discussion: https://postgr.es/m/CA+TgmoaLO9ncuwvr2nN-J4VEP5XyAcy=zKiHxQzBbFRxxGxm0w@mail.gmail.com
1 parent 3778bcb commit 97ddda8

File tree

2 files changed: 46 additions (+46) and 27 deletions (-27).

src/backend/commands/tablespace.c

+19-23
Original file line numberDiff line numberDiff line change
@@ -614,40 +614,36 @@ create_tablespace_directories(const char *location, const Oid tablespaceoid)
614614
location)));
615615
}
616616

617-
if (InRecovery)
618-
{
619-
/*
620-
* Our theory for replaying a CREATE is to forcibly drop the target
621-
* subdirectory if present, and then recreate it. This may be more
622-
* work than needed, but it is simple to implement.
623-
*/
624-
if (stat(location_with_version_dir, &st) == 0 && S_ISDIR(st.st_mode))
625-
{
626-
if (!rmtree(location_with_version_dir, true))
627-
/* If this failed, MakePGDirectory() below is going to error. */
628-
ereport(WARNING,
629-
(errmsg("some useless files may be left behind in old database directory \"%s\"",
630-
location_with_version_dir)));
631-
}
632-
}
633-
634617
/*
635618
* The creation of the version directory prevents more than one tablespace
636-
* in a single location.
619+
* in a single location. This imitates TablespaceCreateDbspace(), but it
620+
* ignores concurrency and missing parent directories. The chmod() would
621+
* have failed in the absence of a parent. pg_tablespace_spcname_index
622+
* prevents concurrency.
637623
*/
638-
if (MakePGDirectory(location_with_version_dir) < 0)
624+
if (stat(location_with_version_dir, &st) < 0)
639625
{
640-
if (errno == EEXIST)
626+
if (errno != ENOENT)
641627
ereport(ERROR,
642-
(errcode(ERRCODE_OBJECT_IN_USE),
643-
errmsg("directory \"%s\" already in use as a tablespace",
628+
(errcode_for_file_access(),
629+
errmsg("could not stat directory \"%s\": %m",
644630
location_with_version_dir)));
645-
else
631+
else if (MakePGDirectory(location_with_version_dir) < 0)
646632
ereport(ERROR,
647633
(errcode_for_file_access(),
648634
errmsg("could not create directory \"%s\": %m",
649635
location_with_version_dir)));
650636
}
637+
else if (!S_ISDIR(st.st_mode))
638+
ereport(ERROR,
639+
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
640+
errmsg("\"%s\" exists but is not a directory",
641+
location_with_version_dir)));
642+
else if (!InRecovery)
643+
ereport(ERROR,
644+
(errcode(ERRCODE_OBJECT_IN_USE),
645+
errmsg("directory \"%s\" already in use as a tablespace",
646+
location_with_version_dir)));
651647

652648
/*
653649
* In recovery, remove old symlink, in case it points to the wrong place.

src/test/recovery/t/018_wal_optimize.pl

+27-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
use PostgresNode;
1616
use TestLib;
17-
use Test::More tests => 34;
17+
use Test::More tests => 38;
1818

1919
sub check_orphan_relfilenodes
2020
{
@@ -59,8 +59,31 @@ sub run_wal_optimize
5959
my $tablespace_dir = $node->basedir . '/tablespace_other';
6060
mkdir($tablespace_dir);
6161
$tablespace_dir = TestLib::perl2host($tablespace_dir);
62-
$node->safe_psql('postgres',
63-
"CREATE TABLESPACE other LOCATION '$tablespace_dir';");
62+
my $result;
63+
64+
# Test redo of CREATE TABLESPACE.
65+
$node->safe_psql(
66+
'postgres', "
67+
CREATE TABLE moved (id int);
68+
INSERT INTO moved VALUES (1);
69+
CREATE TABLESPACE other LOCATION '$tablespace_dir';
70+
BEGIN;
71+
ALTER TABLE moved SET TABLESPACE other;
72+
CREATE TABLE originated (id int);
73+
INSERT INTO originated VALUES (1);
74+
CREATE UNIQUE INDEX ON originated(id) TABLESPACE other;
75+
COMMIT;");
76+
$node->stop('immediate');
77+
$node->start;
78+
$result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;");
79+
is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE");
80+
$result = $node->safe_psql(
81+
'postgres', "
82+
INSERT INTO originated VALUES (1) ON CONFLICT (id)
83+
DO UPDATE set id = originated.id + 1
84+
RETURNING id;");
85+
is($result, qq(2),
86+
"wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX");
6487

6588
# Test direct truncation optimization. No tuples.
6689
$node->safe_psql(
@@ -71,7 +94,7 @@ sub run_wal_optimize
7194
COMMIT;");
7295
$node->stop('immediate');
7396
$node->start;
74-
my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
97+
$result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
7598
is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
7699

77100
# Test truncation with inserted tuples within the same transaction.

0 commit comments

Comments
 (0)