Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c66a7d7

Browse files
committed
Handle DROP DATABASE getting interrupted
Until now, when DROP DATABASE got interrupted in the wrong moment, the removal of the pg_database row would also roll back, even though some irreversible steps have already been taken. E.g. DropDatabaseBuffers() might have thrown out dirty buffers, or files could have been unlinked. But we continued to allow connections to such a corrupted database. To fix this, mark databases invalid with an in-place update, just before starting to perform irreversible steps. As we can't add a new column in the back branches, we use pg_database.datconnlimit = -2 for this purpose. An invalid database cannot be connected to anymore, but can still be dropped. Unfortunately we can't easily add output to psql's \l to indicate that some database is invalid, it doesn't fit in any of the existing columns. Add tests verifying that a interrupted DROP DATABASE is handled correctly in the backend and in various tools. Reported-by: Evgeny Morozov <postgresql3@realityexists.net> Author: Andres Freund <andres@anarazel.de> Reviewed-by: Daniel Gustafsson <daniel@yesql.se> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/20230509004637.cgvmfwrbht7xm7p6@awork3.anarazel.de Discussion: https://postgr.es/m/20230314174521.74jl6ffqsee5mtug@awork3.anarazel.de Backpatch: 11-, bug present in all supported versions
1 parent 83ecfa9 commit c66a7d7

20 files changed

+421
-28
lines changed

doc/src/sgml/catalogs.sgml

+2-1
Original file line numberDiff line numberDiff line change
@@ -3042,7 +3042,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
30423042
</para>
30433043
<para>
30443044
Sets maximum number of concurrent connections that can be made
3045-
to this database. -1 means no limit.
3045+
to this database. -1 means no limit, -2 indicates the database is
3046+
invalid.
30463047
</para></entry>
30473048
</row>
30483049

src/backend/commands/dbcommands.c

+90-20
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
719719
int encoding = -1;
720720
bool dbistemplate = false;
721721
bool dballowconnections = true;
722-
int dbconnlimit = -1;
722+
int dbconnlimit = DATCONNLIMIT_UNLIMITED;
723723
char *dbcollversion = NULL;
724724
int notherbackends;
725725
int npreparedxacts;
@@ -926,7 +926,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
926926
if (dconnlimit && dconnlimit->arg)
927927
{
928928
dbconnlimit = defGetInt32(dconnlimit);
929-
if (dbconnlimit < -1)
929+
if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
930930
ereport(ERROR,
931931
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
932932
errmsg("invalid connection limit: %d", dbconnlimit)));
@@ -977,6 +977,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
977977
errmsg("template database \"%s\" does not exist",
978978
dbtemplate)));
979979

980+
/*
981+
* If the source database was in the process of being dropped, we can't
982+
* use it as a template.
983+
*/
984+
if (database_is_invalid_oid(src_dboid))
985+
ereport(ERROR,
986+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
987+
errmsg("cannot use invalid database \"%s\" as template", dbtemplate),
988+
errhint("Use DROP DATABASE to drop invalid databases."));
989+
980990
/*
981991
* Permission check: to copy a DB that's not marked datistemplate, you
982992
* must be superuser or the owner thereof.
@@ -1576,6 +1586,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
15761586
bool db_istemplate;
15771587
Relation pgdbrel;
15781588
HeapTuple tup;
1589+
Form_pg_database datform;
15791590
int notherbackends;
15801591
int npreparedxacts;
15811592
int nslots,
@@ -1691,17 +1702,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
16911702
dbname),
16921703
errdetail_busy_db(notherbackends, npreparedxacts)));
16931704

1694-
/*
1695-
* Remove the database's tuple from pg_database.
1696-
*/
1697-
tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id));
1698-
if (!HeapTupleIsValid(tup))
1699-
elog(ERROR, "cache lookup failed for database %u", db_id);
1700-
1701-
CatalogTupleDelete(pgdbrel, &tup->t_self);
1702-
1703-
ReleaseSysCache(tup);
1704-
17051705
/*
17061706
* Delete any comments or security labels associated with the database.
17071707
*/
@@ -1718,6 +1718,37 @@ dropdb(const char *dbname, bool missing_ok, bool force)
17181718
*/
17191719
dropDatabaseDependencies(db_id);
17201720

1721+
/*
1722+
* Tell the cumulative stats system to forget it immediately, too.
1723+
*/
1724+
pgstat_drop_database(db_id);
1725+
1726+
tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
1727+
if (!HeapTupleIsValid(tup))
1728+
elog(ERROR, "cache lookup failed for database %u", db_id);
1729+
datform = (Form_pg_database) GETSTRUCT(tup);
1730+
1731+
/*
1732+
* Except for the deletion of the catalog row, subsequent actions are not
1733+
* transactional (consider DropDatabaseBuffers() discarding modified
1734+
* buffers). But we might crash or get interrupted below. To prevent
1735+
* accesses to a database with invalid contents, mark the database as
1736+
* invalid using an in-place update.
1737+
*
1738+
* We need to flush the WAL before continuing, to guarantee the
1739+
* modification is durable before performing irreversible filesystem
1740+
* operations.
1741+
*/
1742+
datform->datconnlimit = DATCONNLIMIT_INVALID_DB;
1743+
heap_inplace_update(pgdbrel, tup);
1744+
XLogFlush(XactLastRecEnd);
1745+
1746+
/*
1747+
* Also delete the tuple - transactionally. If this transaction commits,
1748+
* the row will be gone, but if we fail, dropdb() can be invoked again.
1749+
*/
1750+
CatalogTupleDelete(pgdbrel, &tup->t_self);
1751+
17211752
/*
17221753
* Drop db-specific replication slots.
17231754
*/
@@ -1730,11 +1761,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
17301761
*/
17311762
DropDatabaseBuffers(db_id);
17321763

1733-
/*
1734-
* Tell the cumulative stats system to forget it immediately, too.
1735-
*/
1736-
pgstat_drop_database(db_id);
1737-
17381764
/*
17391765
* Tell checkpointer to forget any pending fsync and unlink requests for
17401766
* files in the database; else the fsyncs will fail at next checkpoint, or
@@ -2248,7 +2274,7 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
22482274
ListCell *option;
22492275
bool dbistemplate = false;
22502276
bool dballowconnections = true;
2251-
int dbconnlimit = -1;
2277+
int dbconnlimit = DATCONNLIMIT_UNLIMITED;
22522278
DefElem *distemplate = NULL;
22532279
DefElem *dallowconnections = NULL;
22542280
DefElem *dconnlimit = NULL;
@@ -2319,7 +2345,7 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
23192345
if (dconnlimit && dconnlimit->arg)
23202346
{
23212347
dbconnlimit = defGetInt32(dconnlimit);
2322-
if (dbconnlimit < -1)
2348+
if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
23232349
ereport(ERROR,
23242350
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
23252351
errmsg("invalid connection limit: %d", dbconnlimit)));
@@ -2346,6 +2372,14 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
23462372
datform = (Form_pg_database) GETSTRUCT(tuple);
23472373
dboid = datform->oid;
23482374

2375+
if (database_is_invalid_form(datform))
2376+
{
2377+
ereport(FATAL,
2378+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2379+
errmsg("cannot alter invalid database \"%s\"", stmt->dbname),
2380+
errhint("Use DROP DATABASE to drop invalid databases."));
2381+
}
2382+
23492383
if (!object_ownercheck(DatabaseRelationId, dboid, GetUserId()))
23502384
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
23512385
stmt->dbname);
@@ -3064,6 +3098,42 @@ get_database_name(Oid dbid)
30643098
return result;
30653099
}
30663100

3101+
3102+
/*
3103+
* While dropping a database the pg_database row is marked invalid, but the
3104+
* catalog contents still exist. Connections to such a database are not
3105+
* allowed.
3106+
*/
3107+
bool
3108+
database_is_invalid_form(Form_pg_database datform)
3109+
{
3110+
return datform->datconnlimit == DATCONNLIMIT_INVALID_DB;
3111+
}
3112+
3113+
3114+
/*
3115+
* Convenience wrapper around database_is_invalid_form()
3116+
*/
3117+
bool
3118+
database_is_invalid_oid(Oid dboid)
3119+
{
3120+
HeapTuple dbtup;
3121+
Form_pg_database dbform;
3122+
bool invalid;
3123+
3124+
dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dboid));
3125+
if (!HeapTupleIsValid(dbtup))
3126+
elog(ERROR, "cache lookup failed for database %u", dboid);
3127+
dbform = (Form_pg_database) GETSTRUCT(dbtup);
3128+
3129+
invalid = database_is_invalid_form(dbform);
3130+
3131+
ReleaseSysCache(dbtup);
3132+
3133+
return invalid;
3134+
}
3135+
3136+
30673137
/*
30683138
* recovery_create_dbdir()
30693139
*

src/backend/commands/vacuum.c

+14
Original file line numberDiff line numberDiff line change
@@ -1850,6 +1850,20 @@ vac_truncate_clog(TransactionId frozenXID,
18501850
Assert(TransactionIdIsNormal(datfrozenxid));
18511851
Assert(MultiXactIdIsValid(datminmxid));
18521852

1853+
/*
1854+
* If database is in the process of getting dropped, or has been
1855+
* interrupted while doing so, no connections to it are possible
1856+
* anymore. Therefore we don't need to take it into account here.
1857+
* Which is good, because it can't be processed by autovacuum either.
1858+
*/
1859+
if (database_is_invalid_form((Form_pg_database) dbform))
1860+
{
1861+
elog(DEBUG2,
1862+
"skipping invalid database \"%s\" while computing relfrozenxid",
1863+
NameStr(dbform->datname));
1864+
continue;
1865+
}
1866+
18531867
/*
18541868
* If things are working properly, no database should have a
18551869
* datfrozenxid or datminmxid that is "in the future". However, such

src/backend/postmaster/autovacuum.c

+12
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,18 @@ get_database_list(void)
19721972
avw_dbase *avdb;
19731973
MemoryContext oldcxt;
19741974

1975+
/*
1976+
* If database has partially been dropped, we can't, nor need to,
1977+
* vacuum it.
1978+
*/
1979+
if (database_is_invalid_form(pgdatabase))
1980+
{
1981+
elog(DEBUG2,
1982+
"autovacuum: skipping invalid database \"%s\"",
1983+
NameStr(pgdatabase->datname));
1984+
continue;
1985+
}
1986+
19751987
/*
19761988
* Allocate our results in the caller's context, not the
19771989
* transaction's. We do this inside the loop, and restore the original

src/backend/utils/init/postinit.c

+10
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
11161116
if (!bootstrap)
11171117
{
11181118
HeapTuple tuple;
1119+
Form_pg_database datform;
11191120

11201121
tuple = GetDatabaseTuple(dbname);
11211122
if (!HeapTupleIsValid(tuple) ||
@@ -1125,6 +1126,15 @@ InitPostgres(const char *in_dbname, Oid dboid,
11251126
(errcode(ERRCODE_UNDEFINED_DATABASE),
11261127
errmsg("database \"%s\" does not exist", dbname),
11271128
errdetail("It seems to have just been dropped or renamed.")));
1129+
1130+
datform = (Form_pg_database) GETSTRUCT(tuple);
1131+
if (database_is_invalid_form(datform))
1132+
{
1133+
ereport(FATAL,
1134+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1135+
errmsg("cannot connect to invalid database \"%s\"", dbname),
1136+
errhint("Use DROP DATABASE to drop invalid databases."));
1137+
}
11281138
}
11291139

11301140
/*

src/bin/pg_amcheck/pg_amcheck.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,7 @@ compile_database_list(PGconn *conn, SimplePtrList *databases,
15901590
"FROM pg_catalog.pg_database d "
15911591
"LEFT OUTER JOIN exclude_raw e "
15921592
"ON d.datname ~ e.rgx "
1593-
"\nWHERE d.datallowconn "
1593+
"\nWHERE d.datallowconn AND datconnlimit != -2 "
15941594
"AND e.pattern_id IS NULL"
15951595
"),"
15961596

src/bin/pg_amcheck/t/002_nonesuch.pl

+34
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,40 @@
291291
'many unmatched patterns and one matched pattern under --no-strict-names'
292292
);
293293

294+
295+
#########################################
296+
# Test that an invalid / partially dropped database won't be targeted
297+
298+
$node->safe_psql(
299+
'postgres', q(
300+
CREATE DATABASE regression_invalid;
301+
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
302+
));
303+
304+
$node->command_checks_all(
305+
[
306+
'pg_amcheck', '-d', 'regression_invalid'
307+
],
308+
1,
309+
[qr/^$/],
310+
[
311+
qr/pg_amcheck: error: no connectable databases to check matching "regression_invalid"/,
312+
],
313+
'checking handling of invalid database');
314+
315+
$node->command_checks_all(
316+
[
317+
'pg_amcheck', '-d', 'postgres',
318+
'-t', 'regression_invalid.public.foo',
319+
],
320+
1,
321+
[qr/^$/],
322+
[
323+
qr/pg_amcheck: error: no connectable databases to check matching "regression_invalid.public.foo"/,
324+
],
325+
'checking handling of object in invalid database');
326+
327+
294328
#########################################
295329
# Test checking otherwise existent objects but in databases where they do not exist
296330

src/bin/pg_dump/pg_dumpall.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1345,7 +1345,7 @@ dropDBs(PGconn *conn)
13451345
res = executeQuery(conn,
13461346
"SELECT datname "
13471347
"FROM pg_database d "
1348-
"WHERE datallowconn "
1348+
"WHERE datallowconn AND datconnlimit != -2 "
13491349
"ORDER BY datname");
13501350

13511351
if (PQntuples(res) > 0)
@@ -1488,7 +1488,7 @@ dumpDatabases(PGconn *conn)
14881488
res = executeQuery(conn,
14891489
"SELECT datname "
14901490
"FROM pg_database d "
1491-
"WHERE datallowconn "
1491+
"WHERE datallowconn AND datconnlimit != -2 "
14921492
"ORDER BY (datname <> 'template1'), datname");
14931493

14941494
if (PQntuples(res) > 0)

src/bin/pg_dump/t/002_pg_dump.pl

+19
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,17 @@
19071907
},
19081908
},
19091909

1910+
'CREATE DATABASE regression_invalid...' => {
1911+
create_order => 1,
1912+
create_sql => q(
1913+
CREATE DATABASE regression_invalid;
1914+
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid'),
1915+
regexp => qr/^CREATE DATABASE regression_invalid/m,
1916+
not_like => {
1917+
pg_dumpall_dbprivs => 1,
1918+
},
1919+
},
1920+
19101921
'CREATE ACCESS METHOD gist2' => {
19111922
create_order => 52,
19121923
create_sql =>
@@ -4690,6 +4701,14 @@
46904701
qr/pg_dump: error: connection to server .* failed: FATAL: database "qqq" does not exist/,
46914702
'connecting to a non-existent database');
46924703
4704+
#########################################
4705+
# Test connecting to an invalid database
4706+
4707+
$node->command_fails_like(
4708+
[ 'pg_dump', '-d', 'regression_invalid' ],
4709+
qr/pg_dump: error: connection to server .* failed: FATAL: cannot connect to invalid database "regression_invalid"/,
4710+
'connecting to an invalid database');
4711+
46934712
#########################################
46944713
# Test connecting with an unprivileged user
46954714

0 commit comments

Comments
 (0)