/* GUC variable */
bool ignore_invalid_pages = false;
+
+/*
+ * If a create database WAL record is being replayed more than once during
+ * crash recovery on a standby, it is possible that either the tablespace
+ * directory or the template database directory is missing. This happens when
+ * the directories are removed by replay of subsequent drop records. Note
+ * that this problem happens only on standby and not on master. On master, a
+ * checkpoint is created at the end of create database operation. On standby,
+ * however, such a strategy (creating restart points during replay) is not
+ * viable because it will slow down WAL replay.
+ *
+ * The alternative is to track references to each missing directory
+ * encountered when performing crash recovery in the following hash table.
+ * Similar to the invalid page table below, the expectation is that each
+ * missing directory entry should be matched with a drop database or drop
+ * tablespace WAL record by the end of crash recovery.
+ */
+typedef struct xl_missing_dir_key /* hash key of missing_dir_tab (see ctl.keysize) */
+{
+ Oid spcNode; /* tablespace OID; callers assert that it is valid */
+ Oid dbNode; /* database OID, or InvalidOid for a tablespace directory */
+} xl_missing_dir_key;
+
+typedef struct xl_missing_dir /* one entry of missing_dir_tab (see ctl.entrysize) */
+{
+ xl_missing_dir_key key; /* lookup key; NOTE(review): dynahash expects the key first in the entry -- confirm */
+ char path[MAXPGPATH]; /* path of the missing directory, copied when the entry is created */
+} xl_missing_dir;
+
+static HTAB *missing_dir_tab = NULL;
+
+
+/*
+ * Record a directory that could not be found while replaying a database
+ * creation record.
+ *
+ * spcNode must be a valid tablespace OID.  dbNode is either InvalidOid, in
+ * which case the entry stands for the tablespace directory itself, or the
+ * OID of the database whose directory is missing.  path is the directory's
+ * path; it is copied into a newly created entry and otherwise only used in
+ * debug messages.
+ *
+ * Each entry is expected to be removed again by XLogForgetMissingDir() when
+ * the matching drop database or drop tablespace record is replayed later in
+ * the WAL stream; XLogCheckMissingDirs() complains about anything left over.
+ */
+void
+XLogRememberMissingDir(Oid spcNode, Oid dbNode, char *path)
+{
+	xl_missing_dir_key lookup;
+	xl_missing_dir *slot;
+	bool		already_known;
+
+	/* The database OID is optional, the tablespace OID is not. */
+	Assert(OidIsValid(spcNode));
+
+	/* Build the table lazily, on the first missing directory we see. */
+	if (missing_dir_tab == NULL)
+	{
+		HASHCTL		info;
+
+		memset(&info, 0, sizeof(info));
+		info.keysize = sizeof(xl_missing_dir_key);
+		info.entrysize = sizeof(xl_missing_dir);
+
+		missing_dir_tab = hash_create("XLOG missing directory table",
+									  100, &info,
+									  HASH_ELEM | HASH_BLOBS);
+	}
+
+	lookup.spcNode = spcNode;
+	lookup.dbNode = dbNode;
+
+	slot = hash_search(missing_dir_tab, &lookup, HASH_ENTER, &already_known);
+
+	if (!already_known)
+	{
+		/* Fresh entry: remember the path and report what was logged. */
+		strlcpy(slot->path, path, sizeof(slot->path));
+
+		if (OidIsValid(dbNode))
+			elog(DEBUG1, "logged missing dir %s (tablespace %u database %u)",
+				 path, spcNode, dbNode);
+		else
+			elog(DEBUG1, "logged missing dir %s (tablespace %u)",
+				 path, spcNode);
+		return;
+	}
+
+	/* Duplicate report: the path stored earlier is kept untouched. */
+	if (OidIsValid(dbNode))
+		elog(DEBUG1, "missing directory %s (tablespace %u database %u) already exists: %s",
+			 path, spcNode, dbNode, slot->path);
+	else
+		elog(DEBUG1, "missing directory %s (tablespace %u) already exists: %s",
+			 path, spcNode, slot->path);
+}
+
+/*
+ * Drop the entry for (spcNode, dbNode) from the table of missing
+ * directories, if there is one.  Meant to be called when the WAL record
+ * that removes the database or tablespace directory is replayed.
+ *
+ * spcNode must be a valid tablespace OID; dbNode is InvalidOid when the
+ * entry refers to the tablespace directory itself.  Nothing happens if the
+ * table was never created or holds no matching entry.
+ */
+void
+XLogForgetMissingDir(Oid spcNode, Oid dbNode)
+{
+	xl_missing_dir_key target;
+	char	   *dbpath;
+
+	/* The database OID is optional, the tablespace OID is not. */
+	Assert(OidIsValid(spcNode));
+
+	/* No table means no directory was ever reported missing. */
+	if (missing_dir_tab == NULL)
+		return;
+
+	target.spcNode = spcNode;
+	target.dbNode = dbNode;
+
+	/* hash_search() returns NULL when there was nothing to remove. */
+	if (hash_search(missing_dir_tab, &target, HASH_REMOVE, NULL) == NULL)
+		return;
+
+	if (!OidIsValid(dbNode))
+	{
+		elog(DEBUG2, "forgot missing dir (tablespace %u)", spcNode);
+		return;
+	}
+
+	/* The path is rebuilt only for the message and released right after. */
+	dbpath = GetDatabasePath(dbNode, spcNode);
+	elog(DEBUG2, "forgot missing dir %s (tablespace %u database %u)",
+		 dbpath, spcNode, dbNode);
+	pfree(dbpath);
+}
+
+/*
+ * Verify that every directory reported missing has since been forgotten.
+ *
+ * Intended to run at the end of crash recovery, before entering archive
+ * recovery on a standby.  Each entry still in the table is reported with a
+ * WARNING, and if there was at least one we PANIC.  Otherwise the table is
+ * destroyed and the pointer reset.
+ */
+void
+XLogCheckMissingDirs(void)
+{
+	HASH_SEQ_STATUS scan;
+	xl_missing_dir *leftover;
+	bool		leftovers = false;
+
+	/* The table is only created once a directory was reported missing. */
+	if (missing_dir_tab == NULL)
+		return;
+
+	hash_seq_init(&scan, missing_dir_tab);
+
+	for (leftover = (xl_missing_dir *) hash_seq_search(&scan);
+		 leftover != NULL;
+		 leftover = (xl_missing_dir *) hash_seq_search(&scan))
+	{
+		elog(WARNING, "missing directory \"%s\" tablespace %u database %u",
+			 leftover->path, leftover->key.spcNode, leftover->key.dbNode);
+		leftovers = true;
+	}
+
+	if (leftovers)
+		elog(PANIC, "WAL contains references to missing directories");
+
+	/* Everything matched up; release the table. */
+	hash_destroy(missing_dir_tab);
+	missing_dir_tab = NULL;
+}
+
+
/*
* During XLOG replay, we may see XLOG records for incremental updates of
* pages that no longer exist, because their relation was later dropped or
static HTAB *invalid_page_tab = NULL;
-
/* Report a reference to an invalid page */
static void
report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
char *src_path;
char *dst_path;
+ char *parent_path;
struct stat st;
+ bool skip = false;
src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
(errmsg("some useless files may be left behind in old database directory \"%s\"",
dst_path)));
}
+ else if (!reachedConsistency)
+ {
+ /*
+ * It is possible that a drop tablespace record appearing later in
+ * WAL has already been replayed -- in other words, that we are
+ * replaying the database creation record a second time with no
+ * intervening checkpoint. In that case, the tablespace directory
+ * has already been removed and the create database operation
+ * cannot be replayed. Skip the replay itself, but remember the
+ * fact that the tablespace directory is missing, to be matched
+ * with the expected tablespace drop record later.
+ */
+ parent_path = pstrdup(dst_path);
+ get_parent_directory(parent_path);
+ if (!(stat(parent_path, &st) == 0 && S_ISDIR(st.st_mode)))
+ {
+ XLogRememberMissingDir(xlrec->tablespace_id, InvalidOid, parent_path);
+ skip = true;
+ ereport(WARNING,
+ (errmsg("skipping replay of database creation WAL record"),
+ errdetail("The target tablespace \"%s\" directory was not found.",
+ parent_path),
+ errhint("A future WAL record that removes the directory before reaching consistent mode is expected.")));
+ }
+ pfree(parent_path);
+ }
+
+ /*
+ * If the source directory is missing, skip the copy and make a note of
+ * it for later.
+ *
+ * One possible reason for this is that the template database used for
+ * creating this database may have been dropped, as noted above.
+ * Moving a database from one tablespace to another may also be a partner
+ * in crime.
+ */
+ if (!(stat(src_path, &st) == 0 && S_ISDIR(st.st_mode)) &&
+ !reachedConsistency)
+ {
+ XLogRememberMissingDir(xlrec->src_tablespace_id, xlrec->src_db_id, src_path);
+ skip = true;
+ ereport(WARNING,
+ (errmsg("skipping replay of database creation WAL record"),
+ errdetail("The source database directory \"%s\" was not found.",
+ src_path),
+ errhint("A future WAL record that removes the directory before reaching consistent mode is expected.")));
+ }
+
+ if (skip)
+ return;
/*
* Force dirty buffers out to disk, to ensure source database is
ereport(WARNING,
(errmsg("some useless files may be left behind in old database directory \"%s\"",
dst_path)));
+
+ if (!reachedConsistency)
+ XLogForgetMissingDir(xlrec->tablespace_ids[i], xlrec->db_id);
+
pfree(dst_path);
}
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
{
xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
+ if (!reachedConsistency)
+ XLogForgetMissingDir(xlrec->ts_id, InvalidOid);
+
+ /*
+ * Before we remove the tablespace directory, update minimum recovery
+ * point to cover this WAL record. Once the tablespace is removed,
+ * there's no going back. This manually enforces the WAL-first rule.
+ * Doing this before the removal means that if the removal fails for
+ * some reason, the directory is left alone and needs to be manually
+ * removed. Alternatively we could update the minimum recovery point
+ * after removal, but that would leave a small window where the
+ * WAL-first rule could be violated.
+ */
+ if (!reachedConsistency)
+ XLogFlush(record->EndRecPtr);
+
/*
* If we issued a WAL record for a drop tablespace it implies that
* there were no files in it at all when the DROP was done. That means