pg_upgrade: detect stale postmaster.pid lock files

bmomjian · bmomjian · commit a9ceaa53be76 · 2013-01-24T15:20:40.000-05:00
If the postmaster.pid lock file exists, try starting and stopping the
cluster to determine whether the lock file is stale or belongs to a
running server.

Per request from Tom.
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
@@ -56,11 +56,10 @@ fix_path_separator(char *path)
 }
 
 void
-output_check_banner(bool *live_check)
+output_check_banner(bool live_check)
 {
-	if (user_opts.check && is_server_running(old_cluster.pgdata))
+	if (user_opts.check && live_check)
 	{
-		*live_check = true;
 		pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");
 		pg_log(PG_REPORT, "------------------------------------------------\n");
 	}
@@ -78,7 +77,7 @@ check_and_dump_old_cluster(bool live_check, char **sequence_script_file_name)
 	/* -- OLD -- */
 
 	if (!live_check)
-		start_postmaster(&old_cluster);
+		start_postmaster(&old_cluster, true);
 
 	set_locale_and_encoding(&old_cluster);
 
@@ -201,7 +200,7 @@ issue_warnings(char *sequence_script_file_name)
 	/* old = PG 8.3 warnings? */
 	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)
 	{
-		start_postmaster(&new_cluster);
+		start_postmaster(&new_cluster, true);
 
 		/* restore proper sequence values using file created from old server */
 		if (sequence_script_file_name)
@@ -224,7 +223,7 @@ issue_warnings(char *sequence_script_file_name)
 	/* Create dummy large object permissions for old < PG 9.0? */
 	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
 	{
-		start_postmaster(&new_cluster);
+		start_postmaster(&new_cluster, true);
 		new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
 		stop_postmaster(false);
 	}
diff --git a/contrib/pg_upgrade/exec.c b/contrib/pg_upgrade/exec.c
@@ -140,13 +140,12 @@ exec_prog(const char *log_file, const char *opt_log_file,
 
 
 /*
- * is_server_running()
+ * pid_lock_file_exists()
  *
- * checks whether postmaster on the given data directory is running or not.
- * The check is performed by looking for the existence of postmaster.pid file.
+ * Checks whether the postmaster.pid file exists.
  */
 bool
-is_server_running(const char *datadir)
+pid_lock_file_exists(const char *datadir)
 {
 	char		path[MAXPGPATH];
 	int			fd;
@@ -180,8 +179,6 @@ void
 verify_directories(void)
 {
 
-	prep_status("Checking current, bin, and data directories");
-
 #ifndef WIN32
 	if (access(".", R_OK | W_OK | X_OK) != 0)
 #else
@@ -194,7 +191,6 @@ verify_directories(void)
 	check_data_dir(old_cluster.pgdata);
 	check_bin_dir(&new_cluster);
 	check_data_dir(new_cluster.pgdata);
-	check_ok();
 }
 
 
diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
@@ -48,7 +48,7 @@ static void prepare_new_databases(void);
 static void create_new_objects(void);
 static void copy_clog_xlog_xid(void);
 static void set_frozenxids(void);
-static void setup(char *argv0, bool live_check);
+static void setup(char *argv0, bool *live_check);
 static void cleanup(void);
 
 ClusterInfo old_cluster,
@@ -80,9 +80,9 @@ main(int argc, char **argv)
 	adjust_data_dir(&old_cluster);
 	adjust_data_dir(&new_cluster);
 
-	output_check_banner(&live_check);
+	setup(argv[0], &live_check);
 
-	setup(argv[0], live_check);
+	output_check_banner(live_check);
 
 	check_cluster_versions();
 
@@ -95,7 +95,7 @@ main(int argc, char **argv)
 
 
 	/* -- NEW -- */
-	start_postmaster(&new_cluster);
+	start_postmaster(&new_cluster, true);
 
 	check_new_cluster();
 	report_clusters_compatible();
@@ -116,7 +116,7 @@ main(int argc, char **argv)
 	/* New now using xids of the old system */
 
 	/* -- NEW -- */
-	start_postmaster(&new_cluster);
+	start_postmaster(&new_cluster, true);
 
 	prepare_new_databases();
 
@@ -177,7 +177,7 @@ main(int argc, char **argv)
 
 
 static void
-setup(char *argv0, bool live_check)
+setup(char *argv0, bool *live_check)
 {
 	char		exec_path[MAXPGPATH];	/* full path to my executable */
 
@@ -189,15 +189,39 @@ setup(char *argv0, bool live_check)
 
 	verify_directories();
 
-	/* no postmasters should be running */
-	if (!live_check && is_server_running(old_cluster.pgdata))
-		pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
-			   "Please shutdown that postmaster and try again.\n");
+	/* no postmasters should be running, except for a live check */
+	if (pid_lock_file_exists(old_cluster.pgdata))
+	{
+		/*
+		 *	If we have a postmaster.pid file, try to start the server.  If
+		 *	it starts, the pid file was stale, so stop the server.  If it
+		 *	doesn't start, assume the server is running.  If the pid file
+		 *	is left over from a server crash, this also allows any committed
+		 *	transactions stored in the WAL to be replayed so they are not
+		 *	lost, because WAL files are not transferred from old to new
+		 *	servers.
+		 */		
+		if (start_postmaster(&old_cluster, false))
+			stop_postmaster(false);
+		else
+		{
+			if (!user_opts.check)
+				pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
+					   "Please shutdown that postmaster and try again.\n");
+			else
+				*live_check = true;
+		}
+	}
 
 	/* same goes for the new postmaster */
-	if (is_server_running(new_cluster.pgdata))
-		pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
+	if (pid_lock_file_exists(new_cluster.pgdata))
+	{
+		if (start_postmaster(&new_cluster, false))
+			stop_postmaster(false);
+		else
+			pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
 			   "Please shutdown that postmaster and try again.\n");
+	}
 
 	/* get path to pg_upgrade executable */
 	if (find_my_exec(argv0, exec_path) < 0)
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
@@ -307,7 +307,7 @@ extern OSInfo os_info;
 
 /* check.c */
 
-void		output_check_banner(bool *live_check);
+void		output_check_banner(bool live_check);
 void		check_and_dump_old_cluster(bool live_check,
 				  char **sequence_script_file_name);
 void		check_new_cluster(void);
@@ -341,7 +341,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
 		  bool throw_error, const char *fmt,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
 void		verify_directories(void);
-bool		is_server_running(const char *datadir);
+bool		pid_lock_file_exists(const char *datadir);
 
 
 /* file.c */
@@ -429,7 +429,7 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
 
 char	   *cluster_conn_opts(ClusterInfo *cluster);
 
-void		start_postmaster(ClusterInfo *cluster);
+bool		start_postmaster(ClusterInfo *cluster, bool throw_error);
 void		stop_postmaster(bool fast);
 uint32		get_major_server_version(ClusterInfo *cluster);
 void		check_pghost_envvar(void);
diff --git a/contrib/pg_upgrade/server.c b/contrib/pg_upgrade/server.c
@@ -170,8 +170,8 @@ stop_postmaster_atexit(void)
 }
 
 
-void
-start_postmaster(ClusterInfo *cluster)
+bool
+start_postmaster(ClusterInfo *cluster, bool throw_error)
 {
 	char		cmd[MAXPGPATH * 4 + 1000];
 	PGconn	   *conn;
@@ -236,6 +236,9 @@ start_postmaster(ClusterInfo *cluster)
 							  false,
 							  "%s", cmd);
 
+	if (!pg_ctl_return && !throw_error)
+		return false;
+							  
 	/* Check to see if we can connect to the server; if not, report it. */
 	if ((conn = get_db_conn(cluster, "template1")) == NULL ||
 		PQstatus(conn) != CONNECTION_OK)
@@ -256,6 +259,8 @@ start_postmaster(ClusterInfo *cluster)
 			   CLUSTER_NAME(cluster));
 
 	os_info.running_cluster = cluster;
+
+	return true;
 }
 
 

Original file line number	Diff line number	Diff line change
`@@ -56,11 +56,10 @@ fix_path_separator(char *path)`
`56`	`56`	`}`
`57`	`57`
`58`	`58`	`void`
`59`		`-output_check_banner(bool *live_check)`
	`59`	`+output_check_banner(bool live_check)`
`60`	`60`	`{`
`61`		`- if (user_opts.check && is_server_running(old_cluster.pgdata))`
	`61`	`+ if (user_opts.check && live_check)`
`62`	`62`	`{`
`63`		`- *live_check = true;`
`64`	`63`	`pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");`
`65`	`64`	`pg_log(PG_REPORT, "------------------------------------------------\n");`
`66`	`65`	`}`
`@@ -78,7 +77,7 @@ check_and_dump_old_cluster(bool live_check, char **sequence_script_file_name)`
`78`	`77`	`/* -- OLD -- */`
`79`	`78`
`80`	`79`	`if (!live_check)`
`81`		`- start_postmaster(&old_cluster);`
	`80`	`+ start_postmaster(&old_cluster, true);`
`82`	`81`
`83`	`82`	`set_locale_and_encoding(&old_cluster);`
`84`	`83`
`@@ -201,7 +200,7 @@ issue_warnings(char *sequence_script_file_name)`
`201`	`200`	`/* old = PG 8.3 warnings? */`
`202`	`201`	`if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)`
`203`	`202`	`{`
`204`		`- start_postmaster(&new_cluster);`
	`203`	`+ start_postmaster(&new_cluster, true);`
`205`	`204`
`206`	`205`	`/* restore proper sequence values using file created from old server */`
`207`	`206`	`if (sequence_script_file_name)`
`@@ -224,7 +223,7 @@ issue_warnings(char *sequence_script_file_name)`
`224`	`223`	`/* Create dummy large object permissions for old < PG 9.0? */`
`225`	`224`	`if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)`
`226`	`225`	`{`
`227`		`- start_postmaster(&new_cluster);`
	`226`	`+ start_postmaster(&new_cluster, true);`
`228`	`227`	`new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);`
`229`	`228`	`stop_postmaster(false);`
`230`	`229`	`}`