Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a9ceaa5

Browse files
committed
pg_upgrade: detect stale postmaster.pid lock files
If the postmaster.pid lock file exists, try starting/stopping the cluster to check if the lock file is valid. Per request from Tom.
1 parent 74ebba8 commit a9ceaa5

File tree

5 files changed, with 54 additions and 30 deletions.

contrib/pg_upgrade/check.c

+5-6
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,10 @@ fix_path_separator(char *path)
5656
}
5757

5858
void
59-
output_check_banner(bool *live_check)
59+
output_check_banner(bool live_check)
6060
{
61-
if (user_opts.check && is_server_running(old_cluster.pgdata))
61+
if (user_opts.check && live_check)
6262
{
63-
*live_check = true;
6463
pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");
6564
pg_log(PG_REPORT, "------------------------------------------------\n");
6665
}
@@ -78,7 +77,7 @@ check_and_dump_old_cluster(bool live_check, char **sequence_script_file_name)
7877
/* -- OLD -- */
7978

8079
if (!live_check)
81-
start_postmaster(&old_cluster);
80+
start_postmaster(&old_cluster, true);
8281

8382
set_locale_and_encoding(&old_cluster);
8483

@@ -201,7 +200,7 @@ issue_warnings(char *sequence_script_file_name)
201200
/* old = PG 8.3 warnings? */
202201
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)
203202
{
204-
start_postmaster(&new_cluster);
203+
start_postmaster(&new_cluster, true);
205204

206205
/* restore proper sequence values using file created from old server */
207206
if (sequence_script_file_name)
@@ -224,7 +223,7 @@ issue_warnings(char *sequence_script_file_name)
224223
/* Create dummy large object permissions for old < PG 9.0? */
225224
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
226225
{
227-
start_postmaster(&new_cluster);
226+
start_postmaster(&new_cluster, true);
228227
new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
229228
stop_postmaster(false);
230229
}

contrib/pg_upgrade/exec.c

+3-7
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,12 @@ exec_prog(const char *log_file, const char *opt_log_file,
140140

141141

142142
/*
143-
* is_server_running()
143+
* pid_lock_file_exists()
144144
*
145-
* checks whether postmaster on the given data directory is running or not.
146-
* The check is performed by looking for the existence of postmaster.pid file.
145+
* Checks whether the postmaster.pid file exists.
147146
*/
148147
bool
149-
is_server_running(const char *datadir)
148+
pid_lock_file_exists(const char *datadir)
150149
{
151150
char path[MAXPGPATH];
152151
int fd;
@@ -180,8 +179,6 @@ void
180179
verify_directories(void)
181180
{
182181

183-
prep_status("Checking current, bin, and data directories");
184-
185182
#ifndef WIN32
186183
if (access(".", R_OK | W_OK | X_OK) != 0)
187184
#else
@@ -194,7 +191,6 @@ verify_directories(void)
194191
check_data_dir(old_cluster.pgdata);
195192
check_bin_dir(&new_cluster);
196193
check_data_dir(new_cluster.pgdata);
197-
check_ok();
198194
}
199195

200196

contrib/pg_upgrade/pg_upgrade.c

+36-12
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ static void prepare_new_databases(void);
4848
static void create_new_objects(void);
4949
static void copy_clog_xlog_xid(void);
5050
static void set_frozenxids(void);
51-
static void setup(char *argv0, bool live_check);
51+
static void setup(char *argv0, bool *live_check);
5252
static void cleanup(void);
5353

5454
ClusterInfo old_cluster,
@@ -80,9 +80,9 @@ main(int argc, char **argv)
8080
adjust_data_dir(&old_cluster);
8181
adjust_data_dir(&new_cluster);
8282

83-
output_check_banner(&live_check);
83+
setup(argv[0], &live_check);
8484

85-
setup(argv[0], live_check);
85+
output_check_banner(live_check);
8686

8787
check_cluster_versions();
8888

@@ -95,7 +95,7 @@ main(int argc, char **argv)
9595

9696

9797
/* -- NEW -- */
98-
start_postmaster(&new_cluster);
98+
start_postmaster(&new_cluster, true);
9999

100100
check_new_cluster();
101101
report_clusters_compatible();
@@ -116,7 +116,7 @@ main(int argc, char **argv)
116116
/* New now using xids of the old system */
117117

118118
/* -- NEW -- */
119-
start_postmaster(&new_cluster);
119+
start_postmaster(&new_cluster, true);
120120

121121
prepare_new_databases();
122122

@@ -177,7 +177,7 @@ main(int argc, char **argv)
177177

178178

179179
static void
180-
setup(char *argv0, bool live_check)
180+
setup(char *argv0, bool *live_check)
181181
{
182182
char exec_path[MAXPGPATH]; /* full path to my executable */
183183

@@ -189,15 +189,39 @@ setup(char *argv0, bool live_check)
189189

190190
verify_directories();
191191

192-
/* no postmasters should be running */
193-
if (!live_check && is_server_running(old_cluster.pgdata))
194-
pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
195-
"Please shutdown that postmaster and try again.\n");
192+
/* no postmasters should be running, except for a live check */
193+
if (pid_lock_file_exists(old_cluster.pgdata))
194+
{
195+
/*
196+
* If we have a postmaster.pid file, try to start the server. If
197+
* it starts, the pid file was stale, so stop the server. If it
198+
* doesn't start, assume the server is running. If the pid file
199+
* is left over from a server crash, this also allows any committed
200+
* transactions stored in the WAL to be replayed so they are not
201+
lost, because WAL files are not transferred from old to new
202+
* servers.
203+
*/
204+
if (start_postmaster(&old_cluster, false))
205+
stop_postmaster(false);
206+
else
207+
{
208+
if (!user_opts.check)
209+
pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
210+
"Please shutdown that postmaster and try again.\n");
211+
else
212+
*live_check = true;
213+
}
214+
}
196215

197216
/* same goes for the new postmaster */
198-
if (is_server_running(new_cluster.pgdata))
199-
pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
217+
if (pid_lock_file_exists(new_cluster.pgdata))
218+
{
219+
if (start_postmaster(&new_cluster, false))
220+
stop_postmaster(false);
221+
else
222+
pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
200223
"Please shutdown that postmaster and try again.\n");
224+
}
201225

202226
/* get path to pg_upgrade executable */
203227
if (find_my_exec(argv0, exec_path) < 0)

contrib/pg_upgrade/pg_upgrade.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ extern OSInfo os_info;
307307

308308
/* check.c */
309309

310-
void output_check_banner(bool *live_check);
310+
void output_check_banner(bool live_check);
311311
void check_and_dump_old_cluster(bool live_check,
312312
char **sequence_script_file_name);
313313
void check_new_cluster(void);
@@ -341,7 +341,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
341341
bool throw_error, const char *fmt,...)
342342
__attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
343343
void verify_directories(void);
344-
bool is_server_running(const char *datadir);
344+
bool pid_lock_file_exists(const char *datadir);
345345

346346

347347
/* file.c */
@@ -429,7 +429,7 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
429429

430430
char *cluster_conn_opts(ClusterInfo *cluster);
431431

432-
void start_postmaster(ClusterInfo *cluster);
432+
bool start_postmaster(ClusterInfo *cluster, bool throw_error);
433433
void stop_postmaster(bool fast);
434434
uint32 get_major_server_version(ClusterInfo *cluster);
435435
void check_pghost_envvar(void);

contrib/pg_upgrade/server.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ stop_postmaster_atexit(void)
170170
}
171171

172172

173-
void
174-
start_postmaster(ClusterInfo *cluster)
173+
bool
174+
start_postmaster(ClusterInfo *cluster, bool throw_error)
175175
{
176176
char cmd[MAXPGPATH * 4 + 1000];
177177
PGconn *conn;
@@ -236,6 +236,9 @@ start_postmaster(ClusterInfo *cluster)
236236
false,
237237
"%s", cmd);
238238

239+
if (!pg_ctl_return && !throw_error)
240+
return false;
241+
239242
/* Check to see if we can connect to the server; if not, report it. */
240243
if ((conn = get_db_conn(cluster, "template1")) == NULL ||
241244
PQstatus(conn) != CONNECTION_OK)
@@ -256,6 +259,8 @@ start_postmaster(ClusterInfo *cluster)
256259
CLUSTER_NAME(cluster));
257260

258261
os_info.running_cluster = cluster;
262+
263+
return true;
259264
}
260265

261266

0 commit comments

Comments
 (0)