Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4695da5

Browse files
committed
pg_ctl promote
Fujii Masao, reviewed by Robert Haas, Stephen Frost, and Magnus Hagander.
1 parent 8ddc05f commit 4695da5

File tree

7 files changed

+152
-21
lines changed

7 files changed

+152
-21
lines changed

doc/src/sgml/high-availability.sgml

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,9 @@ protocol to make nodes agree on a serializable transactional order.
615615
</para>
616616

617617
<para>
618-
Standby mode is exited and the server switches to normal operation,
619-
when a trigger file is found (<varname>trigger_file</>). Before failover,
618+
Standby mode is exited and the server switches to normal operation
619+
when <command>pg_ctl promote</> is run or a trigger file is found
620+
(<varname>trigger_file</>). Before failover,
620621
any WAL immediately available in the archive or in <filename>pg_xlog</> will be
621622
restored, but no attempt is made to connect to the master.
622623
</para>
@@ -685,11 +686,7 @@ protocol to make nodes agree on a serializable transactional order.
685686
If you're setting up the standby server for high availability purposes,
686687
set up WAL archiving, connections and authentication like the primary
687688
server, because the standby server will work as a primary server after
688-
failover. You will also need to set <varname>trigger_file</> to make
689-
it possible to fail over.
690-
If you're setting up the standby server for reporting
691-
purposes, with no plans to fail over to it, <varname>trigger_file</>
692-
is not required.
689+
failover.
693690
</para>
694691

695692
<para>
@@ -710,7 +707,6 @@ protocol to make nodes agree on a serializable transactional order.
710707
standby_mode = 'on'
711708
primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
712709
restore_command = 'cp /path/to/archive/%f %p'
713-
trigger_file = '/path/to/trigger_file'
714710
archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r'
715711
</programlisting>
716712
</para>
@@ -949,13 +945,15 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
949945
</para>
950946

951947
<para>
952-
To trigger failover of a log-shipping standby server, create a trigger
948+
To trigger failover of a log-shipping standby server,
949+
run <command>pg_ctl promote</> or create a trigger
953950
file with the filename and path specified by the <varname>trigger_file</>
954-
setting in <filename>recovery.conf</>. If <varname>trigger_file</> is
955-
not given, there is no way to exit recovery in the standby and promote
956-
it to a master. That can be useful for e.g reporting servers that are
951+
setting in <filename>recovery.conf</>. If you're planning to use
952+
<command>pg_ctl promote</> to fail over, <varname>trigger_file</> is
953+
not required. If you're setting up reporting servers that are
957954
only used to offload read-only queries from the primary, not for high
958-
availability purposes.
955+
availability purposes, you don't need to exit recovery in the standby
956+
and promote it to a master.
959957
</para>
960958
</sect1>
961959

doc/src/sgml/recovery-config.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
343343
<listitem>
344344
<para>
345345
Specifies a trigger file whose presence ends recovery in the
346-
standby. If no trigger file is specified, the standby never exits
347-
recovery.
346+
standby. Even if this value is not set, you can still promote
347+
the standby using <command>pg_ctl promote</>.
348348
This setting has no effect if <varname>standby_mode</> is <literal>off</>.
349349
</para>
350350
</listitem>

doc/src/sgml/ref/pg_ctl-ref.sgml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ PostgreSQL documentation
7575
<arg>-o <replaceable>options</replaceable></arg>
7676
</cmdsynopsis>
7777

78+
<cmdsynopsis>
79+
<command>pg_ctl</command>
80+
<arg choice="plain">promote</arg>
81+
<arg>-s</arg>
82+
<arg>-D <replaceable>datadir</replaceable></arg>
83+
</cmdsynopsis>
84+
7885
<cmdsynopsis>
7986
<command>pg_ctl</command>
8087
<arg choice="plain">reload</arg>
@@ -183,6 +190,12 @@ PostgreSQL documentation
183190
command-line options.
184191
</para>
185192

193+
<para>
194+
In <option>promote</option> mode, the standby server that is
195+
running in the specified data directory is commanded to exit
196+
recovery and begin read-write operations.
197+
</para>
198+
186199
<para>
187200
<option>reload</option> mode simply sends the
188201
<command>postgres</command> process a <systemitem>SIGHUP</>

src/backend/access/transam/xlog.c

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
/* File path names (all relative to $PGDATA) */
6363
#define RECOVERY_COMMAND_FILE "recovery.conf"
6464
#define RECOVERY_COMMAND_DONE "recovery.done"
65+
#define PROMOTE_SIGNAL_FILE "promote"
6566

6667

6768
/* User-settable parameters */
@@ -565,6 +566,7 @@ typedef struct xl_restore_point
565566
*/
566567
static volatile sig_atomic_t got_SIGHUP = false;
567568
static volatile sig_atomic_t shutdown_requested = false;
569+
static volatile sig_atomic_t promote_triggered = false;
568570

569571
/*
570572
* Flag set when executing a restore command, to tell SIGTERM signal handler
@@ -9669,6 +9671,14 @@ StartupProcSigUsr1Handler(SIGNAL_ARGS)
96699671
latch_sigusr1_handler();
96709672
}
96719673

9674+
/* SIGUSR2: set flag to finish recovery */
9675+
static void
9676+
StartupProcTriggerHandler(SIGNAL_ARGS)
9677+
{
9678+
promote_triggered = true;
9679+
WakeupRecovery();
9680+
}
9681+
96729682
/* SIGHUP: set flag to re-read config file at next convenient time */
96739683
static void
96749684
StartupProcSigHupHandler(SIGNAL_ARGS)
@@ -9746,7 +9756,7 @@ StartupProcessMain(void)
97469756
pqsignal(SIGALRM, SIG_IGN);
97479757
pqsignal(SIGPIPE, SIG_IGN);
97489758
pqsignal(SIGUSR1, StartupProcSigUsr1Handler);
9749-
pqsignal(SIGUSR2, SIG_IGN);
9759+
pqsignal(SIGUSR2, StartupProcTriggerHandler);
97509760

97519761
/*
97529762
* Reset some signals that are accepted by postmaster but not here
@@ -10192,9 +10202,9 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
1019210202
}
1019310203

1019410204
/*
10195-
* Check to see if the trigger file exists. If it does, request postmaster
10196-
* to shut down walreceiver, wait for it to exit, remove the trigger
10197-
* file, and return true.
10205+
* Check to see whether the user-specified trigger file exists and whether a
10206+
* promote request has arrived. If either condition holds, request postmaster
10207+
* to shut down walreceiver, wait for it to exit, and return true.
1019810208
*/
1019910209
static bool
1020010210
CheckForStandbyTrigger(void)
@@ -10205,6 +10215,16 @@ CheckForStandbyTrigger(void)
1020510215
if (triggered)
1020610216
return true;
1020710217

10218+
if (promote_triggered)
10219+
{
10220+
ereport(LOG,
10221+
(errmsg("received promote request")));
10222+
ShutdownWalRcv();
10223+
promote_triggered = false;
10224+
triggered = true;
10225+
return true;
10226+
}
10227+
1020810228
if (TriggerFile == NULL)
1020910229
return false;
1021010230

@@ -10220,6 +10240,27 @@ CheckForStandbyTrigger(void)
1022010240
return false;
1022110241
}
1022210242

10243+
/*
10244+
* Check to see if a promote request has arrived. Should be
10245+
* called by postmaster after receiving SIGUSR1.
10246+
*/
10247+
bool
10248+
CheckPromoteSignal(void)
10249+
{
10250+
struct stat stat_buf;
10251+
10252+
if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
10253+
{
10254+
/*
10255+
* Since we are in a signal handler, it's not safe
10256+
* to elog. We silently ignore any error from unlink.
10257+
*/
10258+
unlink(PROMOTE_SIGNAL_FILE);
10259+
return true;
10260+
}
10261+
return false;
10262+
}
10263+
1022310264
/*
1022410265
* Wake up startup process to replay newly arrived WAL, or to notice that
1022510266
* failover has been requested.

src/backend/postmaster/postmaster.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4284,6 +4284,14 @@ sigusr1_handler(SIGNAL_ARGS)
42844284
WalReceiverPID = StartWalReceiver();
42854285
}
42864286

4287+
if (CheckPromoteSignal() && StartupPID != 0 &&
4288+
(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
4289+
pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY))
4290+
{
4291+
/* Tell startup process to finish recovery */
4292+
signal_child(StartupPID, SIGUSR2);
4293+
}
4294+
42874295
PG_SETMASK(&UnBlockSig);
42884296

42894297
errno = save_errno;

src/bin/pg_ctl/pg_ctl.c

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ typedef enum
6262
START_COMMAND,
6363
STOP_COMMAND,
6464
RESTART_COMMAND,
65+
PROMOTE_COMMAND,
6566
RELOAD_COMMAND,
6667
STATUS_COMMAND,
6768
KILL_COMMAND,
@@ -96,6 +97,7 @@ static char postopts_file[MAXPGPATH];
9697
static char pid_file[MAXPGPATH];
9798
static char backup_file[MAXPGPATH];
9899
static char recovery_file[MAXPGPATH];
100+
static char promote_file[MAXPGPATH];
99101

100102
#if defined(WIN32) || defined(__CYGWIN__)
101103
static DWORD pgctl_start_type = SERVICE_AUTO_START;
@@ -124,6 +126,7 @@ static void do_init(void);
124126
static void do_start(void);
125127
static void do_stop(void);
126128
static void do_restart(void);
129+
static void do_promote(void);
127130
static void do_reload(void);
128131
static void do_status(void);
129132
static void do_kill(pgpid_t pid);
@@ -872,7 +875,7 @@ do_stop(void)
872875

873876

874877
/*
875-
* restart/reload routines
878+
* restart/promote/reload routines
876879
*/
877880

878881
static void
@@ -965,6 +968,66 @@ do_restart(void)
965968
do_start();
966969
}
967970

971+
static void
972+
do_promote(void)
973+
{
974+
FILE *prmfile;
975+
pgpid_t pid;
976+
struct stat statbuf;
977+
978+
pid = get_pgpid();
979+
980+
if (pid == 0) /* no pid file */
981+
{
982+
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
983+
write_stderr(_("Is server running?\n"));
984+
exit(1);
985+
}
986+
else if (pid < 0) /* standalone backend, not postmaster */
987+
{
988+
pid = -pid;
989+
write_stderr(_("%s: cannot promote server; "
990+
"single-user server is running (PID: %ld)\n"),
991+
progname, pid);
992+
exit(1);
993+
}
994+
995+
/* If recovery.conf doesn't exist, the server is not in standby mode */
996+
if (stat(recovery_file, &statbuf) != 0)
997+
{
998+
write_stderr(_("%s: cannot promote server; "
999+
"server is not in standby mode\n"),
1000+
progname);
1001+
exit(1);
1002+
}
1003+
1004+
if ((prmfile = fopen(promote_file, "w")) == NULL)
1005+
{
1006+
write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
1007+
progname, promote_file, strerror(errno));
1008+
exit(1);
1009+
}
1010+
if (fclose(prmfile))
1011+
{
1012+
write_stderr(_("%s: could not write promote signal file \"%s\": %s\n"),
1013+
progname, promote_file, strerror(errno));
1014+
exit(1);
1015+
}
1016+
1017+
sig = SIGUSR1;
1018+
if (kill((pid_t) pid, sig) != 0)
1019+
{
1020+
write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"),
1021+
progname, pid, strerror(errno));
1022+
if (unlink(promote_file) != 0)
1023+
write_stderr(_("%s: could not remove promote signal file \"%s\": %s\n"),
1024+
progname, promote_file, strerror(errno));
1025+
exit(1);
1026+
}
1027+
1028+
print_msg(_("server promoting\n"));
1029+
}
1030+
9681031

9691032
static void
9701033
do_reload(void)
@@ -1617,14 +1680,15 @@ do_advice(void)
16171680
static void
16181681
do_help(void)
16191682
{
1620-
printf(_("%s is a utility to start, stop, restart, reload configuration files,\n"
1683+
printf(_("%s is a utility to start, stop, restart, promote, reload configuration files,\n"
16211684
"report the status of a PostgreSQL server, or signal a PostgreSQL process.\n\n"), progname);
16221685
printf(_("Usage:\n"));
16231686
printf(_(" %s init[db] [-D DATADIR] [-s] [-o \"OPTIONS\"]\n"), progname);
16241687
printf(_(" %s start [-w] [-t SECS] [-D DATADIR] [-s] [-l FILENAME] [-o \"OPTIONS\"]\n"), progname);
16251688
printf(_(" %s stop [-W] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"), progname);
16261689
printf(_(" %s restart [-w] [-t SECS] [-D DATADIR] [-s] [-m SHUTDOWN-MODE]\n"
16271690
" [-o \"OPTIONS\"]\n"), progname);
1691+
printf(_(" %s promote [-D DATADIR] [-s]\n"), progname);
16281692
printf(_(" %s reload [-D DATADIR] [-s]\n"), progname);
16291693
printf(_(" %s status [-D DATADIR]\n"), progname);
16301694
printf(_(" %s kill SIGNALNAME PID\n"), progname);
@@ -1950,6 +2014,8 @@ main(int argc, char **argv)
19502014
ctl_command = STOP_COMMAND;
19512015
else if (strcmp(argv[optind], "restart") == 0)
19522016
ctl_command = RESTART_COMMAND;
2017+
else if (strcmp(argv[optind], "promote") == 0)
2018+
ctl_command = PROMOTE_COMMAND;
19532019
else if (strcmp(argv[optind], "reload") == 0)
19542020
ctl_command = RELOAD_COMMAND;
19552021
else if (strcmp(argv[optind], "status") == 0)
@@ -2036,6 +2102,7 @@ main(int argc, char **argv)
20362102
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
20372103
snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data);
20382104
snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data);
2105+
snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
20392106
}
20402107

20412108
switch (ctl_command)
@@ -2055,6 +2122,9 @@ main(int argc, char **argv)
20552122
case RESTART_COMMAND:
20562123
do_restart();
20572124
break;
2125+
case PROMOTE_COMMAND:
2126+
do_promote();
2127+
break;
20582128
case RELOAD_COMMAND:
20592129
do_reload();
20602130
break;

src/include/access/xlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
313313

314314
extern void HandleStartupProcInterrupts(void);
315315
extern void StartupProcessMain(void);
316+
extern bool CheckPromoteSignal(void);
316317
extern void WakeupRecovery(void);
317318

318319
/*

0 commit comments

Comments
 (0)