Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 82233ce

Browse files
committed
Send SIGKILL to children if they don't die quickly in immediate shutdown
On immediate shutdown, or during a restart-after-crash sequence, postmaster used to send SIGQUIT to its children (and then, in the shutdown case, abandon ship); but this is not a good strategy if backends don't die because of that signal. (This might happen, for example, if a backend gets tangled trying to malloc() due to gettext(), as in an example illustrated by MauMau.) This causes problems when later trying to restart the server, because some processes are still attached to the shared memory segment. Instead of just abandoning such backends to their fates, we now have postmaster hang around for a little while longer, send a SIGKILL after some reasonable waiting period, and then exit. This makes immediate shutdown more reliable. There is disagreement on whether it's best for postmaster to exit after sending SIGKILL, or to stick around until all children have reported death. If this controversy is resolved differently than what this patch implements, it's an easy change to make. Bug reported by MauMau in message 20DAEA8949EC4E2289C6E8E58560DEC0@maumau. — MauMau and Álvaro Herrera
1 parent 457d6cf commit 82233ce

File tree

3 files changed

+142
-44
lines changed

3 files changed

+142
-44
lines changed

doc/src/sgml/runtime.sgml

+5-5
Original file line numberDiff line numberDiff line change
@@ -1362,11 +1362,11 @@ echo -1000 > /proc/self/oom_score_adj
13621362
<listitem>
13631363
<para>
13641364
This is the <firstterm>Immediate Shutdown</firstterm> mode.
1365-
The master <command>postgres</command> process will send a
1366-
<systemitem>SIGQUIT</systemitem> to all child processes and exit
1367-
immediately, without properly shutting itself down. The child processes
1368-
likewise exit immediately upon receiving
1369-
<systemitem>SIGQUIT</systemitem>. This will lead to recovery (by
1365+
The server will send <systemitem>SIGQUIT</systemitem> to all child
1366+
processes and wait for them to terminate. Those that don't terminate
1367+
within 5 seconds will be sent <systemitem>SIGKILL</systemitem> by the
1368+
master <command>postgres</command> process, which will then terminate
1369+
without further waiting. This will lead to recovery (by
13701370
replaying the WAL) upon next start-up. This is recommended
13711371
only in emergencies.
13721372
</para>

src/backend/postmaster/postmaster.c

+117-39
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ static pid_t StartupPID = 0,
275275
#define NoShutdown 0
276276
#define SmartShutdown 1
277277
#define FastShutdown 2
278+
#define ImmediateShutdown 3
278279

279280
static int Shutdown = NoShutdown;
280281

@@ -345,6 +346,10 @@ typedef enum
345346

346347
static PMState pmState = PM_INIT;
347348

349+
/* Start time of abort processing at immediate shutdown or child crash */
350+
static time_t AbortStartTime;
351+
#define SIGKILL_CHILDREN_AFTER_SECS 5
352+
348353
static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
349354

350355
bool ClientAuthInProgress = false; /* T during new-client
@@ -421,6 +426,7 @@ static void RandomSalt(char *md5Salt);
421426
static void signal_child(pid_t pid, int signal);
422427
static bool SignalSomeChildren(int signal, int targets);
423428
static bool SignalUnconnectedWorkers(int signal);
429+
static void TerminateChildren(int signal);
424430

425431
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
426432

@@ -1427,8 +1433,18 @@ DetermineSleepTime(struct timeval * timeout)
14271433
if (Shutdown > NoShutdown ||
14281434
(!StartWorkerNeeded && !HaveCrashedWorker))
14291435
{
1430-
timeout->tv_sec = 60;
1431-
timeout->tv_usec = 0;
1436+
if (AbortStartTime > 0)
1437+
{
1438+
/* remaining time, but at least 1 second */
1439+
timeout->tv_sec = Min(SIGKILL_CHILDREN_AFTER_SECS -
1440+
(time(NULL) - AbortStartTime), 1);
1441+
timeout->tv_usec = 0;
1442+
}
1443+
else
1444+
{
1445+
timeout->tv_sec = 60;
1446+
timeout->tv_usec = 0;
1447+
}
14321448
return;
14331449
}
14341450

@@ -1660,6 +1676,28 @@ ServerLoop(void)
16601676
TouchSocketLockFiles();
16611677
last_touch_time = now;
16621678
}
1679+
1680+
/*
1681+
* If we already sent SIGQUIT to children and they are slow to shut
1682+
* down, it's time to send them SIGKILL. This doesn't happen normally,
1683+
* but under certain conditions backends can get stuck while shutting
1684+
* down. This is a last measure to get them unwedged.
1685+
*
1686+
* Note we also do this during recovery from a process crash.
1687+
*/
1688+
if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1689+
now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
1690+
{
1691+
/* We were gentle with them before. Not anymore */
1692+
TerminateChildren(SIGKILL);
1693+
1694+
/*
1695+
* Additionally, unless we're recovering from a process crash, it's
1696+
* now the time for postmaster to abandon ship.
1697+
*/
1698+
if (!FatalError)
1699+
ExitPostmaster(1);
1700+
}
16631701
}
16641702
}
16651703

@@ -2455,30 +2493,27 @@ pmdie(SIGNAL_ARGS)
24552493
/*
24562494
* Immediate Shutdown:
24572495
*
2458-
* abort all children with SIGQUIT and exit without attempt to
2459-
* properly shut down data base system.
2496+
* abort all children with SIGQUIT, wait for them to exit,
2497+
* terminate remaining ones with SIGKILL, then exit without
2498+
* attempt to properly shut down the data base system.
24602499
*/
2500+
if (Shutdown >= ImmediateShutdown)
2501+
break;
2502+
Shutdown = ImmediateShutdown;
24612503
ereport(LOG,
24622504
(errmsg("received immediate shutdown request")));
2463-
SignalChildren(SIGQUIT);
2464-
if (StartupPID != 0)
2465-
signal_child(StartupPID, SIGQUIT);
2466-
if (BgWriterPID != 0)
2467-
signal_child(BgWriterPID, SIGQUIT);
2468-
if (CheckpointerPID != 0)
2469-
signal_child(CheckpointerPID, SIGQUIT);
2470-
if (WalWriterPID != 0)
2471-
signal_child(WalWriterPID, SIGQUIT);
2472-
if (WalReceiverPID != 0)
2473-
signal_child(WalReceiverPID, SIGQUIT);
2474-
if (AutoVacPID != 0)
2475-
signal_child(AutoVacPID, SIGQUIT);
2476-
if (PgArchPID != 0)
2477-
signal_child(PgArchPID, SIGQUIT);
2478-
if (PgStatPID != 0)
2479-
signal_child(PgStatPID, SIGQUIT);
2480-
SignalUnconnectedWorkers(SIGQUIT);
2481-
ExitPostmaster(0);
2505+
2506+
TerminateChildren(SIGQUIT);
2507+
pmState = PM_WAIT_BACKENDS;
2508+
2509+
/* set stopwatch for them to die */
2510+
AbortStartTime = time(NULL);
2511+
2512+
/*
2513+
* Now wait for backends to exit. If there are none,
2514+
* PostmasterStateMachine will take the next step.
2515+
*/
2516+
PostmasterStateMachine();
24822517
break;
24832518
}
24842519

@@ -2952,12 +2987,17 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
29522987
dlist_mutable_iter iter;
29532988
slist_iter siter;
29542989
Backend *bp;
2990+
bool take_action;
29552991

29562992
/*
2957-
* Make log entry unless there was a previous crash (if so, nonzero exit
2958-
* status is to be expected in SIGQUIT response; don't clutter log)
2993+
* We only log messages and send signals if this is the first process crash
2994+
* and we're not doing an immediate shutdown; otherwise, we're only here to
2995+
* update postmaster's idea of live processes. If we have already signalled
2996+
* children, nonzero exit status is to be expected, so don't clutter log.
29592997
*/
2960-
if (!FatalError)
2998+
take_action = !FatalError && Shutdown != ImmediateShutdown;
2999+
3000+
if (take_action)
29613001
{
29623002
LogChildExit(LOG, procname, pid, exitstatus);
29633003
ereport(LOG,
@@ -3003,7 +3043,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
30033043
* (-s on command line), then we send SIGSTOP instead, so that we
30043044
* can get core dumps from all backends by hand.
30053045
*/
3006-
if (!FatalError)
3046+
if (take_action)
30073047
{
30083048
ereport(DEBUG2,
30093049
(errmsg_internal("sending %s to process %d",
@@ -3055,7 +3095,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
30553095
if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
30563096
continue;
30573097

3058-
if (!FatalError)
3098+
if (take_action)
30593099
{
30603100
ereport(DEBUG2,
30613101
(errmsg_internal("sending %s to process %d",
@@ -3069,7 +3109,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
30693109
/* Take care of the startup process too */
30703110
if (pid == StartupPID)
30713111
StartupPID = 0;
3072-
else if (StartupPID != 0 && !FatalError)
3112+
else if (StartupPID != 0 && take_action)
30733113
{
30743114
ereport(DEBUG2,
30753115
(errmsg_internal("sending %s to process %d",
@@ -3081,7 +3121,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
30813121
/* Take care of the bgwriter too */
30823122
if (pid == BgWriterPID)
30833123
BgWriterPID = 0;
3084-
else if (BgWriterPID != 0 && !FatalError)
3124+
else if (BgWriterPID != 0 && take_action)
30853125
{
30863126
ereport(DEBUG2,
30873127
(errmsg_internal("sending %s to process %d",
@@ -3093,7 +3133,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
30933133
/* Take care of the checkpointer too */
30943134
if (pid == CheckpointerPID)
30953135
CheckpointerPID = 0;
3096-
else if (CheckpointerPID != 0 && !FatalError)
3136+
else if (CheckpointerPID != 0 && take_action)
30973137
{
30983138
ereport(DEBUG2,
30993139
(errmsg_internal("sending %s to process %d",
@@ -3105,7 +3145,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31053145
/* Take care of the walwriter too */
31063146
if (pid == WalWriterPID)
31073147
WalWriterPID = 0;
3108-
else if (WalWriterPID != 0 && !FatalError)
3148+
else if (WalWriterPID != 0 && take_action)
31093149
{
31103150
ereport(DEBUG2,
31113151
(errmsg_internal("sending %s to process %d",
@@ -3117,7 +3157,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31173157
/* Take care of the walreceiver too */
31183158
if (pid == WalReceiverPID)
31193159
WalReceiverPID = 0;
3120-
else if (WalReceiverPID != 0 && !FatalError)
3160+
else if (WalReceiverPID != 0 && take_action)
31213161
{
31223162
ereport(DEBUG2,
31233163
(errmsg_internal("sending %s to process %d",
@@ -3129,7 +3169,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31293169
/* Take care of the autovacuum launcher too */
31303170
if (pid == AutoVacPID)
31313171
AutoVacPID = 0;
3132-
else if (AutoVacPID != 0 && !FatalError)
3172+
else if (AutoVacPID != 0 && take_action)
31333173
{
31343174
ereport(DEBUG2,
31353175
(errmsg_internal("sending %s to process %d",
@@ -3144,7 +3184,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31443184
* simplifies the state-machine logic in the case where a shutdown request
31453185
* arrives during crash processing.)
31463186
*/
3147-
if (PgArchPID != 0 && !FatalError)
3187+
if (PgArchPID != 0 && take_action)
31483188
{
31493189
ereport(DEBUG2,
31503190
(errmsg_internal("sending %s to process %d",
@@ -3159,7 +3199,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31593199
* simplifies the state-machine logic in the case where a shutdown request
31603200
* arrives during crash processing.)
31613201
*/
3162-
if (PgStatPID != 0 && !FatalError)
3202+
if (PgStatPID != 0 && take_action)
31633203
{
31643204
ereport(DEBUG2,
31653205
(errmsg_internal("sending %s to process %d",
@@ -3171,7 +3211,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31713211

31723212
/* We do NOT restart the syslogger */
31733213

3174-
FatalError = true;
3214+
if (Shutdown != ImmediateShutdown)
3215+
FatalError = true;
3216+
31753217
/* We now transit into a state of waiting for children to die */
31763218
if (pmState == PM_RECOVERY ||
31773219
pmState == PM_HOT_STANDBY ||
@@ -3180,6 +3222,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31803222
pmState == PM_WAIT_READONLY ||
31813223
pmState == PM_SHUTDOWN)
31823224
pmState = PM_WAIT_BACKENDS;
3225+
3226+
/*
3227+
* .. and if this doesn't happen quickly enough, now the clock is ticking
3228+
* for us to kill them without mercy.
3229+
*/
3230+
if (AbortStartTime == 0)
3231+
AbortStartTime = time(NULL);
31833232
}
31843233

31853234
/*
@@ -3316,7 +3365,7 @@ PostmasterStateMachine(void)
33163365
WalWriterPID == 0 &&
33173366
AutoVacPID == 0)
33183367
{
3319-
if (FatalError)
3368+
if (Shutdown >= ImmediateShutdown || FatalError)
33203369
{
33213370
/*
33223371
* Start waiting for dead_end children to die. This state
@@ -3326,7 +3375,8 @@ PostmasterStateMachine(void)
33263375

33273376
/*
33283377
* We already SIGQUIT'd the archiver and stats processes, if
3329-
* any, when we entered FatalError state.
3378+
* any, when we started immediate shutdown or entered
3379+
* FatalError state.
33303380
*/
33313381
}
33323382
else
@@ -3511,6 +3561,7 @@ signal_child(pid_t pid, int signal)
35113561
case SIGTERM:
35123562
case SIGQUIT:
35133563
case SIGSTOP:
3564+
case SIGKILL:
35143565
if (kill(-pid, signal) < 0)
35153566
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
35163567
break;
@@ -3597,6 +3648,33 @@ SignalSomeChildren(int signal, int target)
35973648
return signaled;
35983649
}
35993650

3651+
/*
3652+
* Send a termination signal to children. This considers all of our children
3653+
* processes, except syslogger and dead_end backends.
3654+
*/
3655+
static void
3656+
TerminateChildren(int signal)
3657+
{
3658+
SignalChildren(signal);
3659+
if (StartupPID != 0)
3660+
signal_child(StartupPID, signal);
3661+
if (BgWriterPID != 0)
3662+
signal_child(BgWriterPID, signal);
3663+
if (CheckpointerPID != 0)
3664+
signal_child(CheckpointerPID, signal);
3665+
if (WalWriterPID != 0)
3666+
signal_child(WalWriterPID, signal);
3667+
if (WalReceiverPID != 0)
3668+
signal_child(WalReceiverPID, signal);
3669+
if (AutoVacPID != 0)
3670+
signal_child(AutoVacPID, signal);
3671+
if (PgArchPID != 0)
3672+
signal_child(PgArchPID, signal);
3673+
if (PgStatPID != 0)
3674+
signal_child(PgStatPID, signal);
3675+
SignalUnconnectedWorkers(signal);
3676+
}
3677+
36003678
/*
36013679
* BackendStartup -- start backend process
36023680
*

src/port/kill.c

+20
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,26 @@ pgkill(int pid, int sig)
3838
errno = EINVAL;
3939
return -1;
4040
}
41+
42+
/* special case for SIGKILL: just ask the system to terminate the target */
43+
if (sig == SIGKILL)
44+
{
45+
HANDLE prochandle;
46+
47+
if ((prochandle = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD) pid)) == NULL)
48+
{
49+
errno = ESRCH;
50+
return -1;
51+
}
52+
if (!TerminateProcess(prochandle, 255))
53+
{
54+
_dosmaperr(GetLastError());
55+
CloseHandle(prochandle);
56+
return -1;
57+
}
58+
CloseHandle(prochandle);
59+
return 0;
60+
}
4161
snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
4262

4363
if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))

0 commit comments

Comments
 (0)