@@ -249,6 +249,17 @@ static pid_t StartupPID = 0,
249
249
PgStatPID = 0 ,
250
250
SysLoggerPID = 0 ;
251
251
252
+ /* Startup process's status, tracked so the child-exit handling can tell an exit we provoked by signal from a genuine crash */
253
+ typedef enum
254
+ {
255
+ STARTUP_NOT_RUNNING , /* initial value; also set once the startup process's exit has been handled without counting as a crash */
256
+ STARTUP_RUNNING , /* set right after StartupDataBase() has launched the startup process */
257
+ STARTUP_SIGNALED , /* we sent it a SIGQUIT or SIGKILL; HandleChildCrash also sets this after SIGSTOP when SendStop is true */
258
+ STARTUP_CRASHED /* it exited abnormally without our signaling it; postmaster exits instead of reinitializing */
259
+ } StartupStatusEnum ;
260
+
261
+ static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING ; /* current state of the startup process; starts out as "not running" */
262
+
252
263
/* Startup/shutdown state */
253
264
#define NoShutdown 0
254
265
#define SmartShutdown 1
@@ -258,7 +269,6 @@ static pid_t StartupPID = 0,
258
269
static int Shutdown = NoShutdown ;
259
270
260
271
static bool FatalError = false; /* T if recovering from backend crash */
261
- static bool RecoveryError = false; /* T if WAL recovery failed */
262
272
263
273
/*
264
274
* We use a simple state machine to control startup, shutdown, and
@@ -301,8 +311,6 @@ static bool RecoveryError = false; /* T if WAL recovery failed */
301
311
* states, nor in PM_SHUTDOWN states (because we don't enter those states
302
312
* when trying to recover from a crash). It can be true in PM_STARTUP state,
303
313
* because we don't clear it until we've successfully started WAL redo.
304
- * Similarly, RecoveryError means that we have crashed during recovery, and
305
- * should not try to restart.
306
314
*/
307
315
typedef enum
308
316
{
@@ -1246,6 +1254,7 @@ PostmasterMain(int argc, char *argv[])
1246
1254
*/
1247
1255
StartupPID = StartupDataBase ();
1248
1256
Assert (StartupPID != 0 );
1257
+ StartupStatus = STARTUP_RUNNING ;
1249
1258
pmState = PM_STARTUP ;
1250
1259
1251
1260
/* Some workers may be scheduled to start now */
@@ -1666,7 +1675,7 @@ ServerLoop(void)
1666
1675
1667
1676
/* If we have lost the archiver, try to start a new one. */
1668
1677
if (PgArchPID == 0 && PgArchStartupAllowed ())
1669
- PgArchPID = pgarch_start ();
1678
+ PgArchPID = pgarch_start ();
1670
1679
1671
1680
/* If we need to signal the autovacuum launcher, do so now */
1672
1681
if (avlauncher_needs_signal )
@@ -2591,6 +2600,7 @@ reaper(SIGNAL_ARGS)
2591
2600
if (Shutdown > NoShutdown &&
2592
2601
(EXIT_STATUS_0 (exitstatus ) || EXIT_STATUS_1 (exitstatus )))
2593
2602
{
2603
+ StartupStatus = STARTUP_NOT_RUNNING ;
2594
2604
pmState = PM_WAIT_BACKENDS ;
2595
2605
/* PostmasterStateMachine logic does the rest */
2596
2606
continue ;
@@ -2600,6 +2610,7 @@ reaper(SIGNAL_ARGS)
2600
2610
{
2601
2611
ereport (LOG ,
2602
2612
(errmsg ("shutdown at recovery target" )));
2613
+ StartupStatus = STARTUP_NOT_RUNNING ;
2603
2614
Shutdown = SmartShutdown ;
2604
2615
TerminateChildren (SIGTERM );
2605
2616
pmState = PM_WAIT_BACKENDS ;
@@ -2624,16 +2635,18 @@ reaper(SIGNAL_ARGS)
2624
2635
/*
2625
2636
* After PM_STARTUP, any unexpected exit (including FATAL exit) of
2626
2637
* the startup process is catastrophic, so kill other children,
2627
- * and set RecoveryError so we don't try to reinitialize after
2628
- * they're gone. Exception: if FatalError is already set, that
2629
- * implies we previously sent the startup process a SIGQUIT, so
2638
+ * and set StartupStatus so we don't try to reinitialize after
2639
+ * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2640
+ * then we previously sent the startup process a SIGQUIT; so
2630
2641
* that's probably the reason it died, and we do want to try to
2631
2642
* restart in that case.
2632
2643
*/
2633
2644
if (!EXIT_STATUS_0 (exitstatus ))
2634
2645
{
2635
- if (!FatalError )
2636
- RecoveryError = true;
2646
+ if (StartupStatus == STARTUP_SIGNALED )
2647
+ StartupStatus = STARTUP_NOT_RUNNING ;
2648
+ else
2649
+ StartupStatus = STARTUP_CRASHED ;
2637
2650
HandleChildCrash (pid , exitstatus ,
2638
2651
_ ("startup process" ));
2639
2652
continue ;
@@ -2642,6 +2655,7 @@ reaper(SIGNAL_ARGS)
2642
2655
/*
2643
2656
* Startup succeeded, commence normal operations
2644
2657
*/
2658
+ StartupStatus = STARTUP_NOT_RUNNING ;
2645
2659
FatalError = false;
2646
2660
Assert (AbortStartTime == 0 );
2647
2661
ReachedNormalRunning = true;
@@ -2962,7 +2976,7 @@ CleanupBackgroundWorker(int pid,
2962
2976
ReportBackgroundWorkerPID (rw ); /* report child death */
2963
2977
2964
2978
LogChildExit (EXIT_STATUS_0 (exitstatus ) ? DEBUG1 : LOG ,
2965
- namebuf , pid , exitstatus );
2979
+ namebuf , pid , exitstatus );
2966
2980
2967
2981
return true;
2968
2982
}
@@ -3190,14 +3204,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
3190
3204
3191
3205
/* Take care of the startup process too */
3192
3206
if (pid == StartupPID )
3207
+ {
3193
3208
StartupPID = 0 ;
3209
+ StartupStatus = STARTUP_CRASHED ;
3210
+ }
3194
3211
else if (StartupPID != 0 && take_action )
3195
3212
{
3196
3213
ereport (DEBUG2 ,
3197
3214
(errmsg_internal ("sending %s to process %d" ,
3198
3215
(SendStop ? "SIGSTOP" : "SIGQUIT" ),
3199
3216
(int ) StartupPID )));
3200
3217
signal_child (StartupPID , (SendStop ? SIGSTOP : SIGQUIT ));
3218
+ StartupStatus = STARTUP_SIGNALED ;
3201
3219
}
3202
3220
3203
3221
/* Take care of the bgwriter too */
@@ -3589,13 +3607,14 @@ PostmasterStateMachine(void)
3589
3607
}
3590
3608
3591
3609
/*
3592
- * If recovery failed, or the user does not want an automatic restart
3593
- * after backend crashes, wait for all non-syslogger children to exit, and
3594
- * then exit postmaster. We don't try to reinitialize when recovery fails,
3595
- * because more than likely it will just fail again and we will keep
3596
- * trying forever.
3610
+ * If the startup process failed, or the user does not want an automatic
3611
+ * restart after backend crashes, wait for all non-syslogger children to
3612
+ * exit, and then exit postmaster. We don't try to reinitialize when the
3613
+ * startup process fails, because more than likely it will just fail again
3614
+ * and we will keep trying forever.
3597
3615
*/
3598
- if (pmState == PM_NO_CHILDREN && (RecoveryError || !restart_after_crash ))
3616
+ if (pmState == PM_NO_CHILDREN &&
3617
+ (StartupStatus == STARTUP_CRASHED || !restart_after_crash ))
3599
3618
ExitPostmaster (1 );
3600
3619
3601
3620
/*
@@ -3615,6 +3634,7 @@ PostmasterStateMachine(void)
3615
3634
3616
3635
StartupPID = StartupDataBase ();
3617
3636
Assert (StartupPID != 0 );
3637
+ StartupStatus = STARTUP_RUNNING ;
3618
3638
pmState = PM_STARTUP ;
3619
3639
/* crash recovery started, reset SIGKILL flag */
3620
3640
AbortStartTime = 0 ;
@@ -3746,7 +3766,11 @@ TerminateChildren(int signal)
3746
3766
{
3747
3767
SignalChildren (signal );
3748
3768
if (StartupPID != 0 )
3769
+ {
3749
3770
signal_child (StartupPID , signal );
3771
+ if (signal == SIGQUIT || signal == SIGKILL )
3772
+ StartupStatus = STARTUP_SIGNALED ;
3773
+ }
3750
3774
if (BgWriterPID != 0 )
3751
3775
signal_child (BgWriterPID , signal );
3752
3776
if (CheckpointerPID != 0 )
0 commit comments