37
37
*
38
38
*
39
39
* IDENTIFICATION
40
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.550 2008/01/01 19:45:51 momjian Exp $
40
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.551 2008/01/11 00:54:09 tgl Exp $
41
41
*
42
42
* NOTES
43
43
*
@@ -244,7 +244,7 @@ static bool FatalError = false; /* T if recovering from backend crash */
244
244
* Notice that this state variable does not distinguish *why* we entered
245
245
* PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be
246
246
* consulted to find that out. FatalError is never true in PM_RUN state, nor
247
- * in PM_SHUTDOWN state (because we don't enter that state when trying to
247
+ * in PM_SHUTDOWN states (because we don't enter those states when trying to
248
248
* recover from a crash). It can be true in PM_STARTUP state, because we
249
249
* don't clear it until we've successfully recovered.
250
250
*/
@@ -255,6 +255,7 @@ typedef enum
255
255
PM_RUN , /* normal "database is alive" state */
256
256
PM_WAIT_BACKENDS , /* waiting for live backends to exit */
257
257
PM_SHUTDOWN , /* waiting for bgwriter to do shutdown ckpt */
258
+ PM_SHUTDOWN_2 , /* waiting for archiver to finish */
258
259
PM_WAIT_DEAD_END , /* waiting for dead_end children to exit */
259
260
PM_NO_CHILDREN /* all important children have exited */
260
261
} PMState ;
@@ -1312,12 +1313,8 @@ ServerLoop(void)
1312
1313
start_autovac_launcher = false; /* signal processed */
1313
1314
}
1314
1315
1315
- /*
1316
- * If we have lost the archiver, try to start a new one. We do this
1317
- * even if we are shutting down, to allow archiver to take care of any
1318
- * remaining WAL files.
1319
- */
1320
- if (XLogArchivingActive () && PgArchPID == 0 && pmState >= PM_RUN )
1316
+ /* If we have lost the archiver, try to start a new one */
1317
+ if (XLogArchivingActive () && PgArchPID == 0 && pmState == PM_RUN )
1321
1318
PgArchPID = pgarch_start ();
1322
1319
1323
1320
/* If we have lost the stats collector, try to start a new one */
@@ -2175,12 +2172,31 @@ reaper(SIGNAL_ARGS)
2175
2172
* checkpoint. (If for some reason it didn't, recovery will
2176
2173
* occur on next postmaster start.)
2177
2174
*
2178
- * At this point we should have no normal children left (else
2179
- * we'd not be in PM_SHUTDOWN state) but we might have
2180
- * dead_end children.
2175
+ * At this point we should have no normal backend children
2176
+ * left (else we'd not be in PM_SHUTDOWN state) but we might
2177
+ * have dead_end children to wait for.
2178
+ *
2179
+ * If we have an archiver subprocess, tell it to do a last
2180
+ * archive cycle and quit; otherwise we can go directly to
2181
+ * PM_WAIT_DEAD_END state.
2181
2182
*/
2182
2183
Assert (Shutdown > NoShutdown );
2183
- pmState = PM_WAIT_DEAD_END ;
2184
+
2185
+ if (PgArchPID != 0 )
2186
+ {
2187
+ /* Waken archiver for the last time */
2188
+ signal_child (PgArchPID , SIGUSR2 );
2189
+ pmState = PM_SHUTDOWN_2 ;
2190
+ }
2191
+ else
2192
+ pmState = PM_WAIT_DEAD_END ;
2193
+
2194
+ /*
2195
+ * We can also shut down the stats collector now; there's
2196
+ * nothing left for it to do.
2197
+ */
2198
+ if (PgStatPID != 0 )
2199
+ signal_child (PgStatPID , SIGQUIT );
2184
2200
}
2185
2201
else
2186
2202
{
@@ -2227,16 +2243,19 @@ reaper(SIGNAL_ARGS)
2227
2243
/*
2228
2244
* Was it the archiver? If so, just try to start a new one; no need
2229
2245
* to force reset of the rest of the system. (If fail, we'll try
2230
- * again in future cycles of the main loop.)
2246
+ * again in future cycles of the main loop.) But if we were waiting
2247
+ * for it to shut down, advance to the next shutdown step.
2231
2248
*/
2232
2249
if (pid == PgArchPID )
2233
2250
{
2234
2251
PgArchPID = 0 ;
2235
2252
if (!EXIT_STATUS_0 (exitstatus ))
2236
2253
LogChildExit (LOG , _ ("archiver process" ),
2237
2254
pid , exitstatus );
2238
- if (XLogArchivingActive () && pmState >= PM_RUN )
2255
+ if (XLogArchivingActive () && pmState == PM_RUN )
2239
2256
PgArchPID = pgarch_start ();
2257
+ else if (pmState == PM_SHUTDOWN_2 )
2258
+ pmState = PM_WAIT_DEAD_END ;
2240
2259
continue ;
2241
2260
}
2242
2261
@@ -2563,6 +2582,11 @@ PostmasterStateMachine(void)
2563
2582
* change causes ServerLoop to stop creating new ones.
2564
2583
*/
2565
2584
pmState = PM_WAIT_DEAD_END ;
2585
+
2586
+ /*
2587
+ * We already SIGQUIT'd the archiver and stats processes,
2588
+ * if any, when we entered FatalError state.
2589
+ */
2566
2590
}
2567
2591
else
2568
2592
{
@@ -2591,13 +2615,13 @@ PostmasterStateMachine(void)
2591
2615
*/
2592
2616
FatalError = true;
2593
2617
pmState = PM_WAIT_DEAD_END ;
2618
+
2619
+ /* Kill the archiver and stats collector too */
2620
+ if (PgArchPID != 0 )
2621
+ signal_child (PgArchPID , SIGQUIT );
2622
+ if (PgStatPID != 0 )
2623
+ signal_child (PgStatPID , SIGQUIT );
2594
2624
}
2595
- /* Tell pgarch to shut down too; nothing left for it to do */
2596
- if (PgArchPID != 0 )
2597
- signal_child (PgArchPID , SIGQUIT );
2598
- /* Tell pgstat to shut down too; nothing left for it to do */
2599
- if (PgStatPID != 0 )
2600
- signal_child (PgStatPID , SIGQUIT );
2601
2625
}
2602
2626
}
2603
2627
}
@@ -2606,16 +2630,26 @@ PostmasterStateMachine(void)
2606
2630
{
2607
2631
/*
2608
2632
* PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
2609
- * (ie, no dead_end children remain).
2633
+ * (ie, no dead_end children remain), and the archiver and stats
2634
+ * collector are gone too.
2635
+ *
2636
+ * The reason we wait for those two is to protect them against a new
2637
+ * postmaster starting conflicting subprocesses; this isn't an
2638
+ * ironclad protection, but it at least helps in the
2639
+ * shutdown-and-immediately-restart scenario. Note that they have
2640
+ * already been sent appropriate shutdown signals, either during a
2641
+ * normal state transition leading up to PM_WAIT_DEAD_END, or during
2642
+ * FatalError processing.
2610
2643
*/
2611
- if (!DLGetHead (BackendList ))
2644
+ if (DLGetHead (BackendList ) == NULL &&
2645
+ PgArchPID == 0 && PgStatPID == 0 )
2612
2646
{
2613
2647
/* These other guys should be dead already */
2614
2648
Assert (StartupPID == 0 );
2615
2649
Assert (BgWriterPID == 0 );
2616
2650
Assert (WalWriterPID == 0 );
2617
2651
Assert (AutoVacPID == 0 );
2618
- /* archiver, stats, and syslogger are not considered here */
2652
+ /* syslogger is not considered here */
2619
2653
pmState = PM_NO_CHILDREN ;
2620
2654
}
2621
2655
}
@@ -2628,14 +2662,9 @@ PostmasterStateMachine(void)
2628
2662
* we got SIGTERM from init --- there may well not be time for recovery
2629
2663
* before init decides to SIGKILL us.)
2630
2664
*
2631
- * Note: we do not wait around for exit of the archiver or stats
2632
- * processes. They've been sent SIGQUIT by this point (either when we
2633
- * entered PM_SHUTDOWN state, or when we set FatalError, and at least one
2634
- * of those must have happened by now). In any case they contain logic to
2635
- * commit hara-kiri if they notice the postmaster is gone. Since they
2636
- * aren't connected to shared memory, they pose no problem for shutdown.
2637
- * The syslogger is not considered either, since it's intended to survive
2638
- * till the postmaster exits.
2665
+ * Note that the syslogger continues to run. It will exit when it sees
2666
+ * EOF on its input pipe, which happens when there are no more upstream
2667
+ * processes.
2639
2668
*/
2640
2669
if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN )
2641
2670
{
@@ -2652,10 +2681,8 @@ PostmasterStateMachine(void)
2652
2681
}
2653
2682
2654
2683
/*
2655
- * If we need to recover from a crash, wait for all shmem-connected
2656
- * children to exit, then reset shmem and StartupDataBase. (We can ignore
2657
- * the archiver and stats processes here since they are not connected to
2658
- * shmem.)
2684
+ * If we need to recover from a crash, wait for all non-syslogger
2685
+ * children to exit, then reset shmem and StartupDataBase.
2659
2686
*/
2660
2687
if (FatalError && pmState == PM_NO_CHILDREN )
2661
2688
{
@@ -3782,7 +3809,7 @@ sigusr1_handler(SIGNAL_ARGS)
3782
3809
}
3783
3810
3784
3811
if (CheckPostmasterSignal (PMSIGNAL_WAKEN_ARCHIVER ) &&
3785
- PgArchPID != 0 && Shutdown <= SmartShutdown )
3812
+ PgArchPID != 0 )
3786
3813
{
3787
3814
/*
3788
3815
* Send SIGUSR1 to archiver process, to wake it up and begin archiving
0 commit comments