Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summary refs log tree commit diff
diff options
context:
space:
mode:
author Andres Freund 2025-01-25 16:37:13 +0000
committer Andres Freund 2025-01-25 16:37:13 +0000
commit 87a6690cc69530703b7da7e72769bae2ac5b2e77 (patch)
tree 4628068b7a52517436d671b93e6dfa1e3c6d270f /src/backend/postmaster/checkpointer.c
parent 04ace176e08f2c694bb66b5b91cbd9d4d0bd77ea (diff)
Change shutdown sequence to terminate checkpointer last
The main motivation for this change is to have a process that can serialize stats after all other processes have terminated. Serializing stats already happens in checkpointer, even though walsenders can be active longer. The only reason the current shutdown sequence does not actively cause problems is that walsender currently does not generate any stats. However, there is an upcoming patch changing that. Another need for this change originates in the AIO patchset, where IO workers (which, in some edge cases, can emit stats of their own) need to run while the shutdown checkpoint is being written. This commit changes the shutdown sequence so checkpointer is signalled (via SIGINT) to trigger writing the shutdown checkpoint without also causing checkpointer to exit. Once checkpointer wrote the shutdown checkpoint it notifies postmaster via PMSIGNAL_XLOG_IS_SHUTDOWN and waits for the termination signal (SIGUSR2, as before). Checkpointer now is terminated after all children, other than dead-end children and logger, have been terminated, tracked using the new PM_WAIT_CHECKPOINTER PMState. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com> Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com> Discussion: https://postgr.es/m/kgng5nrvnlv335evmsuvpnh354rw7qyazl73kdysev2cr2v5zu@m3cfzxicm5kp
Diffstat (limited to 'src/backend/postmaster/checkpointer.c')
-rw-r--r-- src/backend/postmaster/checkpointer.c 127
1 files changed, 95 insertions, 32 deletions
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index dd2c8376c6e..b94f9cdff21 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -10,10 +10,13 @@
* fill WAL segments; the checkpointer itself doesn't watch for the
* condition.)
*
- * Normal termination is by SIGUSR2, which instructs the checkpointer to
- * execute a shutdown checkpoint and then exit(0). (All backends must be
- * stopped before SIGUSR2 is issued!) Emergency termination is by SIGQUIT;
- * like any backend, the checkpointer will simply abort and exit on SIGQUIT.
+ * The normal termination sequence is that checkpointer is instructed to
+ * execute the shutdown checkpoint by SIGINT. After that checkpointer waits
+ * to be terminated via SIGUSR2, which instructs the checkpointer to exit(0).
+ * All backends must be stopped before SIGINT or SIGUSR2 is issued!
+ *
+ * Emergency termination is by SIGQUIT; like any backend, the checkpointer
+ * will simply abort and exit on SIGQUIT.
*
* If the checkpointer exits unexpectedly, the postmaster treats that the same
* as a backend crash: shared memory may be corrupted, so remaining backends
@@ -51,6 +54,7 @@
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/shmem.h"
@@ -141,6 +145,7 @@ double CheckPointCompletionTarget = 0.9;
* Private state
*/
static bool ckpt_active = false;
+static volatile sig_atomic_t ShutdownXLOGPending = false;
/* these values are valid when ckpt_active is true: */
static pg_time_t ckpt_start_time;
@@ -159,6 +164,9 @@ static bool ImmediateCheckpointRequested(void);
static bool CompactCheckpointerRequestQueue(void);
static void UpdateSharedMemoryConfig(void);
+/* Signal handlers */
+static void ReqShutdownXLOG(SIGNAL_ARGS);
+
/*
* Main entry point for checkpointer process
@@ -188,7 +196,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
* tell us it's okay to shut down (via SIGUSR2).
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGINT, SIG_IGN);
+ pqsignal(SIGINT, ReqShutdownXLOG);
pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
/* SIGQUIT handler was already set up by InitPostmasterChild */
pqsignal(SIGALRM, SIG_IGN);
@@ -211,8 +219,11 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
* process during a normal shutdown, and since checkpointer is shut down
* very late...
*
- * Walsenders are shut down after the checkpointer, but currently don't
- * report stats. If that changes, we need a more complicated solution.
+ * Processes such as walsenders remain active after the shutdown checkpoint
+ * has been written and could thus produce more stats. That is fine, because
+ * checkpointer stays around after writing the shutdown checkpoint:
+ * postmaster only signals checkpointer to exit once all processes that
+ * could emit stats have been shut down.
*/
before_shmem_exit(pgstat_before_server_shutdown, 0);
@@ -327,7 +338,8 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
ProcGlobal->checkpointerProc = MyProcNumber;
/*
- * Loop forever
+ * Loop until we've been asked to write the shutdown checkpoint or
+ * terminate.
*/
for (;;)
{
@@ -346,7 +358,10 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
* Process any requests or signals received recently.
*/
AbsorbSyncRequests();
+
HandleCheckpointerInterrupts();
+ if (ShutdownXLOGPending || ShutdownRequestPending)
+ break;
/*
* Detect a pending checkpoint request by checking whether the flags
@@ -517,8 +532,13 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
ckpt_active = false;
- /* We may have received an interrupt during the checkpoint. */
+ /*
+ * We may have received an interrupt during the checkpoint and the
+ * latch might have been reset (e.g. in CheckpointWriteDelay).
+ */
HandleCheckpointerInterrupts();
+ if (ShutdownXLOGPending || ShutdownRequestPending)
+ break;
}
/* Check for archive_timeout and switch xlog files if necessary. */
@@ -557,6 +577,57 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
cur_timeout * 1000L /* convert to ms */ ,
WAIT_EVENT_CHECKPOINTER_MAIN);
}
+
+ /*
+ * From here on, elog(ERROR) should end with exit(1), not send control
+ * back to the sigsetjmp block above.
+ */
+ ExitOnAnyError = true;
+
+ if (ShutdownXLOGPending)
+ {
+ /*
+ * Close down the database.
+ *
+ * Since ShutdownXLOG() creates a restartpoint or checkpoint and
+ * updates the statistics, increment the checkpoint request count and
+ * flush out the pending statistics.
+ */
+ PendingCheckpointerStats.num_requested++;
+ ShutdownXLOG(0, 0);
+ pgstat_report_checkpointer();
+ pgstat_report_wal(true);
+
+ /*
+ * Tell postmaster that we're done.
+ */
+ SendPostmasterSignal(PMSIGNAL_XLOG_IS_SHUTDOWN);
+ ShutdownXLOGPending = false;
+ }
+
+ /*
+ * Wait until we're asked to shut down. By separating the writing of the
+ * shutdown checkpoint from checkpointer exiting, checkpointer can perform
+ * some should-be-as-late-as-possible work like writing out stats.
+ */
+ for (;;)
+ {
+ /* Clear any already-pending wakeups */
+ ResetLatch(MyLatch);
+
+ HandleCheckpointerInterrupts();
+
+ if (ShutdownRequestPending)
+ break;
+
+ (void) WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
+ 0,
+ WAIT_EVENT_CHECKPOINTER_SHUTDOWN);
+ }
+
+ /* Normal exit from the checkpointer is here */
+ proc_exit(0); /* done */
}
/*
@@ -586,29 +657,6 @@ HandleCheckpointerInterrupts(void)
*/
UpdateSharedMemoryConfig();
}
- if (ShutdownRequestPending)
- {
- /*
- * From here on, elog(ERROR) should end with exit(1), not send control
- * back to the sigsetjmp block above
- */
- ExitOnAnyError = true;
-
- /*
- * Close down the database.
- *
- * Since ShutdownXLOG() creates restartpoint or checkpoint, and
- * updates the statistics, increment the checkpoint request and flush
- * out pending statistic.
- */
- PendingCheckpointerStats.num_requested++;
- ShutdownXLOG(0, 0);
- pgstat_report_checkpointer();
- pgstat_report_wal(true);
-
- /* Normal exit from the checkpointer is here */
- proc_exit(0); /* done */
- }
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
@@ -729,6 +777,7 @@ CheckpointWriteDelay(int flags, double progress)
* in which case we just try to catch up as quickly as possible.
*/
if (!(flags & CHECKPOINT_IMMEDIATE) &&
+ !ShutdownXLOGPending &&
!ShutdownRequestPending &&
!ImmediateCheckpointRequested() &&
IsCheckpointOnSchedule(progress))
@@ -858,6 +907,20 @@ IsCheckpointOnSchedule(double progress)
/* --------------------------------
+ * signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * SIGINT: set flag to trigger writing of shutdown checkpoint.
+ *
+ * The handler only records the request in ShutdownXLOGPending and sets
+ * this process's latch; the shutdown checkpoint itself is not written
+ * here. CheckpointerMain checks ShutdownXLOGPending after handling
+ * interrupts, leaves its main loop and then calls ShutdownXLOG().
+ * Setting the latch presumably wakes a pending WaitLatch() so the flag
+ * is noticed promptly.
+ */
+static void
+ReqShutdownXLOG(SIGNAL_ARGS)
+{
+ ShutdownXLOGPending = true;
+ SetLatch(MyLatch);
+}
+
+
+/* --------------------------------
* communication with backends
* --------------------------------
*/