Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9a740f8

Browse files
committed
Refactor code in charge of running shell-based recovery commands
The code specific to the execution of archive_cleanup_command, recovery_end_command and restore_command is moved to a new file named shell_restore.c. The code is split into three functions: - shell_restore(), that attempts the execution of a shell-based restore_command. - shell_archive_cleanup(), for archive_cleanup_command. - shell_recovery_end(), for recovery_end_command. This introduces no functional changes, with failure patterns and logs generated in consequence being the same as before (one case actually generates one less DEBUG2 message "could not restore" when a restore command succeeds but the follow-up stat() to check the size fails, but that only matters with an elevel high enough). This is preparatory work for allowing recovery modules, a facility similar to archive modules, with callbacks shaped similarly to the functions introduced here. Author: Nathan Bossart Reviewed-by: Andres Freund, Michael Paquier Discussion: https://postgr.es/m/20221227192449.GA3672473@nathanxps13
1 parent 02d3448 commit 9a740f8

File tree

6 files changed

+214
-127
lines changed

6 files changed

+214
-127
lines changed

src/backend/access/transam/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ OBJS = \
1919
multixact.o \
2020
parallel.o \
2121
rmgr.o \
22+
shell_restore.o \
2223
slru.o \
2324
subtrans.o \
2425
timeline.o \

src/backend/access/transam/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ backend_sources += files(
77
'multixact.c',
88
'parallel.c',
99
'rmgr.c',
10+
'shell_restore.c',
1011
'slru.c',
1112
'subtrans.c',
1213
'timeline.c',
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* shell_restore.c
4+
* Recovery functions for a user-specified shell command.
5+
*
6+
* These recovery functions use a user-specified shell command (e.g. based
7+
* on the GUC restore_command).
8+
*
9+
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10+
* Portions Copyright (c) 1994, Regents of the University of California
11+
*
12+
* src/backend/access/transam/shell_restore.c
13+
*
14+
*-------------------------------------------------------------------------
15+
*/
16+
17+
#include "postgres.h"
18+
19+
#include <signal.h>
20+
21+
#include "access/xlogarchive.h"
22+
#include "access/xlogrecovery.h"
23+
#include "common/archive.h"
24+
#include "common/percentrepl.h"
25+
#include "storage/ipc.h"
26+
#include "utils/wait_event.h"
27+
28+
static void ExecuteRecoveryCommand(const char *command,
29+
const char *commandName,
30+
bool failOnSignal,
31+
uint32 wait_event_info,
32+
const char *lastRestartPointFileName);
33+
34+
/*
35+
* Attempt to execute a shell-based restore command.
36+
*
37+
* Returns true if the command has succeeded, false otherwise.
38+
*/
39+
bool
40+
shell_restore(const char *file, const char *path,
41+
const char *lastRestartPointFileName)
42+
{
43+
char *cmd;
44+
int rc;
45+
46+
/* Build the restore command to execute */
47+
cmd = BuildRestoreCommand(recoveryRestoreCommand, path, file,
48+
lastRestartPointFileName);
49+
50+
ereport(DEBUG3,
51+
(errmsg_internal("executing restore command \"%s\"", cmd)));
52+
53+
/*
54+
* Copy xlog from archival storage to XLOGDIR
55+
*/
56+
fflush(NULL);
57+
pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
58+
rc = system(cmd);
59+
pgstat_report_wait_end();
60+
61+
pfree(cmd);
62+
63+
/*
64+
* Remember, we rollforward UNTIL the restore fails so failure here is
65+
* just part of the process... that makes it difficult to determine
66+
* whether the restore failed because there isn't an archive to restore,
67+
* or because the administrator has specified the restore program
68+
* incorrectly. We have to assume the former.
69+
*
70+
* However, if the failure was due to any sort of signal, it's best to
71+
* punt and abort recovery. (If we "return false" here, upper levels will
72+
* assume that recovery is complete and start up the database!) It's
73+
* essential to abort on child SIGINT and SIGQUIT, because per spec
74+
* system() ignores SIGINT and SIGQUIT while waiting; if we see one of
75+
* those it's a good bet we should have gotten it too.
76+
*
77+
* On SIGTERM, assume we have received a fast shutdown request, and exit
78+
* cleanly. It's pure chance whether we receive the SIGTERM first, or the
79+
* child process. If we receive it first, the signal handler will call
80+
* proc_exit, otherwise we do it here. If we or the child process received
81+
* SIGTERM for any other reason than a fast shutdown request, postmaster
82+
* will perform an immediate shutdown when it sees us exiting
83+
* unexpectedly.
84+
*
85+
* We treat hard shell errors such as "command not found" as fatal, too.
86+
*/
87+
if (rc != 0)
88+
{
89+
if (wait_result_is_signal(rc, SIGTERM))
90+
proc_exit(1);
91+
92+
ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
93+
(errmsg("could not restore file \"%s\" from archive: %s",
94+
file, wait_result_to_str(rc))));
95+
}
96+
97+
return (rc == 0);
98+
}
99+
100+
/*
101+
* Attempt to execute a shell-based archive cleanup command.
102+
*/
103+
void
104+
shell_archive_cleanup(const char *lastRestartPointFileName)
105+
{
106+
ExecuteRecoveryCommand(archiveCleanupCommand, "archive_cleanup_command",
107+
false, WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
108+
lastRestartPointFileName);
109+
}
110+
111+
/*
112+
* Attempt to execute a shell-based end-of-recovery command.
113+
*/
114+
void
115+
shell_recovery_end(const char *lastRestartPointFileName)
116+
{
117+
ExecuteRecoveryCommand(recoveryEndCommand, "recovery_end_command", true,
118+
WAIT_EVENT_RECOVERY_END_COMMAND,
119+
lastRestartPointFileName);
120+
}
121+
122+
/*
123+
* Attempt to execute an external shell command during recovery.
124+
*
125+
* 'command' is the shell command to be executed, 'commandName' is a
126+
* human-readable name describing the command emitted in the logs. If
127+
* 'failOnSignal' is true and the command is killed by a signal, a FATAL
128+
* error is thrown. Otherwise a WARNING is emitted.
129+
*
130+
* This is currently used for recovery_end_command and archive_cleanup_command.
131+
*/
132+
static void
133+
ExecuteRecoveryCommand(const char *command, const char *commandName,
134+
bool failOnSignal, uint32 wait_event_info,
135+
const char *lastRestartPointFileName)
136+
{
137+
char *xlogRecoveryCmd;
138+
int rc;
139+
140+
Assert(command && commandName);
141+
142+
/*
143+
* construct the command to be executed
144+
*/
145+
xlogRecoveryCmd = replace_percent_placeholders(command, commandName, "r",
146+
lastRestartPointFileName);
147+
148+
ereport(DEBUG3,
149+
(errmsg_internal("executing %s \"%s\"", commandName, command)));
150+
151+
/*
152+
* execute the constructed command
153+
*/
154+
fflush(NULL);
155+
pgstat_report_wait_start(wait_event_info);
156+
rc = system(xlogRecoveryCmd);
157+
pgstat_report_wait_end();
158+
159+
pfree(xlogRecoveryCmd);
160+
161+
if (rc != 0)
162+
{
163+
/*
164+
* If the failure was due to any sort of signal, it's best to punt and
165+
* abort recovery. See comments in shell_restore().
166+
*/
167+
ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
168+
/*------
169+
translator: First %s represents a postgresql.conf parameter name like
170+
"recovery_end_command", the 2nd is the value of that parameter, the
171+
third an already translated error message. */
172+
(errmsg("%s \"%s\": %s", commandName,
173+
command, wait_result_to_str(rc))));
174+
}
175+
}

src/backend/access/transam/xlog.c

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli);
692692
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
693693
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
694694
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
695+
static void GetOldestRestartPointFileName(char *fname);
695696

696697
static void WALInsertLockAcquire(void);
697698
static void WALInsertLockAcquireExclusive(void);
@@ -4887,10 +4888,12 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
48874888
* Execute the recovery_end_command, if any.
48884889
*/
48894890
if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
4890-
ExecuteRecoveryCommand(recoveryEndCommand,
4891-
"recovery_end_command",
4892-
true,
4893-
WAIT_EVENT_RECOVERY_END_COMMAND);
4891+
{
4892+
char lastRestartPointFname[MAXFNAMELEN];
4893+
4894+
GetOldestRestartPointFileName(lastRestartPointFname);
4895+
shell_recovery_end(lastRestartPointFname);
4896+
}
48944897

48954898
/*
48964899
* We switched to a new timeline. Clean up segments on the old timeline.
@@ -7307,10 +7310,12 @@ CreateRestartPoint(int flags)
73077310
* Finally, execute archive_cleanup_command, if any.
73087311
*/
73097312
if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7310-
ExecuteRecoveryCommand(archiveCleanupCommand,
7311-
"archive_cleanup_command",
7312-
false,
7313-
WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7313+
{
7314+
char lastRestartPointFname[MAXFNAMELEN];
7315+
7316+
GetOldestRestartPointFileName(lastRestartPointFname);
7317+
shell_archive_cleanup(lastRestartPointFname);
7318+
}
73147319

73157320
return true;
73167321
}
@@ -8884,6 +8889,22 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
88848889
LWLockRelease(ControlFileLock);
88858890
}
88868891

8892+
/*
8893+
* Returns the WAL file name for the last checkpoint or restartpoint. This is
8894+
* the oldest WAL file that we still need if we have to restart recovery.
8895+
*/
8896+
static void
8897+
GetOldestRestartPointFileName(char *fname)
8898+
{
8899+
XLogRecPtr restartRedoPtr;
8900+
TimeLineID restartTli;
8901+
XLogSegNo restartSegNo;
8902+
8903+
GetOldestRestartPoint(&restartRedoPtr, &restartTli);
8904+
XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
8905+
XLogFileName(fname, restartTli, restartSegNo, wal_segment_size);
8906+
}
8907+
88878908
/* Thin wrapper around ShutdownWalRcv(). */
88888909
void
88898910
XLogShutdownWalRcv(void)

0 commit comments

Comments
 (0)