Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c627d94

Browse files
Add built-in ERROR handling for archive callbacks.
Presently, the archiver process restarts when an archive callback ERRORs. To avoid this, archive module authors can use sigsetjmp(), manage a memory context, etc., but that requires a lot of extra code that will likely look roughly the same between modules. This commit adds basic archive callback ERROR handling to pgarch.c so that module authors won't ordinarily need to worry about this. While this built-in handler attempts to clean up anything that an archive module could conceivably have left behind, it is possible that some modules are doing unexpected things that require additional cleanup. Module authors should be sure to do any extra required cleanup in a PG_CATCH block within the archiving callback. The archiving callback is now called in a short-lived memory context that the archiver process resets between invocations. If a module requires longer-lived storage, it must maintain its own memory context. Thanks to these changes, the basic_archive module can be greatly simplified. Suggested-by: Andres Freund Reviewed-by: Andres Freund, Yong Li Discussion: https://postgr.es/m/20230217215624.GA3131134%40nathanxps13
1 parent 5bec1d6 commit c627d94

File tree

4 files changed

+107
-136
lines changed

4 files changed

+107
-136
lines changed

contrib/basic_archive/basic_archive.c

+5-129
Original file line numberDiff line numberDiff line change
@@ -40,26 +40,18 @@
4040

4141
PG_MODULE_MAGIC;
4242

43-
typedef struct BasicArchiveData
44-
{
45-
MemoryContext context;
46-
} BasicArchiveData;
47-
4843
static char *archive_directory = NULL;
4944

50-
static void basic_archive_startup(ArchiveModuleState *state);
5145
static bool basic_archive_configured(ArchiveModuleState *state);
5246
static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
53-
static void basic_archive_file_internal(const char *file, const char *path);
5447
static bool check_archive_directory(char **newval, void **extra, GucSource source);
5548
static bool compare_files(const char *file1, const char *file2);
56-
static void basic_archive_shutdown(ArchiveModuleState *state);
5749

5850
static const ArchiveModuleCallbacks basic_archive_callbacks = {
59-
.startup_cb = basic_archive_startup,
51+
.startup_cb = NULL,
6052
.check_configured_cb = basic_archive_configured,
6153
.archive_file_cb = basic_archive_file,
62-
.shutdown_cb = basic_archive_shutdown
54+
.shutdown_cb = NULL
6355
};
6456

6557
/*
@@ -93,24 +85,6 @@ _PG_archive_module_init(void)
9385
return &basic_archive_callbacks;
9486
}
9587

96-
/*
97-
* basic_archive_startup
98-
*
99-
* Creates the module's memory context.
100-
*/
101-
void
102-
basic_archive_startup(ArchiveModuleState *state)
103-
{
104-
BasicArchiveData *data;
105-
106-
data = (BasicArchiveData *) MemoryContextAllocZero(TopMemoryContext,
107-
sizeof(BasicArchiveData));
108-
data->context = AllocSetContextCreate(TopMemoryContext,
109-
"basic_archive",
110-
ALLOCSET_DEFAULT_SIZES);
111-
state->private_data = (void *) data;
112-
}
113-
11488
/*
11589
* check_archive_directory
11690
*
@@ -176,74 +150,6 @@ basic_archive_configured(ArchiveModuleState *state)
176150
*/
177151
static bool
178152
basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
179-
{
180-
sigjmp_buf local_sigjmp_buf;
181-
MemoryContext oldcontext;
182-
BasicArchiveData *data = (BasicArchiveData *) state->private_data;
183-
MemoryContext basic_archive_context = data->context;
184-
185-
/*
186-
* We run basic_archive_file_internal() in our own memory context so that
187-
* we can easily reset it during error recovery (thus avoiding memory
188-
* leaks).
189-
*/
190-
oldcontext = MemoryContextSwitchTo(basic_archive_context);
191-
192-
/*
193-
* Since the archiver operates at the bottom of the exception stack,
194-
* ERRORs turn into FATALs and cause the archiver process to restart.
195-
* However, using ereport(ERROR, ...) when there are problems is easy to
196-
* code and maintain. Therefore, we create our own exception handler to
197-
* catch ERRORs and return false instead of restarting the archiver
198-
* whenever there is a failure.
199-
*/
200-
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
201-
{
202-
/* Since not using PG_TRY, must reset error stack by hand */
203-
error_context_stack = NULL;
204-
205-
/* Prevent interrupts while cleaning up */
206-
HOLD_INTERRUPTS();
207-
208-
/* Report the error and clear ErrorContext for next time */
209-
EmitErrorReport();
210-
FlushErrorState();
211-
212-
/* Close any files left open by copy_file() or compare_files() */
213-
AtEOSubXact_Files(false, InvalidSubTransactionId, InvalidSubTransactionId);
214-
215-
/* Reset our memory context and switch back to the original one */
216-
MemoryContextSwitchTo(oldcontext);
217-
MemoryContextReset(basic_archive_context);
218-
219-
/* Remove our exception handler */
220-
PG_exception_stack = NULL;
221-
222-
/* Now we can allow interrupts again */
223-
RESUME_INTERRUPTS();
224-
225-
/* Report failure so that the archiver retries this file */
226-
return false;
227-
}
228-
229-
/* Enable our exception handler */
230-
PG_exception_stack = &local_sigjmp_buf;
231-
232-
/* Archive the file! */
233-
basic_archive_file_internal(file, path);
234-
235-
/* Remove our exception handler */
236-
PG_exception_stack = NULL;
237-
238-
/* Reset our memory context and switch back to the original one */
239-
MemoryContextSwitchTo(oldcontext);
240-
MemoryContextReset(basic_archive_context);
241-
242-
return true;
243-
}
244-
245-
static void
246-
basic_archive_file_internal(const char *file, const char *path)
247153
{
248154
char destination[MAXPGPATH];
249155
char temp[MAXPGPATH + 256];
@@ -277,7 +183,7 @@ basic_archive_file_internal(const char *file, const char *path)
277183
fsync_fname(destination, false);
278184
fsync_fname(archive_directory, true);
279185

280-
return;
186+
return true;
281187
}
282188

283189
ereport(ERROR,
@@ -317,6 +223,8 @@ basic_archive_file_internal(const char *file, const char *path)
317223

318224
ereport(DEBUG1,
319225
(errmsg("archived \"%s\" via basic_archive", file)));
226+
227+
return true;
320228
}
321229

322230
/*
@@ -399,35 +307,3 @@ compare_files(const char *file1, const char *file2)
399307

400308
return ret;
401309
}
402-
403-
/*
404-
* basic_archive_shutdown
405-
*
406-
* Frees our allocated state.
407-
*/
408-
static void
409-
basic_archive_shutdown(ArchiveModuleState *state)
410-
{
411-
BasicArchiveData *data = (BasicArchiveData *) state->private_data;
412-
MemoryContext basic_archive_context;
413-
414-
/*
415-
* If we didn't get to storing the pointer to our allocated state, we
416-
* don't have anything to clean up.
417-
*/
418-
if (data == NULL)
419-
return;
420-
421-
basic_archive_context = data->context;
422-
Assert(CurrentMemoryContext != basic_archive_context);
423-
424-
if (MemoryContextIsValid(basic_archive_context))
425-
MemoryContextDelete(basic_archive_context);
426-
data->context = NULL;
427-
428-
/*
429-
* Finally, free the state.
430-
*/
431-
pfree(data);
432-
state->private_data = NULL;
433-
}

doc/src/sgml/archive-modules.sgml

+10-1
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,21 @@ typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, cons
140140

141141
If <literal>true</literal> is returned, the server proceeds as if the file
142142
was successfully archived, which may include recycling or removing the
143-
original WAL file. If <literal>false</literal> is returned, the server will
143+
original WAL file. If <literal>false</literal> is returned or an error is thrown, the server will
144144
keep the original WAL file and retry archiving later.
145145
<replaceable>file</replaceable> will contain just the file name of the WAL
146146
file to archive, while <replaceable>path</replaceable> contains the full
147147
path of the WAL file (including the file name).
148148
</para>
149+
150+
<note>
151+
<para>
152+
The <function>archive_file_cb</function> callback is called in a
153+
short-lived memory context that will be reset between invocations. If you
154+
need longer-lived storage, create a memory context in the module's
155+
<function>startup_cb</function> callback.
156+
</para>
157+
</note>
149158
</sect2>
150159

151160
<sect2 id="archive-module-shutdown">

src/backend/archive/shell_archive.c

-5
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
7171
"archive_command", "fp",
7272
file, nativePath);
7373

74-
if (nativePath)
75-
pfree(nativePath);
76-
7774
ereport(DEBUG3,
7875
(errmsg_internal("executing archive command \"%s\"",
7976
xlogarchcmd)));
@@ -132,8 +129,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
132129
return false;
133130
}
134131

135-
pfree(xlogarchcmd);
136-
137132
elog(DEBUG1, "archived write-ahead log file \"%s\"", file);
138133
return true;
139134
}

src/backend/postmaster/pgarch.c

+92-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "postmaster/auxprocess.h"
4040
#include "postmaster/interrupt.h"
4141
#include "postmaster/pgarch.h"
42+
#include "storage/condition_variable.h"
4243
#include "storage/fd.h"
4344
#include "storage/ipc.h"
4445
#include "storage/latch.h"
@@ -49,6 +50,8 @@
4950
#include "utils/guc.h"
5051
#include "utils/memutils.h"
5152
#include "utils/ps_status.h"
53+
#include "utils/resowner.h"
54+
#include "utils/timeout.h"
5255

5356

5457
/* ----------
@@ -100,6 +103,7 @@ static time_t last_sigterm_time = 0;
100103
static PgArchData *PgArch = NULL;
101104
static const ArchiveModuleCallbacks *ArchiveCallbacks;
102105
static ArchiveModuleState *archive_module_state;
106+
static MemoryContext archive_context;
103107

104108

105109
/*
@@ -257,6 +261,11 @@ PgArchiverMain(char *startup_data, size_t startup_data_len)
257261
ready_file_comparator, false,
258262
NULL);
259263

264+
/* Initialize our memory context. */
265+
archive_context = AllocSetContextCreate(TopMemoryContext,
266+
"archiver",
267+
ALLOCSET_DEFAULT_SIZES);
268+
260269
/* Load the archive_library. */
261270
LoadArchiveLibrary();
262271

@@ -507,6 +516,8 @@ pgarch_ArchiverCopyLoop(void)
507516
static bool
508517
pgarch_archiveXlog(char *xlog)
509518
{
519+
sigjmp_buf local_sigjmp_buf;
520+
MemoryContext oldcontext;
510521
char pathname[MAXPGPATH];
511522
char activitymsg[MAXFNAMELEN + 16];
512523
bool ret;
@@ -517,7 +528,87 @@ pgarch_archiveXlog(char *xlog)
517528
snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
518529
set_ps_display(activitymsg);
519530

520-
ret = ArchiveCallbacks->archive_file_cb(archive_module_state, xlog, pathname);
531+
oldcontext = MemoryContextSwitchTo(archive_context);
532+
533+
/*
534+
* Since the archiver operates at the bottom of the exception stack,
535+
* ERRORs turn into FATALs and cause the archiver process to restart.
536+
* However, using ereport(ERROR, ...) when there are problems is easy to
537+
* code and maintain. Therefore, we create our own exception handler to
538+
* catch ERRORs and return false instead of restarting the archiver
539+
* whenever there is a failure.
540+
*
541+
* We assume ERRORs from the archiving callback are the most common
542+
* exceptions experienced by the archiver, so we opt to handle exceptions
543+
* here instead of PgArchiverMain() to avoid reinitializing the archiver
544+
* too frequently. We could instead add a sigsetjmp() block to
545+
* PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
546+
* avoid the odd archiver restart doesn't seem worth it.
547+
*/
548+
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
549+
{
550+
/* Since not using PG_TRY, must reset error stack by hand */
551+
error_context_stack = NULL;
552+
553+
/* Prevent interrupts while cleaning up */
554+
HOLD_INTERRUPTS();
555+
556+
/* Report the error to the server log. */
557+
EmitErrorReport();
558+
559+
/*
560+
* Try to clean up anything the archive module left behind. We try to
561+
* cover anything that an archive module could conceivably have left
562+
* behind, but it is of course possible that modules could be doing
563+
* unexpected things that require additional cleanup. Module authors
564+
* should be sure to do any extra required cleanup in a PG_CATCH block
565+
* within the archiving callback, and they are encouraged to notify
566+
* the pgsql-hackers mailing list so that we can add it here.
567+
*/
568+
disable_all_timeouts(false);
569+
LWLockReleaseAll();
570+
ConditionVariableCancelSleep();
571+
pgstat_report_wait_end();
572+
ReleaseAuxProcessResources(false);
573+
AtEOXact_Files(false);
574+
AtEOXact_HashTables(false);
575+
576+
/*
577+
* Return to the original memory context and clear ErrorContext for
578+
* next time.
579+
*/
580+
MemoryContextSwitchTo(oldcontext);
581+
FlushErrorState();
582+
583+
/* Flush any leaked data */
584+
MemoryContextReset(archive_context);
585+
586+
/* Remove our exception handler */
587+
PG_exception_stack = NULL;
588+
589+
/* Now we can allow interrupts again */
590+
RESUME_INTERRUPTS();
591+
592+
/* Report failure so that the archiver retries this file */
593+
ret = false;
594+
}
595+
else
596+
{
597+
/* Enable our exception handler */
598+
PG_exception_stack = &local_sigjmp_buf;
599+
600+
/* Archive the file! */
601+
ret = ArchiveCallbacks->archive_file_cb(archive_module_state,
602+
xlog, pathname);
603+
604+
/* Remove our exception handler */
605+
PG_exception_stack = NULL;
606+
607+
/* Reset our memory context and switch back to the original one */
608+
MemoryContextSwitchTo(oldcontext);
609+
MemoryContextReset(archive_context);
610+
}
611+
521612
if (ret)
522613
snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
523614
else

0 commit comments

Comments
 (0)