Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fc49e24

Browse files
committed
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB can be useful. A larger segment size has two main benefits: firstly, in setups using archiving, it makes it easier to write scripts that can keep up with higher amounts of WAL; secondly, the WAL has to be written and synced to disk less frequently. But at the same time, large segment sizes are disadvantageous for smaller databases. So far the segment size had to be configured at compile time, often making it unrealistic to choose one fitting a particular load. Therefore change it to an initdb-time setting. This includes a breaking change to the xlogreader.h API, which now requires the current segment size to be configured. For that and similar reasons a number of binaries had to be taught how to recognize the current segment size. Author: Beena Emerson, editorialized by Andres Freund Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael Paquier, Peter Eisentraut, Robert Haas, Tushar Ahuja Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
1 parent 5ada1fc commit fc49e24

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+897
-500
lines changed

configure

-54
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,6 @@ enable_tap_tests
821821
with_blocksize
822822
with_segsize
823823
with_wal_blocksize
824-
with_wal_segsize
825824
with_CC
826825
enable_depend
827826
enable_cassert
@@ -1518,8 +1517,6 @@ Optional Packages:
15181517
--with-segsize=SEGSIZE set table segment size in GB [1]
15191518
--with-wal-blocksize=BLOCKSIZE
15201519
set WAL block size in kB [8]
1521-
--with-wal-segsize=SEGSIZE
1522-
set WAL segment size in MB [16]
15231520
--with-CC=CMD set compiler (deprecated)
15241521
--with-icu build with ICU support
15251522
--with-tcl build Tcl modules (PL/Tcl)
@@ -3733,57 +3730,6 @@ cat >>confdefs.h <<_ACEOF
37333730
_ACEOF
37343731

37353732

3736-
#
3737-
# WAL segment size
3738-
#
3739-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WAL segment size" >&5
3740-
$as_echo_n "checking for WAL segment size... " >&6; }
3741-
3742-
3743-
3744-
# Check whether --with-wal-segsize was given.
3745-
if test "${with_wal_segsize+set}" = set; then :
3746-
withval=$with_wal_segsize;
3747-
case $withval in
3748-
yes)
3749-
as_fn_error $? "argument required for --with-wal-segsize option" "$LINENO" 5
3750-
;;
3751-
no)
3752-
as_fn_error $? "argument required for --with-wal-segsize option" "$LINENO" 5
3753-
;;
3754-
*)
3755-
wal_segsize=$withval
3756-
;;
3757-
esac
3758-
3759-
else
3760-
wal_segsize=16
3761-
fi
3762-
3763-
3764-
case ${wal_segsize} in
3765-
1) ;;
3766-
2) ;;
3767-
4) ;;
3768-
8) ;;
3769-
16) ;;
3770-
32) ;;
3771-
64) ;;
3772-
128) ;;
3773-
256) ;;
3774-
512) ;;
3775-
1024) ;;
3776-
*) as_fn_error $? "Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024." "$LINENO" 5
3777-
esac
3778-
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${wal_segsize}MB" >&5
3779-
$as_echo "${wal_segsize}MB" >&6; }
3780-
3781-
3782-
cat >>confdefs.h <<_ACEOF
3783-
#define XLOG_SEG_SIZE (${wal_segsize} * 1024 * 1024)
3784-
_ACEOF
3785-
3786-
37873733
#
37883734
# C compiler
37893735
#

configure.in

-31
Original file line numberDiff line numberDiff line change
@@ -343,37 +343,6 @@ AC_DEFINE_UNQUOTED([XLOG_BLCKSZ], ${XLOG_BLCKSZ}, [
343343
Changing XLOG_BLCKSZ requires an initdb.
344344
])
345345

346-
#
347-
# WAL segment size
348-
#
349-
AC_MSG_CHECKING([for WAL segment size])
350-
PGAC_ARG_REQ(with, wal-segsize, [SEGSIZE], [set WAL segment size in MB [16]],
351-
[wal_segsize=$withval],
352-
[wal_segsize=16])
353-
case ${wal_segsize} in
354-
1) ;;
355-
2) ;;
356-
4) ;;
357-
8) ;;
358-
16) ;;
359-
32) ;;
360-
64) ;;
361-
128) ;;
362-
256) ;;
363-
512) ;;
364-
1024) ;;
365-
*) AC_MSG_ERROR([Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024.])
366-
esac
367-
AC_MSG_RESULT([${wal_segsize}MB])
368-
369-
AC_DEFINE_UNQUOTED([XLOG_SEG_SIZE], [(${wal_segsize} * 1024 * 1024)], [
370-
XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2
371-
and larger than XLOG_BLCKSZ (preferably, a great deal larger than
372-
XLOG_BLCKSZ).
373-
374-
Changing XLOG_SEG_SIZE requires an initdb.
375-
])
376-
377346
#
378347
# C compiler
379348
#

contrib/pg_standby/pg_standby.c

+101-14
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636

3737
const char *progname;
3838

39+
int WalSegSz = -1;
40+
3941
/* Options and defaults */
4042
int sleeptime = 5; /* amount of time to sleep between file checks */
4143
int waittime = -1; /* how long we have been waiting, -1 no wait
@@ -100,6 +102,10 @@ int nextWALFileType;
100102

101103
struct stat stat_buf;
102104

105+
static bool SetWALFileNameForCleanup(void);
106+
static bool SetWALSegSize(void);
107+
108+
103109
/* =====================================================================
104110
*
105111
* Customizable section
@@ -175,6 +181,35 @@ CustomizableNextWALFileReady(void)
175181
{
176182
if (stat(WALFilePath, &stat_buf) == 0)
177183
{
184+
/*
185+
* If we've not seen any WAL segments, we don't know the WAL segment
186+
* size, which we need. If it looks like a WAL segment, determine size
187+
* of segments for the cluster.
188+
*/
189+
if (WalSegSz == -1 && IsXLogFileName(nextWALFileName))
190+
{
191+
if (SetWALSegSize())
192+
{
193+
/*
194+
* Successfully determined WAL segment size. Can compute
195+
* cleanup cutoff now.
196+
*/
197+
need_cleanup = SetWALFileNameForCleanup();
198+
if (debug)
199+
{
200+
fprintf(stderr,
201+
_("WAL segment size: %d \n"), WalSegSz);
202+
fprintf(stderr, "Keep archive history: ");
203+
204+
if (need_cleanup)
205+
fprintf(stderr, "%s and later\n",
206+
exclusiveCleanupFileName);
207+
else
208+
fprintf(stderr, "no cleanup required\n");
209+
}
210+
}
211+
}
212+
178213
/*
179214
* If it's a backup file, return immediately. If it's a regular file
180215
* return only if it's the right size already.
@@ -184,7 +219,7 @@ CustomizableNextWALFileReady(void)
184219
nextWALFileType = XLOG_BACKUP_LABEL;
185220
return true;
186221
}
187-
else if (stat_buf.st_size == XLOG_SEG_SIZE)
222+
else if (WalSegSz > 0 && stat_buf.st_size == WalSegSz)
188223
{
189224
#ifdef WIN32
190225

@@ -204,7 +239,7 @@ CustomizableNextWALFileReady(void)
204239
/*
205240
* If still too small, wait until it is the correct size
206241
*/
207-
if (stat_buf.st_size > XLOG_SEG_SIZE)
242+
if (WalSegSz > 0 && stat_buf.st_size > WalSegSz)
208243
{
209244
if (debug)
210245
{
@@ -218,8 +253,6 @@ CustomizableNextWALFileReady(void)
218253
return false;
219254
}
220255

221-
#define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )
222-
223256
static void
224257
CustomizableCleanupPriorWALFiles(void)
225258
{
@@ -315,6 +348,7 @@ SetWALFileNameForCleanup(void)
315348
uint32 log_diff = 0,
316349
seg_diff = 0;
317350
bool cleanup = false;
351+
int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz);
318352

319353
if (restartWALFileName)
320354
{
@@ -336,12 +370,12 @@ SetWALFileNameForCleanup(void)
336370
sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
337371
if (tli > 0 && seg > 0)
338372
{
339-
log_diff = keepfiles / MaxSegmentsPerLogFile;
340-
seg_diff = keepfiles % MaxSegmentsPerLogFile;
373+
log_diff = keepfiles / max_segments_per_logfile;
374+
seg_diff = keepfiles % max_segments_per_logfile;
341375
if (seg_diff > seg)
342376
{
343377
log_diff++;
344-
seg = MaxSegmentsPerLogFile - (seg_diff - seg);
378+
seg = max_segments_per_logfile - (seg_diff - seg);
345379
}
346380
else
347381
seg -= seg_diff;
@@ -364,6 +398,66 @@ SetWALFileNameForCleanup(void)
364398
return cleanup;
365399
}
366400

401+
/*
402+
* Try to set the wal segment size from the WAL file specified by WALFilePath.
403+
*
404+
* Return true if size could be determined, false otherwise.
405+
*/
406+
static bool
407+
SetWALSegSize(void)
408+
{
409+
bool ret_val = false;
410+
int fd;
411+
char *buf = (char *) malloc(XLOG_BLCKSZ);
412+
413+
Assert(WalSegSz == -1);
414+
415+
if ((fd = open(WALFilePath, O_RDWR, 0)) < 0)
416+
{
417+
fprintf(stderr, "%s: couldn't open WAL file \"%s\"\n",
418+
progname, WALFilePath);
419+
return false;
420+
}
421+
if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
422+
{
423+
XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
424+
425+
WalSegSz = longhdr->xlp_seg_size;
426+
427+
if (IsValidWalSegSize(WalSegSz))
428+
{
429+
/* successfully retrieved WAL segment size */
430+
ret_val = true;
431+
}
432+
else
433+
fprintf(stderr,
434+
"%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n",
435+
progname, WalSegSz);
436+
close(fd);
437+
}
438+
else
439+
{
440+
/*
441+
* Don't complain loudly, this is to be expected for segments being
442+
* created.
443+
*/
444+
if (errno != 0)
445+
{
446+
if (debug)
447+
fprintf(stderr, "could not read file \"%s\": %s",
448+
WALFilePath, strerror(errno));
449+
}
450+
else
451+
{
452+
if (debug)
453+
fprintf(stderr, "not enough data in file \"%s\"", WALFilePath);
454+
}
455+
}
456+
457+
fflush(stderr);
458+
return ret_val;
459+
}
460+
367461
/*
368462
* CheckForExternalTrigger()
369463
*
@@ -708,8 +802,6 @@ main(int argc, char **argv)
708802

709803
CustomizableInitialize();
710804

711-
need_cleanup = SetWALFileNameForCleanup();
712-
713805
if (debug)
714806
{
715807
fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
@@ -721,11 +813,6 @@ main(int argc, char **argv)
721813
fprintf(stderr, "Max wait interval: %d %s\n",
722814
maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
723815
fprintf(stderr, "Command for restore: %s\n", restoreCommand);
724-
fprintf(stderr, "Keep archive history: ");
725-
if (need_cleanup)
726-
fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
727-
else
728-
fprintf(stderr, "no cleanup required\n");
729816
fflush(stderr);
730817
}
731818

doc/src/sgml/backup.sgml

+1-1
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ tar -cf backup.tar /usr/local/pgsql/data
562562
produces an indefinitely long sequence of WAL records. The system
563563
physically divides this sequence into WAL <firstterm>segment
564564
files</>, which are normally 16MB apiece (although the segment size
565-
can be altered when building <productname>PostgreSQL</>). The segment
565+
can be altered during <application>initdb</>). The segment
566566
files are given numeric names that reflect their position in the
567567
abstract WAL sequence. When not using WAL archiving, the system
568568
normally creates just a few segment files and then

doc/src/sgml/installation.sgml

-14
Original file line numberDiff line numberDiff line change
@@ -1058,20 +1058,6 @@ su - postgres
10581058
</listitem>
10591059
</varlistentry>
10601060

1061-
<varlistentry>
1062-
<term><option>--with-wal-segsize=<replaceable>SEGSIZE</replaceable></option></term>
1063-
<listitem>
1064-
<para>
1065-
Set the <firstterm>WAL segment size</>, in megabytes. This is
1066-
the size of each individual file in the WAL log. It may be useful
1067-
to adjust this size to control the granularity of WAL log shipping.
1068-
The default size is 16 megabytes.
1069-
The value must be a power of 2 between 1 and 1024 (megabytes).
1070-
Note that changing this value requires an initdb.
1071-
</para>
1072-
</listitem>
1073-
</varlistentry>
1074-
10751061
<varlistentry>
10761062
<term><option>--with-wal-blocksize=<replaceable>BLOCKSIZE</replaceable></option></term>
10771063
<listitem>

doc/src/sgml/ref/initdb.sgml

+15
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,21 @@ PostgreSQL documentation
316316
</varlistentry>
317317

318318
<varlistentry>
319+
<term><option>--wal-segsize=<replaceable>SEGSIZE</replaceable></option></term>
320+
<listitem>
321+
<para>
322+
Set the <firstterm>WAL segment size</>, in megabytes. This is
323+
the size of each individual file in the WAL log. It may be useful
324+
to adjust this size to control the granularity of WAL log shipping.
325+
This option can only be set during initialization, and cannot be
326+
changed later.
327+
The default size is 16 megabytes.
328+
The value must be a power of 2 between 1 and 1024 (megabytes).
329+
</para>
330+
</listitem>
331+
</varlistentry>
332+
333+
<varlistentry>
319334
<term><option>-X <replaceable class="parameter">directory</replaceable></option></term>
320335
<term><option>--waldir=<replaceable class="parameter">directory</replaceable></option></term>
321336
<listitem>

doc/src/sgml/wal.sgml

+6-7
Original file line numberDiff line numberDiff line change
@@ -752,13 +752,12 @@
752752
<acronym>WAL</acronym> logs are stored in the directory
753753
<filename>pg_wal</filename> under the data directory, as a set of
754754
segment files, normally each 16 MB in size (but the size can be changed
755-
by altering the <option>--with-wal-segsize</> configure option when
756-
building the server). Each segment is divided into pages, normally
757-
8 kB each (this size can be changed via the <option>--with-wal-blocksize</>
758-
configure option). The log record headers are described in
759-
<filename>access/xlogrecord.h</filename>; the record content is dependent
760-
on the type of event that is being logged. Segment files are given
761-
ever-increasing numbers as names, starting at
755+
by altering the <option>--wal-segsize</> initdb option). Each segment is
756+
divided into pages, normally 8 kB each (this size can be changed via the
757+
<option>--with-wal-blocksize</> configure option). The log record headers
758+
are described in <filename>access/xlogrecord.h</filename>; the record
759+
content is dependent on the type of event that is being logged. Segment
760+
files are given ever-increasing numbers as names, starting at
762761
<filename>000000010000000000000000</filename>. The numbers do not wrap,
763762
but it will take a very, very long time to exhaust the
764763
available stock of numbers.

src/backend/access/transam/twophase.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,8 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
12991299
XLogReaderState *xlogreader;
13001300
char *errormsg;
13011301

1302-
xlogreader = XLogReaderAllocate(&read_local_xlog_page, NULL);
1302+
xlogreader = XLogReaderAllocate(wal_segment_size, &read_local_xlog_page,
1303+
NULL);
13031304
if (!xlogreader)
13041305
ereport(ERROR,
13051306
(errcode(ERRCODE_OUT_OF_MEMORY),

0 commit comments

Comments
 (0)