Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit eaef111

Browse files
committed
Define a separately configurable XLOG_BLCKSZ symbol for the page size
used within WAL files. Historically this was the same as the data file BLCKSZ, but there's no necessary connection, and it's possible that performance gains might ensue from reducing XLOG_BLCKSZ. In any case, distinguishing the two symbols should improve code clarity. This commit does not actually change the page size; it only provides the infrastructure to make it possible to do so. initdb forced because of addition of a field to pg_control. Mark Wong, with some help from Simon Riggs and Tom Lane.
1 parent c8c864c commit eaef111

File tree

7 files changed

+89
-64
lines changed

7 files changed

+89
-64
lines changed

doc/src/sgml/runtime.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.365 2006/03/10 19:10:49 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.366 2006/04/03 23:35:02 tgl Exp $ -->
22

33
<chapter Id="runtime">
44
<title>Operating System Environment</title>
@@ -1061,7 +1061,7 @@ set semsys:seminfo_semmsl=32
10611061

10621062
<row>
10631063
<entry><xref linkend="guc-wal-buffers"></>
1064-
<entry>8200 (assuming 8K <symbol>BLCKSZ</>)</entry>
1064+
<entry>8200 (assuming 8K <symbol>XLOG_BLCKSZ</>)</entry>
10651065
</row>
10661066

10671067
<row>

src/backend/access/transam/xlog.c

Lines changed: 55 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.231 2006/03/31 23:32:05 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.232 2006/04/03 23:35:03 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -113,10 +113,10 @@
113113

114114
/*
115115
* Limitation of buffer-alignment for direct IO depends on OS and filesystem,
116-
* but BLCKSZ is assumed to be enough for it.
116+
* but XLOG_BLCKSZ is assumed to be enough for it.
117117
*/
118118
#ifdef O_DIRECT
119-
#define ALIGNOF_XLOG_BUFFER BLCKSZ
119+
#define ALIGNOF_XLOG_BUFFER XLOG_BLCKSZ
120120
#else
121121
#define ALIGNOF_XLOG_BUFFER ALIGNOF_BUFFER
122122
#endif
@@ -374,7 +374,7 @@ typedef struct XLogCtlData
374374
* and xlblocks values depends on WALInsertLock and WALWriteLock.
375375
*/
376376
char *pages; /* buffers for unwritten XLOG pages */
377-
XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */
377+
XLogRecPtr *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */
378378
Size XLogCacheByte; /* # bytes in xlog buffers */
379379
int XLogCacheBlck; /* highest allocated xlog buffer index */
380380
TimeLineID ThisTimeLineID;
@@ -397,7 +397,7 @@ static ControlFileData *ControlFile = NULL;
397397

398398
/* Free space remaining in the current xlog page buffer */
399399
#define INSERT_FREESPACE(Insert) \
400-
(BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
400+
(XLOG_BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))
401401

402402
/* Construct XLogRecPtr value for current insertion point */
403403
#define INSERT_RECPTR(recptr,Insert,curridx) \
@@ -441,7 +441,7 @@ static uint32 readId = 0;
441441
static uint32 readSeg = 0;
442442
static uint32 readOff = 0;
443443

444-
/* Buffer for currently read page (BLCKSZ bytes) */
444+
/* Buffer for currently read page (XLOG_BLCKSZ bytes) */
445445
static char *readBuf = NULL;
446446

447447
/* Buffer for current ReadRecord result (expandable) */
@@ -706,7 +706,7 @@ begin:;
706706
* If cache is half filled then try to acquire write lock and do
707707
* XLogWrite. Ignore any fractional blocks in performing this check.
708708
*/
709-
LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % BLCKSZ;
709+
LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
710710
if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
711711
(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
712712
XLogCtl->XLogCacheByte / 2))
@@ -1228,12 +1228,12 @@ AdvanceXLInsertBuffer(void)
12281228
{
12291229
/* crossing a logid boundary */
12301230
NewPageEndPtr.xlogid += 1;
1231-
NewPageEndPtr.xrecoff = BLCKSZ;
1231+
NewPageEndPtr.xrecoff = XLOG_BLCKSZ;
12321232
}
12331233
else
1234-
NewPageEndPtr.xrecoff += BLCKSZ;
1234+
NewPageEndPtr.xrecoff += XLOG_BLCKSZ;
12351235
XLogCtl->xlblocks[nextidx] = NewPageEndPtr;
1236-
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) BLCKSZ);
1236+
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
12371237

12381238
Insert->curridx = nextidx;
12391239
Insert->currpage = NewPage;
@@ -1244,7 +1244,7 @@ AdvanceXLInsertBuffer(void)
12441244
* Be sure to re-zero the buffer so that bytes beyond what we've written
12451245
* will look like zeroes and not valid XLOG records...
12461246
*/
1247-
MemSet((char *) NewPage, 0, BLCKSZ);
1247+
MemSet((char *) NewPage, 0, XLOG_BLCKSZ);
12481248

12491249
/*
12501250
* Fill the new page's header
@@ -1254,7 +1254,7 @@ AdvanceXLInsertBuffer(void)
12541254
/* NewPage->xlp_info = 0; */ /* done by memset */
12551255
NewPage ->xlp_tli = ThisTimeLineID;
12561256
NewPage ->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
1257-
NewPage ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ;
1257+
NewPage ->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - XLOG_BLCKSZ;
12581258

12591259
/*
12601260
* If first page of an XLOG segment file, make it a long header.
@@ -1428,7 +1428,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
14281428
{
14291429
/* first of group */
14301430
startidx = curridx;
1431-
startoffset = (LogwrtResult.Write.xrecoff - BLCKSZ) % XLogSegSize;
1431+
startoffset = (LogwrtResult.Write.xrecoff - XLOG_BLCKSZ) % XLogSegSize;
14321432
}
14331433
npages++;
14341434

@@ -1439,7 +1439,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
14391439
* segment.
14401440
*/
14411441
finishing_seg = !ispartialpage &&
1442-
(startoffset + npages * BLCKSZ) >= XLogSegSize;
1442+
(startoffset + npages * XLOG_BLCKSZ) >= XLogSegSize;
14431443

14441444
if (!XLByteLT(LogwrtResult.Write, WriteRqst.Write) ||
14451445
curridx == XLogCtl->XLogCacheBlck ||
@@ -1461,8 +1461,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
14611461
}
14621462

14631463
/* OK to write the page(s) */
1464-
from = XLogCtl->pages + startidx * (Size) BLCKSZ;
1465-
nbytes = npages * (Size) BLCKSZ;
1464+
from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
1465+
nbytes = npages * (Size) XLOG_BLCKSZ;
14661466
errno = 0;
14671467
if (write(openLogFile, from, nbytes) != nbytes)
14681468
{
@@ -1720,7 +1720,7 @@ XLogFileInit(uint32 log, uint32 seg,
17201720
{
17211721
char path[MAXPGPATH];
17221722
char tmppath[MAXPGPATH];
1723-
char zbuffer[BLCKSZ];
1723+
char zbuffer[XLOG_BLCKSZ];
17241724
uint32 installed_log;
17251725
uint32 installed_seg;
17261726
int max_advance;
@@ -1858,7 +1858,7 @@ XLogFileCopy(uint32 log, uint32 seg,
18581858
{
18591859
char path[MAXPGPATH];
18601860
char tmppath[MAXPGPATH];
1861-
char buffer[BLCKSZ];
1861+
char buffer[XLOG_BLCKSZ];
18621862
int srcfd;
18631863
int fd;
18641864
int nbytes;
@@ -2637,7 +2637,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
26372637
* (2) a static char array isn't guaranteed to have any particular
26382638
* alignment, whereas malloc() will provide MAXALIGN'd storage.
26392639
*/
2640-
readBuf = (char *) malloc(BLCKSZ);
2640+
readBuf = (char *) malloc(XLOG_BLCKSZ);
26412641
Assert(readBuf != NULL);
26422642
}
26432643

@@ -2651,8 +2651,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
26512651
goto got_record;
26522652
}
26532653
/* align old recptr to next page */
2654-
if (tmpRecPtr.xrecoff % BLCKSZ != 0)
2655-
tmpRecPtr.xrecoff += (BLCKSZ - tmpRecPtr.xrecoff % BLCKSZ);
2654+
if (tmpRecPtr.xrecoff % XLOG_BLCKSZ != 0)
2655+
tmpRecPtr.xrecoff += (XLOG_BLCKSZ - tmpRecPtr.xrecoff % XLOG_BLCKSZ);
26562656
if (tmpRecPtr.xrecoff >= XLogFileSize)
26572657
{
26582658
(tmpRecPtr.xlogid)++;
@@ -2696,7 +2696,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
26962696
readOff = (uint32) (-1); /* force read to occur below */
26972697
}
26982698

2699-
targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
2699+
targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
27002700
if (readOff != targetPageOff)
27012701
{
27022702
readOff = targetPageOff;
@@ -2708,7 +2708,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
27082708
readId, readSeg, readOff)));
27092709
goto next_record_is_invalid;
27102710
}
2711-
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
2711+
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
27122712
{
27132713
ereport(emode,
27142714
(errcode_for_file_access(),
@@ -2720,7 +2720,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
27202720
goto next_record_is_invalid;
27212721
}
27222722
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
2723-
targetRecOff = RecPtr->xrecoff % BLCKSZ;
2723+
targetRecOff = RecPtr->xrecoff % XLOG_BLCKSZ;
27242724
if (targetRecOff == 0)
27252725
{
27262726
/*
@@ -2746,7 +2746,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
27462746
RecPtr->xlogid, RecPtr->xrecoff)));
27472747
goto next_record_is_invalid;
27482748
}
2749-
record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
2749+
record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % XLOG_BLCKSZ);
27502750

27512751
got_record:;
27522752

@@ -2811,17 +2811,18 @@ got_record:;
28112811

28122812
/*
28132813
* Allocate or enlarge readRecordBuf as needed. To avoid useless small
2814-
* increases, round its size to a multiple of BLCKSZ, and make sure it's
2815-
* at least 4*BLCKSZ to start with. (That is enough for all "normal"
2816-
* records, but very large commit or abort records might need more space.)
2814+
* increases, round its size to a multiple of XLOG_BLCKSZ, and make sure
2815+
* it's at least 4*Max(BLCKSZ, XLOG_BLCKSZ) to start with. (That is
2816+
* enough for all "normal" records, but very large commit or abort records
2817+
* might need more space.)
28172818
*/
28182819
total_len = record->xl_tot_len;
28192820
if (total_len > readRecordBufSize)
28202821
{
28212822
uint32 newSize = total_len;
28222823

2823-
newSize += BLCKSZ - (newSize % BLCKSZ);
2824-
newSize = Max(newSize, 4 * BLCKSZ);
2824+
newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
2825+
newSize = Max(newSize, 4 * Max(BLCKSZ, XLOG_BLCKSZ));
28252826
if (readRecordBuf)
28262827
free(readRecordBuf);
28272828
readRecordBuf = (char *) malloc(newSize);
@@ -2839,7 +2840,7 @@ got_record:;
28392840

28402841
buffer = readRecordBuf;
28412842
nextRecord = NULL;
2842-
len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
2843+
len = XLOG_BLCKSZ - RecPtr->xrecoff % XLOG_BLCKSZ;
28432844
if (total_len > len)
28442845
{
28452846
/* Need to reassemble record */
@@ -2851,7 +2852,7 @@ got_record:;
28512852
buffer += len;
28522853
for (;;)
28532854
{
2854-
readOff += BLCKSZ;
2855+
readOff += XLOG_BLCKSZ;
28552856
if (readOff >= XLogSegSize)
28562857
{
28572858
close(readFile);
@@ -2862,7 +2863,7 @@ got_record:;
28622863
goto next_record_is_invalid;
28632864
readOff = 0;
28642865
}
2865-
if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
2866+
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
28662867
{
28672868
ereport(emode,
28682869
(errcode_for_file_access(),
@@ -2890,7 +2891,7 @@ got_record:;
28902891
readId, readSeg, readOff)));
28912892
goto next_record_is_invalid;
28922893
}
2893-
len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
2894+
len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
28942895
if (contrecord->xl_rem_len > len)
28952896
{
28962897
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
@@ -2905,7 +2906,7 @@ got_record:;
29052906
if (!RecordIsValid(record, *RecPtr, emode))
29062907
goto next_record_is_invalid;
29072908
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
2908-
if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
2909+
if (XLOG_BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
29092910
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
29102911
{
29112912
nextRecord = (XLogRecord *) ((char *) contrecord +
@@ -2922,7 +2923,7 @@ got_record:;
29222923
/* Record does not cross a page boundary */
29232924
if (!RecordIsValid(record, *RecPtr, emode))
29242925
goto next_record_is_invalid;
2925-
if (BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % BLCKSZ +
2926+
if (XLOG_BLCKSZ - SizeOfXLogRecord >= RecPtr->xrecoff % XLOG_BLCKSZ +
29262927
MAXALIGN(total_len))
29272928
nextRecord = (XLogRecord *) ((char *) record + MAXALIGN(total_len));
29282929
EndRecPtr.xlogid = RecPtr->xlogid;
@@ -3404,6 +3405,7 @@ WriteControlFile(void)
34043405

34053406
ControlFile->blcksz = BLCKSZ;
34063407
ControlFile->relseg_size = RELSEG_SIZE;
3408+
ControlFile->xlog_blcksz = XLOG_BLCKSZ;
34073409
ControlFile->xlog_seg_size = XLOG_SEG_SIZE;
34083410

34093411
ControlFile->nameDataLen = NAMEDATALEN;
@@ -3572,6 +3574,13 @@ ReadControlFile(void)
35723574
" but the server was compiled with RELSEG_SIZE %d.",
35733575
ControlFile->relseg_size, RELSEG_SIZE),
35743576
errhint("It looks like you need to recompile or initdb.")));
3577+
if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
3578+
ereport(FATAL,
3579+
(errmsg("database files are incompatible with server"),
3580+
errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
3581+
" but the server was compiled with XLOG_BLCKSZ %d.",
3582+
ControlFile->xlog_blcksz, XLOG_BLCKSZ),
3583+
errhint("It looks like you need to recompile or initdb.")));
35753584
if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE)
35763585
ereport(FATAL,
35773586
(errmsg("database files are incompatible with server"),
@@ -3696,7 +3705,7 @@ XLOGShmemSize(void)
36963705
/* extra alignment padding for XLOG I/O buffers */
36973706
size = add_size(size, ALIGNOF_XLOG_BUFFER);
36983707
/* and the buffers themselves */
3699-
size = add_size(size, mul_size(BLCKSZ, XLOGbuffers));
3708+
size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
37003709

37013710
/*
37023711
* Note: we don't count ControlFileData, it comes out of the "slop factor"
@@ -3743,13 +3752,13 @@ XLOGShmemInit(void)
37433752
*/
37443753
allocptr = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, allocptr);
37453754
XLogCtl->pages = allocptr;
3746-
memset(XLogCtl->pages, 0, (Size) BLCKSZ * XLOGbuffers);
3755+
memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
37473756

37483757
/*
37493758
* Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
37503759
* in additional info.)
37513760
*/
3752-
XLogCtl->XLogCacheByte = (Size) BLCKSZ *XLOGbuffers;
3761+
XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ * XLOGbuffers;
37533762

37543763
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
37553764
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
@@ -3801,9 +3810,9 @@ BootStrapXLOG(void)
38013810
ThisTimeLineID = 1;
38023811

38033812
/* page buffer must be aligned suitably for O_DIRECT */
3804-
buffer = (char *) palloc(BLCKSZ + ALIGNOF_XLOG_BUFFER);
3813+
buffer = (char *) palloc(XLOG_BLCKSZ + ALIGNOF_XLOG_BUFFER);
38053814
page = (XLogPageHeader) TYPEALIGN(ALIGNOF_XLOG_BUFFER, buffer);
3806-
memset(page, 0, BLCKSZ);
3815+
memset(page, 0, XLOG_BLCKSZ);
38073816

38083817
/* Set up information for the initial checkpoint record */
38093818
checkPoint.redo.xlogid = 0;
@@ -3855,7 +3864,7 @@ BootStrapXLOG(void)
38553864

38563865
/* Write the first page with the initial record */
38573866
errno = 0;
3858-
if (write(openLogFile, page, BLCKSZ) != BLCKSZ)
3867+
if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
38593868
{
38603869
/* if write didn't set errno, assume problem is no disk space */
38613870
if (errno == 0)
@@ -4712,17 +4721,17 @@ StartupXLOG(void)
47124721
Insert->PrevRecord = LastRec;
47134722
XLogCtl->xlblocks[0].xlogid = openLogId;
47144723
XLogCtl->xlblocks[0].xrecoff =
4715-
((EndOfLog.xrecoff - 1) / BLCKSZ + 1) * BLCKSZ;
4724+
((EndOfLog.xrecoff - 1) / XLOG_BLCKSZ + 1) * XLOG_BLCKSZ;
47164725

47174726
/*
47184727
* Tricky point here: readBuf contains the *last* block that the LastRec
47194728
* record spans, not the one it starts in. The last block is indeed the
47204729
* one we want to use.
47214730
*/
4722-
Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - BLCKSZ) % XLogSegSize);
4723-
memcpy((char *) Insert->currpage, readBuf, BLCKSZ);
4731+
Assert(readOff == (XLogCtl->xlblocks[0].xrecoff - XLOG_BLCKSZ) % XLogSegSize);
4732+
memcpy((char *) Insert->currpage, readBuf, XLOG_BLCKSZ);
47244733
Insert->currpos = (char *) Insert->currpage +
4725-
(EndOfLog.xrecoff + BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
4734+
(EndOfLog.xrecoff + XLOG_BLCKSZ - XLogCtl->xlblocks[0].xrecoff);
47264735

47274736
LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
47284737

src/bin/pg_controldata/pg_controldata.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
77
* licence: BSD
88
*
9-
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.27 2005/10/15 02:49:37 momjian Exp $
9+
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.28 2006/04/03 23:35:04 tgl Exp $
1010
*/
1111
#include "postgres.h"
1212

@@ -172,6 +172,7 @@ main(int argc, char *argv[])
172172
/* we don't print floatFormat since can't say much useful about it */
173173
printf(_("Database block size: %u\n"), ControlFile.blcksz);
174174
printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
175+
printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz);
175176
printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size);
176177
printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
177178
printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys);

0 commit comments

Comments
 (0)