Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2c03216

Browse files
committed
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now dissects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-dissected format, instead of the plain XLogRecord. The new record format also makes the fixed-size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compensates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
1 parent 8dc626d commit 2c03216

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+3945
-4366
lines changed

contrib/pg_xlogdump/pg_xlogdump.c

+78-50
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "access/xlogreader.h"
1919
#include "access/xlogrecord.h"
20+
#include "access/xlog_internal.h"
2021
#include "access/transam.h"
2122
#include "common/fe_memutils.h"
2223
#include "getopt_long.h"
@@ -343,90 +344,117 @@ XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
343344
* Store per-rmgr and per-record statistics for a given record.
344345
*/
345346
static void
346-
XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats, XLogRecPtr ReadRecPtr, XLogRecord *record)
347+
XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
348+
XLogReaderState *record)
347349
{
348350
RmgrId rmid;
349351
uint8 recid;
352+
uint32 rec_len;
353+
uint32 fpi_len;
350354

351355
stats->count++;
352356

353357
/* Update per-rmgr statistics */
354358

355-
rmid = record->xl_rmid;
359+
rmid = XLogRecGetRmid(record);
360+
rec_len = XLogRecGetDataLen(record) + SizeOfXLogRecord;
361+
fpi_len = record->decoded_record->xl_tot_len - rec_len;
356362

357363
stats->rmgr_stats[rmid].count++;
358-
stats->rmgr_stats[rmid].rec_len +=
359-
record->xl_len + SizeOfXLogRecord;
360-
stats->rmgr_stats[rmid].fpi_len +=
361-
record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
364+
stats->rmgr_stats[rmid].rec_len += rec_len;
365+
stats->rmgr_stats[rmid].fpi_len += fpi_len;
362366

363367
/*
364368
* Update per-record statistics, where the record is identified by a
365-
* combination of the RmgrId and the four bits of the xl_info field
366-
* that are the rmgr's domain (resulting in sixteen possible entries
367-
* per RmgrId).
369+
* combination of the RmgrId and the four bits of the xl_info field that
370+
* are the rmgr's domain (resulting in sixteen possible entries per
371+
* RmgrId).
368372
*/
369373

370-
recid = record->xl_info >> 4;
374+
recid = XLogRecGetInfo(record) >> 4;
371375

372376
stats->record_stats[rmid][recid].count++;
373-
stats->record_stats[rmid][recid].rec_len +=
374-
record->xl_len + SizeOfXLogRecord;
375-
stats->record_stats[rmid][recid].fpi_len +=
376-
record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
377+
stats->record_stats[rmid][recid].rec_len += rec_len;
378+
stats->record_stats[rmid][recid].fpi_len += fpi_len;
377379
}
378380

379381
/*
380382
* Print a record to stdout
381383
*/
382384
static void
383-
XLogDumpDisplayRecord(XLogDumpConfig *config, XLogRecPtr ReadRecPtr, XLogRecord *record)
385+
XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
384386
{
385-
const char *id;
386-
const RmgrDescData *desc = &RmgrDescTable[record->xl_rmid];
387-
388-
id = desc->rm_identify(record->xl_info);
387+
const char *id;
388+
const RmgrDescData *desc = &RmgrDescTable[XLogRecGetRmid(record)];
389+
RelFileNode rnode;
390+
ForkNumber forknum;
391+
BlockNumber blk;
392+
int block_id;
393+
uint8 info = XLogRecGetInfo(record);
394+
XLogRecPtr xl_prev = XLogRecGetPrev(record);
395+
396+
id = desc->rm_identify(info);
389397
if (id == NULL)
390-
id = psprintf("UNKNOWN (%x)", record->xl_info & ~XLR_INFO_MASK);
398+
id = psprintf("UNKNOWN (%x)", info & ~XLR_INFO_MASK);
391399

392-
printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, bkp: %u%u%u%u, desc: %s ",
400+
printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
393401
desc->rm_name,
394-
record->xl_len, record->xl_tot_len,
395-
record->xl_xid,
396-
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
397-
(uint32) (record->xl_prev >> 32), (uint32) record->xl_prev,
398-
!!(XLR_BKP_BLOCK(0) & record->xl_info),
399-
!!(XLR_BKP_BLOCK(1) & record->xl_info),
400-
!!(XLR_BKP_BLOCK(2) & record->xl_info),
401-
!!(XLR_BKP_BLOCK(3) & record->xl_info),
402-
id);
402+
XLogRecGetDataLen(record), XLogRecGetTotalLen(record),
403+
XLogRecGetXid(record),
404+
(uint32) (record->ReadRecPtr >> 32), (uint32) record->ReadRecPtr,
405+
(uint32) (xl_prev >> 32), (uint32) xl_prev);
406+
printf("desc: %s ", id);
403407

404408
/* the desc routine will printf the description directly to stdout */
405409
desc->rm_desc(NULL, record);
406410

407-
putchar('\n');
408-
409-
if (config->bkp_details)
411+
if (!config->bkp_details)
410412
{
411-
int bkpnum;
412-
char *blk = (char *) XLogRecGetData(record) + record->xl_len;
413-
414-
for (bkpnum = 0; bkpnum < XLR_MAX_BKP_BLOCKS; bkpnum++)
413+
/* print block references (short format) */
414+
for (block_id = 0; block_id <= record->max_block_id; block_id++)
415415
{
416-
BkpBlock bkpb;
417-
418-
if (!(XLR_BKP_BLOCK(bkpnum) & record->xl_info))
416+
if (!XLogRecHasBlockRef(record, block_id))
419417
continue;
420418

421-
memcpy(&bkpb, blk, sizeof(BkpBlock));
422-
blk += sizeof(BkpBlock);
423-
blk += BLCKSZ - bkpb.hole_length;
419+
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
420+
if (forknum != MAIN_FORKNUM)
421+
printf(", blkref #%u: rel %u/%u/%u fork %s blk %u",
422+
block_id,
423+
rnode.spcNode, rnode.dbNode, rnode.relNode,
424+
forkNames[forknum],
425+
blk);
426+
else
427+
printf(", blkref #%u: rel %u/%u/%u blk %u",
428+
block_id,
429+
rnode.spcNode, rnode.dbNode, rnode.relNode,
430+
blk);
431+
if (XLogRecHasBlockImage(record, block_id))
432+
printf(" FPW");
433+
}
434+
putchar('\n');
435+
}
436+
else
437+
{
438+
/* print block references (detailed format) */
439+
putchar('\n');
440+
for (block_id = 0; block_id <= record->max_block_id; block_id++)
441+
{
442+
if (!XLogRecHasBlockRef(record, block_id))
443+
continue;
424444

425-
printf("\tbackup bkp #%u; rel %u/%u/%u; fork: %s; block: %u; hole: offset: %u, length: %u\n",
426-
bkpnum,
427-
bkpb.node.spcNode, bkpb.node.dbNode, bkpb.node.relNode,
428-
forkNames[bkpb.fork],
429-
bkpb.block, bkpb.hole_offset, bkpb.hole_length);
445+
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
446+
printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
447+
block_id,
448+
rnode.spcNode, rnode.dbNode, rnode.relNode,
449+
forkNames[forknum],
450+
blk);
451+
if (XLogRecHasBlockImage(record, block_id))
452+
{
453+
printf(" (FPW); hole: offset: %u, length: %u\n",
454+
record->blocks[block_id].hole_offset,
455+
record->blocks[block_id].hole_length);
456+
}
457+
putchar('\n');
430458
}
431459
}
432460
}
@@ -924,9 +952,9 @@ main(int argc, char **argv)
924952

925953
/* process the record */
926954
if (config.stats == true)
927-
XLogDumpCountRecord(&config, &stats, xlogreader_state->ReadRecPtr, record);
955+
XLogDumpCountRecord(&config, &stats, xlogreader_state);
928956
else
929-
XLogDumpDisplayRecord(&config, xlogreader_state->ReadRecPtr, record);
957+
XLogDumpDisplayRecord(&config, xlogreader_state);
930958

931959
/* check whether we printed enough */
932960
config.already_displayed_records++;

contrib/pg_xlogdump/rmgrdesc.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
typedef struct RmgrDescData
1414
{
1515
const char *rm_name;
16-
void (*rm_desc) (StringInfo buf, XLogRecord *record);
16+
void (*rm_desc) (StringInfo buf, XLogReaderState *record);
1717
const char *(*rm_identify) (uint8 info);
1818
} RmgrDescData;
1919

src/backend/access/brin/brin.c

+4-7
Original file line numberDiff line numberDiff line change
@@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
666666
{
667667
xl_brin_createidx xlrec;
668668
XLogRecPtr recptr;
669-
XLogRecData rdata;
670669
Page page;
671670

672-
xlrec.node = index->rd_node;
673671
xlrec.version = BRIN_CURRENT_VERSION;
674672
xlrec.pagesPerRange = BrinGetPagesPerRange(index);
675673

676-
rdata.buffer = InvalidBuffer;
677-
rdata.data = (char *) &xlrec;
678-
rdata.len = SizeOfBrinCreateIdx;
679-
rdata.next = NULL;
674+
XLogBeginInsert();
675+
XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
676+
XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
680677

681-
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
678+
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
682679

683680
page = BufferGetPage(meta);
684681
PageSetLSN(page, recptr);

src/backend/access/brin/brin_pageops.c

+31-66
Original file line numberDiff line numberDiff line change
@@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
140140
/* XLOG stuff */
141141
if (RelationNeedsWAL(idxrel))
142142
{
143-
BlockNumber blk = BufferGetBlockNumber(oldbuf);
144143
xl_brin_samepage_update xlrec;
145144
XLogRecPtr recptr;
146-
XLogRecData rdata[2];
147145
uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
148146

149-
xlrec.node = idxrel->rd_node;
150-
ItemPointerSetBlockNumber(&xlrec.tid, blk);
151-
ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
152-
rdata[0].data = (char *) &xlrec;
153-
rdata[0].len = SizeOfBrinSamepageUpdate;
154-
rdata[0].buffer = InvalidBuffer;
155-
rdata[0].next = &(rdata[1]);
147+
xlrec.offnum = oldoff;
156148

157-
rdata[1].data = (char *) newtup;
158-
rdata[1].len = newsz;
159-
rdata[1].buffer = oldbuf;
160-
rdata[1].buffer_std = true;
161-
rdata[1].next = NULL;
149+
XLogBeginInsert();
150+
XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
162151

163-
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
152+
XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
153+
XLogRegisterBufData(0, (char *) newtup, newsz);
154+
155+
recptr = XLogInsert(RM_BRIN_ID, info);
164156

165157
PageSetLSN(oldpage, recptr);
166158
}
@@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
211203
{
212204
xl_brin_update xlrec;
213205
XLogRecPtr recptr;
214-
XLogRecData rdata[4];
215206
uint8 info;
216207

217208
info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
218209

219-
xlrec.insert.node = idxrel->rd_node;
220-
ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
210+
xlrec.insert.offnum = newoff;
221211
xlrec.insert.heapBlk = heapBlk;
222-
xlrec.insert.tuplen = newsz;
223-
xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
224212
xlrec.insert.pagesPerRange = pagesPerRange;
225-
ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
213+
xlrec.oldOffnum = oldoff;
214+
215+
XLogBeginInsert();
226216

227-
rdata[0].data = (char *) &xlrec;
228-
rdata[0].len = SizeOfBrinUpdate;
229-
rdata[0].buffer = InvalidBuffer;
230-
rdata[0].next = &(rdata[1]);
217+
/* new page */
218+
XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
231219

232-
rdata[1].data = (char *) newtup;
233-
rdata[1].len = newsz;
234-
rdata[1].buffer = extended ? InvalidBuffer : newbuf;
235-
rdata[1].buffer_std = true;
236-
rdata[1].next = &(rdata[2]);
220+
XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
221+
XLogRegisterBufData(0, (char *) newtup, newsz);
237222

238-
rdata[2].data = (char *) NULL;
239-
rdata[2].len = 0;
240-
rdata[2].buffer = revmapbuf;
241-
rdata[2].buffer_std = true;
242-
rdata[2].next = &(rdata[3]);
223+
/* revmap page */
224+
XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
243225

244-
rdata[3].data = (char *) NULL;
245-
rdata[3].len = 0;
246-
rdata[3].buffer = oldbuf;
247-
rdata[3].buffer_std = true;
248-
rdata[3].next = NULL;
226+
/* old page */
227+
XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
249228

250-
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
229+
recptr = XLogInsert(RM_BRIN_ID, info);
251230

252231
PageSetLSN(oldpage, recptr);
253232
PageSetLSN(newpage, recptr);
@@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
354333
{
355334
xl_brin_insert xlrec;
356335
XLogRecPtr recptr;
357-
XLogRecData rdata[3];
358336
uint8 info;
359337

360338
info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
361-
xlrec.node = idxrel->rd_node;
362339
xlrec.heapBlk = heapBlk;
363340
xlrec.pagesPerRange = pagesPerRange;
364-
xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
365-
xlrec.tuplen = itemsz;
366-
ItemPointerSet(&xlrec.tid, blk, off);
367-
368-
rdata[0].data = (char *) &xlrec;
369-
rdata[0].len = SizeOfBrinInsert;
370-
rdata[0].buffer = InvalidBuffer;
371-
rdata[0].buffer_std = false;
372-
rdata[0].next = &(rdata[1]);
373-
374-
rdata[1].data = (char *) tup;
375-
rdata[1].len = itemsz;
376-
rdata[1].buffer = extended ? InvalidBuffer : *buffer;
377-
rdata[1].buffer_std = true;
378-
rdata[1].next = &(rdata[2]);
379-
380-
rdata[2].data = (char *) NULL;
381-
rdata[2].len = 0;
382-
rdata[2].buffer = revmapbuf;
383-
rdata[2].buffer_std = false;
384-
rdata[2].next = NULL;
385-
386-
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
341+
xlrec.offnum = off;
342+
343+
XLogBeginInsert();
344+
XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
345+
346+
XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
347+
XLogRegisterBufData(0, (char *) tup, itemsz);
348+
349+
XLogRegisterBuffer(1, revmapbuf, 0);
350+
351+
recptr = XLogInsert(RM_BRIN_ID, info);
387352

388353
PageSetLSN(page, recptr);
389354
PageSetLSN(BufferGetPage(revmapbuf), recptr);

src/backend/access/brin/brin_revmap.c

+8-15
Original file line numberDiff line numberDiff line change
@@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
477477
{
478478
xl_brin_revmap_extend xlrec;
479479
XLogRecPtr recptr;
480-
XLogRecData rdata[2];
481480

482-
xlrec.node = revmap->rm_irel->rd_node;
483481
xlrec.targetBlk = mapBlk;
484-
rdata[0].data = (char *) &xlrec;
485-
rdata[0].len = SizeOfBrinRevmapExtend;
486-
rdata[0].buffer = InvalidBuffer;
487-
rdata[0].buffer_std = false;
488-
rdata[0].next = &(rdata[1]);
489-
490-
rdata[1].data = (char *) NULL;
491-
rdata[1].len = 0;
492-
rdata[1].buffer = revmap->rm_metaBuf;
493-
rdata[1].buffer_std = false;
494-
rdata[1].next = NULL;
495-
496-
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
482+
483+
XLogBeginInsert();
484+
XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
485+
XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
486+
487+
XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
488+
489+
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
497490
PageSetLSN(metapage, recptr);
498491
PageSetLSN(page, recptr);
499492
}

0 commit comments

Comments
 (0)