Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d2e5e20

Browse files
Add xl_btree_delete optimization.
Commit 558a916 taught _bt_delitems_delete() to produce its own XID horizon on the primary. Standbys no longer needed to generate their own latestRemovedXid, since they could just use the explicitly logged value from the primary instead. The deleted offset numbers array from the xl_btree_delete WAL record was no longer used by the REDO routine for anything other than deleting the items.

This enables a minor optimization: We now treat the array as buffer state, not generic WAL data, following _bt_delitems_vacuum()'s example. This should be a minor win, since it allows us to avoid including the deleted items array in cases where XLogInsert() stores the whole buffer anyway. The primary goal here is to make the code more maintainable, though. Removing inessential differences between the two functions highlights the fundamental differences that remain.

Also change xl_btree_delete to use uint32 for the size of the array of item offsets being deleted. This brings xl_btree_delete closer to xl_btree_vacuum. Furthermore, it seems like a good idea to use an explicit-width integer type (the field was previously an "int").

Bump XLOG_PAGE_MAGIC because xl_btree_delete changed.

Discussion: https://postgr.es/m/CAH2-Wzkz4TjmezzfAbaV1zYrh=fr0bCpzuJTvBe5iUQ3aUPsCQ@mail.gmail.com
1 parent 56a3921 commit d2e5e20

File tree

6 files changed

+32
-42
lines changed

6 files changed

+32
-42
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -961,20 +961,15 @@ _bt_page_recyclable(Page page)
961961
}
962962

963963
/*
964-
* Delete item(s) from a btree page during VACUUM.
965-
*
966-
* This must only be used for deleting leaf items. Deleting an item on a
967-
* non-leaf page has to be done as part of an atomic action that includes
968-
* deleting the page it points to.
964+
* Delete item(s) from a btree leaf page during VACUUM.
969965
*
970966
* This routine assumes that the caller has a super-exclusive write lock on
971967
* the buffer. Also, the given deletable array *must* be sorted in ascending
972968
* order.
973969
*
974970
* We record VACUUMs and b-tree deletes differently in WAL. Deletes must
975-
* generate recovery conflicts by accessing the heap inline, whereas VACUUMs
976-
* can rely on the initial heap scan taking care of the problem (pruning would
977-
* have generated the conflicts needed for hot standby already).
971+
* generate their own latestRemovedXid by accessing the heap directly, whereas
972+
* VACUUMs rely on the initial heap scan taking care of it indirectly.
978973
*/
979974
void
980975
_bt_delitems_vacuum(Relation rel, Buffer buf,
@@ -1030,9 +1025,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
10301025
XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
10311026

10321027
/*
1033-
* The target-offsets array is not in the buffer, but pretend that it
1034-
* is. When XLogInsert stores the whole buffer, the offsets array
1035-
* need not be stored too.
1028+
* The deletable array is not in the buffer, but pretend that it is.
1029+
* When XLogInsert stores the whole buffer, the array need not be
1030+
* stored too.
10361031
*/
10371032
XLogRegisterBufData(0, (char *) deletable,
10381033
ndeletable * sizeof(OffsetNumber));
@@ -1046,40 +1041,38 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
10461041
}
10471042

10481043
/*
1049-
* Delete item(s) from a btree page during single-page cleanup.
1050-
*
1051-
* As above, must only be used on leaf pages.
1044+
* Delete item(s) from a btree leaf page during single-page cleanup.
10521045
*
10531046
* This routine assumes that the caller has pinned and write locked the
1054-
* buffer. Also, the given itemnos *must* appear in increasing order in the
1055-
* array.
1047+
* buffer. Also, the given deletable array *must* be sorted in ascending
1048+
* order.
10561049
*
10571050
* This is nearly the same as _bt_delitems_vacuum as far as what it does to
1058-
* the page, but it needs to generate its own recovery conflicts by accessing
1059-
* the heap. See comments for _bt_delitems_vacuum.
1051+
* the page, but it needs to generate its own latestRemovedXid by accessing
1052+
* the heap. This is used by the REDO routine to generate recovery conflicts.
10601053
*/
10611054
void
10621055
_bt_delitems_delete(Relation rel, Buffer buf,
1063-
OffsetNumber *itemnos, int nitems,
1056+
OffsetNumber *deletable, int ndeletable,
10641057
Relation heapRel)
10651058
{
10661059
Page page = BufferGetPage(buf);
10671060
BTPageOpaque opaque;
10681061
TransactionId latestRemovedXid = InvalidTransactionId;
10691062

10701063
/* Shouldn't be called unless there's something to do */
1071-
Assert(nitems > 0);
1064+
Assert(ndeletable > 0);
10721065

10731066
if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
10741067
latestRemovedXid =
10751068
index_compute_xid_horizon_for_tuples(rel, heapRel, buf,
1076-
itemnos, nitems);
1069+
deletable, ndeletable);
10771070

10781071
/* No ereport(ERROR) until changes are logged */
10791072
START_CRIT_SECTION();
10801073

10811074
/* Fix the page */
1082-
PageIndexMultiDelete(page, itemnos, nitems);
1075+
PageIndexMultiDelete(page, deletable, ndeletable);
10831076

10841077
/*
10851078
* Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
@@ -1098,18 +1091,19 @@ _bt_delitems_delete(Relation rel, Buffer buf,
10981091
xl_btree_delete xlrec_delete;
10991092

11001093
xlrec_delete.latestRemovedXid = latestRemovedXid;
1101-
xlrec_delete.nitems = nitems;
1094+
xlrec_delete.ndeleted = ndeletable;
11021095

11031096
XLogBeginInsert();
11041097
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
11051098
XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
11061099

11071100
/*
1108-
* We need the target-offsets array whether or not we store the whole
1109-
* buffer, to allow us to find the latestRemovedXid on a standby
1110-
* server.
1101+
* The deletable array is not in the buffer, but pretend that it is.
1102+
* When XLogInsert stores the whole buffer, the array need not be
1103+
* stored too.
11111104
*/
1112-
XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
1105+
XLogRegisterBufData(0, (char *) deletable,
1106+
ndeletable * sizeof(OffsetNumber));
11131107

11141108
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
11151109

src/backend/access/nbtree/nbtxlog.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -449,16 +449,11 @@ btree_xlog_delete(XLogReaderState *record)
449449
*/
450450
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
451451
{
452-
page = (Page) BufferGetPage(buffer);
453-
454-
if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
455-
{
456-
OffsetNumber *unused;
452+
char *ptr = XLogRecGetBlockData(record, 0, NULL);
457453

458-
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
454+
page = (Page) BufferGetPage(buffer);
459455

460-
PageIndexMultiDelete(page, unused, xlrec->nitems);
461-
}
456+
PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
462457

463458
/* Mark the page as not containing any LP_DEAD items */
464459
opaque = (BTPageOpaque) PageGetSpecialPointer(page);

src/backend/access/rmgrdesc/nbtdesc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ btree_desc(StringInfo buf, XLogReaderState *record)
5353
{
5454
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
5555

56-
appendStringInfo(buf, "%d items, latest removed xid %u",
57-
xlrec->nitems, xlrec->latestRemovedXid);
56+
appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u",
57+
xlrec->latestRemovedXid, xlrec->ndeleted);
5858
break;
5959
}
6060
case XLOG_BTREE_MARK_PAGE_HALFDEAD:

src/include/access/nbtree.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,8 @@ extern bool _bt_page_recyclable(Page page);
779779
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
780780
OffsetNumber *deletable, int ndeletable);
781781
extern void _bt_delitems_delete(Relation rel, Buffer buf,
782-
OffsetNumber *itemnos, int nitems, Relation heapRel);
782+
OffsetNumber *deletable, int ndeletable,
783+
Relation heapRel);
783784
extern int _bt_pagedel(Relation rel, Buffer buf);
784785

785786
/*

src/include/access/nbtxlog.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,12 @@ typedef struct xl_btree_split
126126
typedef struct xl_btree_delete
127127
{
128128
TransactionId latestRemovedXid;
129-
int nitems;
129+
uint32 ndeleted;
130130

131-
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
131+
/* DELETED TARGET OFFSET NUMBERS FOLLOW */
132132
} xl_btree_delete;
133133

134-
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
134+
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, ndeleted) + sizeof(uint32))
135135

136136
/*
137137
* This is what we need to know about page reuse within btree. This record

src/include/access/xlog_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
/*
3232
* Each page of XLOG file has a header like this:
3333
*/
34-
#define XLOG_PAGE_MAGIC 0xD103 /* can be used as WAL version indicator */
34+
#define XLOG_PAGE_MAGIC 0xD104 /* can be used as WAL version indicator */
3535

3636
typedef struct XLogPageHeaderData
3737
{

0 commit comments

Comments
 (0)