Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 94e0333

Browse files
committed
Create a routine PageIndexMultiDelete() that replaces a loop around
PageIndexTupleDelete() with a single pass of compactification --- logic mostly lifted from PageRepairFragmentation. I noticed while profiling that a VACUUM that's cleaning up a whole lot of deleted tuples would spend as much as a third of its CPU time in PageIndexTupleDelete; not too surprising considering the loop method was roughly O(N^2) in the number of tuples involved.
1 parent 775d283 commit 94e0333

File tree

4 files changed

+144
-19
lines changed

4 files changed

+144
-19
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
639639
OffsetNumber *itemnos, int nitems)
640640
{
641641
Page page = BufferGetPage(buf);
642-
int i;
643642

644643
/* No ereport(ERROR) until changes are logged */
645644
START_CRIT_SECTION();
646645

647-
/*
648-
* Delete the items in reverse order so we don't have to think about
649-
* adjusting item numbers for previous deletions.
650-
*/
651-
for (i = nitems - 1; i >= 0; i--)
652-
PageIndexTupleDelete(page, itemnos[i]);
646+
/* Fix the page */
647+
PageIndexMultiDelete(page, itemnos, nitems);
653648

654649
/* XLOG stuff */
655650
if (!rel->rd_istemp)

src/backend/access/nbtree/nbtxlog.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
411411
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
412412
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
413413

414-
/* be careful to delete from back to front */
415-
while (unused < unend)
416-
{
417-
unend--;
418-
PageIndexTupleDelete(page, *unend);
419-
}
414+
PageIndexMultiDelete(page, unused, unend - unused);
420415
}
421416

422417
PageSetLSN(page, lsn);

src/backend/storage/page/bufpage.c

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
274274
}
275275

276276
/*
277-
* sorting support for PageRepairFragmentation
277+
* sorting support for PageRepairFragmentation and PageIndexMultiDelete
278278
*/
279279
typedef struct itemIdSortData
280280
{
281281
int offsetindex; /* linp array index */
282282
int itemoff; /* page offset of item data */
283283
Size alignedlen; /* MAXALIGN(item data len) */
284+
ItemIdData olditemid; /* used only in PageIndexMultiDelete */
284285
} itemIdSortData;
285286
typedef itemIdSortData *itemIdSort;
286287

@@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
297298
*
298299
* Frees fragmented space on a page.
299300
* It doesn't remove unused line pointers! Please don't change this.
300-
* This routine is usable for heap pages only.
301+
*
302+
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
301303
*
302304
* Returns number of unused line pointers on page. If "unused" is not NULL
303305
* then the unused[] array is filled with indexes of unused line pointers.
@@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
543545
}
544546
}
545547
}
548+
549+
550+
/*
551+
* PageIndexMultiDelete
552+
*
553+
* This routine handles the case of deleting multiple tuples from an
554+
* index page at once. It is considerably faster than a loop around
555+
* PageIndexTupleDelete ... however, the caller *must* supply the array
556+
* of item numbers to be deleted in item number order!
557+
*/
558+
void
559+
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
560+
{
561+
PageHeader phdr = (PageHeader) page;
562+
Offset pd_lower = phdr->pd_lower;
563+
Offset pd_upper = phdr->pd_upper;
564+
Offset pd_special = phdr->pd_special;
565+
itemIdSort itemidbase,
566+
itemidptr;
567+
ItemId lp;
568+
int nline,
569+
nused;
570+
int i;
571+
Size totallen;
572+
Offset upper;
573+
Size size;
574+
unsigned offset;
575+
int nextitm;
576+
OffsetNumber offnum;
577+
578+
/*
579+
* If there aren't very many items to delete, then retail
580+
* PageIndexTupleDelete is the best way. Delete the items in reverse
581+
* order so we don't have to think about adjusting item numbers for
582+
* previous deletions.
583+
*
584+
* TODO: tune the magic number here
585+
*/
586+
if (nitems <= 2)
587+
{
588+
while (--nitems >= 0)
589+
PageIndexTupleDelete(page, itemnos[nitems]);
590+
return;
591+
}
592+
593+
/*
594+
* As with PageRepairFragmentation, paranoia seems justified.
595+
*/
596+
if (pd_lower < SizeOfPageHeaderData ||
597+
pd_lower > pd_upper ||
598+
pd_upper > pd_special ||
599+
pd_special > BLCKSZ ||
600+
pd_special != MAXALIGN(pd_special))
601+
ereport(ERROR,
602+
(errcode(ERRCODE_DATA_CORRUPTED),
603+
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
604+
pd_lower, pd_upper, pd_special)));
605+
606+
/*
607+
* Scan the item pointer array and build a list of just the ones we
608+
* are going to keep. Notice we do not modify the page yet, since
609+
* we are still validity-checking.
610+
*/
611+
nline = PageGetMaxOffsetNumber(page);
612+
itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);
613+
itemidptr = itemidbase;
614+
totallen = 0;
615+
nused = 0;
616+
nextitm = 0;
617+
for (offnum = 1; offnum <= nline; offnum++)
618+
{
619+
lp = PageGetItemId(page, offnum);
620+
size = ItemIdGetLength(lp);
621+
offset = ItemIdGetOffset(lp);
622+
if (offset < pd_upper ||
623+
(offset + size) > pd_special ||
624+
offset != MAXALIGN(offset))
625+
ereport(ERROR,
626+
(errcode(ERRCODE_DATA_CORRUPTED),
627+
errmsg("corrupted item pointer: offset = %u, size = %u",
628+
offset, (unsigned int) size)));
629+
630+
if (nextitm < nitems && offnum == itemnos[nextitm])
631+
{
632+
/* skip item to be deleted */
633+
nextitm++;
634+
}
635+
else
636+
{
637+
itemidptr->offsetindex = nused; /* where it will go */
638+
itemidptr->itemoff = offset;
639+
itemidptr->olditemid = *lp;
640+
itemidptr->alignedlen = MAXALIGN(size);
641+
totallen += itemidptr->alignedlen;
642+
itemidptr++;
643+
nused++;
644+
}
645+
}
646+
647+
/* this will catch invalid or out-of-order itemnos[] */
648+
if (nextitm != nitems)
649+
elog(ERROR, "incorrect index offsets supplied");
650+
651+
if (totallen > (Size) (pd_special - pd_lower))
652+
ereport(ERROR,
653+
(errcode(ERRCODE_DATA_CORRUPTED),
654+
errmsg("corrupted item lengths: total %u, available space %u",
655+
(unsigned int) totallen, pd_special - pd_lower)));
656+
657+
/* sort itemIdSortData array into decreasing itemoff order */
658+
qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
659+
itemoffcompare);
660+
661+
/* compactify page and install new itemids */
662+
upper = pd_special;
663+
664+
for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
665+
{
666+
lp = PageGetItemId(page, itemidptr->offsetindex + 1);
667+
upper -= itemidptr->alignedlen;
668+
memmove((char *) page + upper,
669+
(char *) page + itemidptr->itemoff,
670+
itemidptr->alignedlen);
671+
*lp = itemidptr->olditemid;
672+
lp->lp_off = upper;
673+
}
674+
675+
phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
676+
phdr->pd_upper = upper;
677+
678+
pfree(itemidbase);
679+
}

src/include/storage/bufpage.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
323323
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
324324
extern Size PageGetFreeSpace(Page page);
325325
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
326+
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
326327

327328
#endif /* BUFPAGE_H */

0 commit comments

Comments
 (0)