Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 626a120

Browse files
committed
Further optimize GIN multi-key searches.
When skipping over some items in a posting tree, re-find the new location by descending the tree from root, rather than walking the right links. This can save a lot of I/O. Heavily modified from Alexander Korotkov's fast scan patch.
1 parent 8440897 commit 626a120

File tree

3 files changed

+98
-29
lines changed

3 files changed

+98
-29
lines changed

src/backend/access/gin/gindatapage.c

+4-5
Original file line numberDiff line numberDiff line change
@@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
16391639
* Starts a new scan on a posting tree.
16401640
*/
16411641
GinBtreeStack *
1642-
ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
1642+
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
16431643
{
1644-
GinBtreeData btree;
16451644
GinBtreeStack *stack;
16461645

1647-
ginPrepareDataScan(&btree, index, rootBlkno);
1646+
ginPrepareDataScan(btree, index, rootBlkno);
16481647

1649-
btree.fullScan = TRUE;
1648+
btree->fullScan = TRUE;
16501649

1651-
stack = ginFindLeafPage(&btree, TRUE);
1650+
stack = ginFindLeafPage(btree, TRUE);
16521651

16531652
return stack;
16541653
}

src/backend/access/gin/ginget.c

+92-23
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,13 @@ static void
9999
scanPostingTree(Relation index, GinScanEntry scanEntry,
100100
BlockNumber rootPostingTree)
101101
{
102+
GinBtreeData btree;
102103
GinBtreeStack *stack;
103104
Buffer buffer;
104105
Page page;
105106

106107
/* Descend to the leftmost leaf page */
107-
stack = ginScanBeginPostingTree(index, rootPostingTree);
108+
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
108109
buffer = stack->buffer;
109110
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
110111

@@ -412,7 +413,8 @@ startScanEntry(GinState *ginstate, GinScanEntry entry)
412413
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
413414
needUnlock = FALSE;
414415

415-
stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
416+
stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
417+
rootPostingTree);
416418
entry->buffer = stack->buffer;
417419

418420
/*
@@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
506508
{
507509
Page page;
508510
int i;
511+
bool stepright;
512+
513+
if (!BufferIsValid(entry->buffer))
514+
{
515+
entry->isFinished = true;
516+
return;
517+
}
518+
519+
/*
520+
* We have two strategies for finding the correct page: step right from
521+
* the current page, or descend the tree again from the root. If
522+
* advancePast equals the current item, the next matching item should be
523+
* on the next page, so we step right. Otherwise, descend from root.
524+
*/
525+
if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
526+
{
527+
stepright = true;
528+
LockBuffer(entry->buffer, GIN_SHARE);
529+
}
530+
else
531+
{
532+
GinBtreeStack *stack;
533+
534+
ReleaseBuffer(entry->buffer);
535+
536+
/*
537+
* Set the search key, and find the correct leaf page.
538+
*/
539+
if (ItemPointerIsLossyPage(&advancePast))
540+
{
541+
ItemPointerSet(&entry->btree.itemptr,
542+
GinItemPointerGetBlockNumber(&advancePast) + 1,
543+
FirstOffsetNumber);
544+
}
545+
else
546+
{
547+
entry->btree.itemptr = advancePast;
548+
entry->btree.itemptr.ip_posid++;
549+
}
550+
entry->btree.fullScan = false;
551+
stack = ginFindLeafPage(&entry->btree, true);
552+
553+
/* we don't need the stack, just the buffer. */
554+
entry->buffer = stack->buffer;
555+
IncrBufferRefCount(entry->buffer);
556+
freeGinBtreeStack(stack);
557+
stepright = false;
558+
}
559+
560+
elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
561+
GinItemPointerGetBlockNumber(&advancePast),
562+
GinItemPointerGetOffsetNumber(&advancePast),
563+
!stepright);
509564

510-
LockBuffer(entry->buffer, GIN_SHARE);
511565
page = BufferGetPage(entry->buffer);
512566
for (;;)
513567
{
@@ -519,30 +573,34 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
519573
entry->nlist = 0;
520574
}
521575

522-
/*
523-
* We've processed all the entries on this page. If it was the last
524-
* page in the tree, we're done.
525-
*/
526-
if (GinPageRightMost(page))
576+
if (stepright)
527577
{
528-
UnlockReleaseBuffer(entry->buffer);
529-
entry->buffer = InvalidBuffer;
530-
entry->isFinished = TRUE;
531-
return;
578+
/*
579+
* We've processed all the entries on this page. If it was the last
580+
* page in the tree, we're done.
581+
*/
582+
if (GinPageRightMost(page))
583+
{
584+
UnlockReleaseBuffer(entry->buffer);
585+
entry->buffer = InvalidBuffer;
586+
entry->isFinished = TRUE;
587+
return;
588+
}
589+
590+
/*
591+
* Step to next page, following the right link. then find the first
592+
* ItemPointer greater than advancePast.
593+
*/
594+
entry->buffer = ginStepRight(entry->buffer,
595+
ginstate->index,
596+
GIN_SHARE);
597+
page = BufferGetPage(entry->buffer);
532598
}
599+
stepright = true;
533600

534601
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
535602
continue; /* page was deleted by concurrent vacuum */
536603

537-
/*
538-
* Step to next page, following the right link. then find the first
539-
* ItemPointer greater than advancePast.
540-
*/
541-
entry->buffer = ginStepRight(entry->buffer,
542-
ginstate->index,
543-
GIN_SHARE);
544-
page = BufferGetPage(entry->buffer);
545-
546604
/*
547605
* The first item > advancePast might not be on this page, but
548606
* somewhere to the right, if the page was split, or a non-match from
@@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
566624
{
567625
if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
568626
{
569-
LockBuffer(entry->buffer, GIN_UNLOCK);
570627
entry->offset = i;
628+
629+
if (GinPageRightMost(page))
630+
{
631+
/* after processing the copied items, we're done. */
632+
UnlockReleaseBuffer(entry->buffer);
633+
entry->buffer = InvalidBuffer;
634+
}
635+
else
636+
LockBuffer(entry->buffer, GIN_UNLOCK);
571637
return;
572638
}
573639
}
@@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
677743
}
678744
else if (!BufferIsValid(entry->buffer))
679745
{
680-
/* A posting list from an entry tuple */
746+
/*
747+
* A posting list from an entry tuple, or the last page of a posting
748+
* tree.
749+
*/
681750
do
682751
{
683752
if (entry->offset >= entry->nlist)

src/include/access/gin_private.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
702702
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
703703
ItemPointerData *items, uint32 nitem,
704704
GinStatsData *buildStats);
705-
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
705+
extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
706706
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
707707
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
708708

@@ -802,6 +802,7 @@ typedef struct GinScanEntryData
802802
bool isFinished;
803803
bool reduceResult;
804804
uint32 predictNumberResult;
805+
GinBtreeData btree;
805806
} GinScanEntryData;
806807

807808
typedef struct GinScanOpaqueData

0 commit comments

Comments
 (0)