Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 48c7d9f

Browse files
committed
Improve GIN indexscan cost estimation.
The better estimate requires more statistics than we previously stored: in particular, counts of "entry" versus "data" pages within the index, as well as knowledge of the number of distinct key values. We collect this information during initial index build and update it during VACUUM, storing the info in new fields on the index metapage. No initdb is required because these fields will read as zeroes in a pre-existing index, and the new gincostestimate code is coded to behave (reasonably) sanely if they are zeroes. Teodor Sigaev, reviewed by Jan Urbanski, Tom Lane, and Itagaki Takahiro.
1 parent cd0e825 commit 48c7d9f

File tree

10 files changed

+561
-42
lines changed

10 files changed

+561
-42
lines changed

src/backend/access/gin/ginbtree.c

+22-1
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,13 @@ findParents(GinBtree btree, GinBtreeStack *stack,
268268
/*
269269
* Insert value (stored in GinBtree) to tree described by stack
270270
*
271+
* During an index build, buildStats is non-null and the counters
272+
* it contains should be incremented as needed.
273+
*
271274
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
272275
*/
273276
void
274-
ginInsertValue(GinBtree btree, GinBtreeStack *stack)
277+
ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
275278
{
276279
GinBtreeStack *parent = stack;
277280
BlockNumber rootBlkno = InvalidBuffer;
@@ -330,6 +333,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
330333

331334
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
332335

336+
/* During index build, count the newly-split page */
337+
if (buildStats)
338+
{
339+
if (btree->isData)
340+
buildStats->nDataPages++;
341+
else
342+
buildStats->nEntryPages++;
343+
}
344+
333345
parent = stack->parent;
334346

335347
if (parent == NULL)
@@ -381,6 +393,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
381393

382394
freeGinBtreeStack(stack);
383395

396+
/* During index build, count the newly-added root page */
397+
if (buildStats)
398+
{
399+
if (btree->isData)
400+
buildStats->nDataPages++;
401+
else
402+
buildStats->nEntryPages++;
403+
}
404+
384405
return;
385406
}
386407
else

src/backend/access/gin/gindatapage.c

+8-3
Original file line numberDiff line numberDiff line change
@@ -592,9 +592,11 @@ void
592592
prepareDataScan(GinBtree btree, Relation index)
593593
{
594594
memset(btree, 0, sizeof(GinBtreeData));
595+
595596
btree->index = index;
596-
btree->isMoveRight = dataIsMoveRight;
597+
597598
btree->findChildPage = dataLocateItem;
599+
btree->isMoveRight = dataIsMoveRight;
598600
btree->findItem = dataLocateLeafItem;
599601
btree->findChildPtr = dataFindChildPtr;
600602
btree->getLeftMostPage = dataGetLeftMostPage;
@@ -603,6 +605,7 @@ prepareDataScan(GinBtree btree, Relation index)
603605
btree->splitPage = dataSplitPage;
604606
btree->fillRoot = dataFillRoot;
605607

608+
btree->isData = TRUE;
606609
btree->searchMode = FALSE;
607610
btree->isDelete = FALSE;
608611
btree->fullScan = FALSE;
@@ -628,7 +631,9 @@ prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
628631
* Inserts array of item pointers, may execute several tree scan (very rare)
629632
*/
630633
void
631-
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
634+
ginInsertItemPointer(GinPostingTreeScan *gdi,
635+
ItemPointerData *items, uint32 nitem,
636+
GinStatsData *buildStats)
632637
{
633638
BlockNumber rootBlkno = gdi->stack->blkno;
634639

@@ -653,7 +658,7 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
653658
freeGinBtreeStack(gdi->stack);
654659
}
655660
else
656-
ginInsertValue(&(gdi->btree), gdi->stack);
661+
ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
657662

658663
gdi->stack = NULL;
659664
}

src/backend/access/gin/ginentrypage.c

+9-7
Original file line numberDiff line numberDiff line change
@@ -659,8 +659,11 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
659659
{
660660
memset(btree, 0, sizeof(GinBtreeData));
661661

662-
btree->isMoveRight = entryIsMoveRight;
662+
btree->index = index;
663+
btree->ginstate = ginstate;
664+
663665
btree->findChildPage = entryLocateEntry;
666+
btree->isMoveRight = entryIsMoveRight;
664667
btree->findItem = entryLocateLeafEntry;
665668
btree->findChildPtr = entryFindChildPtr;
666669
btree->getLeftMostPage = entryGetLeftMostPage;
@@ -669,13 +672,12 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
669672
btree->splitPage = entrySplitPage;
670673
btree->fillRoot = entryFillRoot;
671674

672-
btree->index = index;
673-
btree->ginstate = ginstate;
674-
btree->entryAttnum = attnum;
675-
btree->entryValue = value;
676-
677-
btree->isDelete = FALSE;
675+
btree->isData = FALSE;
678676
btree->searchMode = FALSE;
679677
btree->fullScan = FALSE;
680678
btree->isBuild = FALSE;
679+
680+
btree->entryAttnum = attnum;
681+
btree->entryValue = value;
682+
btree->isDelete = FALSE;
681683
}

src/backend/access/gin/ginfast.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
789789
ginBeginBAScan(&accum);
790790
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
791791
{
792-
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
792+
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
793793
if (vac_delay)
794794
vacuum_delay_point();
795795
}
@@ -823,7 +823,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
823823

824824
ginBeginBAScan(&accum);
825825
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
826-
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
826+
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
827827
}
828828

829829
/*

src/backend/access/gin/gininsert.c

+41-15
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ typedef struct
2727
{
2828
GinState ginstate;
2929
double indtuples;
30+
GinStatsData buildStats;
3031
MemoryContext tmpCtx;
3132
MemoryContext funcCtx;
3233
BuildAccumulator accum;
@@ -97,8 +98,10 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
9798
* GinFormTuple().
9899
*/
99100
static IndexTuple
100-
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
101-
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild)
101+
addItemPointersToTuple(Relation index, GinState *ginstate,
102+
GinBtreeStack *stack, IndexTuple old,
103+
ItemPointerData *items, uint32 nitem,
104+
GinStatsData *buildStats)
102105
{
103106
Datum key = gin_index_getattr(ginstate, old);
104107
OffsetNumber attnum = gintuple_get_attrnum(ginstate, old);
@@ -128,30 +131,41 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
128131
GinSetPostingTree(res, postingRoot);
129132

130133
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
131-
gdi->btree.isBuild = isBuild;
134+
gdi->btree.isBuild = (buildStats != NULL);
132135

133-
insertItemPointer(gdi, items, nitem);
136+
ginInsertItemPointer(gdi, items, nitem, buildStats);
134137

135138
pfree(gdi);
139+
140+
/* During index build, count the newly-added data page */
141+
if (buildStats)
142+
buildStats->nDataPages++;
136143
}
137144

138145
return res;
139146
}
140147

141148
/*
142149
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
150+
*
151+
* During an index build, buildStats is non-null and the counters
152+
* it contains should be incremented as needed.
143153
*/
144154
void
145155
ginEntryInsert(Relation index, GinState *ginstate,
146156
OffsetNumber attnum, Datum value,
147157
ItemPointerData *items, uint32 nitem,
148-
bool isBuild)
158+
GinStatsData *buildStats)
149159
{
150160
GinBtreeData btree;
151161
GinBtreeStack *stack;
152162
IndexTuple itup;
153163
Page page;
154164

165+
/* During index build, count the to-be-inserted entry */
166+
if (buildStats)
167+
buildStats->nEntries++;
168+
155169
prepareEntryScan(&btree, index, attnum, value, ginstate);
156170

157171
stack = ginFindLeafPage(&btree, NULL);
@@ -174,14 +188,15 @@ ginEntryInsert(Relation index, GinState *ginstate,
174188

175189
/* insert into posting tree */
176190
gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
177-
gdi->btree.isBuild = isBuild;
178-
insertItemPointer(gdi, items, nitem);
191+
gdi->btree.isBuild = (buildStats != NULL);
192+
ginInsertItemPointer(gdi, items, nitem, buildStats);
179193
pfree(gdi);
180194

181195
return;
182196
}
183197

184-
itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);
198+
itup = addItemPointersToTuple(index, ginstate, stack, itup,
199+
items, nitem, buildStats);
185200

186201
btree.isDelete = TRUE;
187202
}
@@ -195,13 +210,14 @@ ginEntryInsert(Relation index, GinState *ginstate,
195210
/* Add the rest, making a posting tree if necessary */
196211
IndexTuple previtup = itup;
197212

198-
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
213+
itup = addItemPointersToTuple(index, ginstate, stack, previtup,
214+
items + 1, nitem - 1, buildStats);
199215
pfree(previtup);
200216
}
201217
}
202218

203219
btree.entry = itup;
204-
ginInsertValue(&btree, stack);
220+
ginInsertValue(&btree, stack, buildStats);
205221
pfree(itup);
206222
}
207223

@@ -260,7 +276,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
260276
{
261277
/* there could be many entries, so be willing to abort here */
262278
CHECK_FOR_INTERRUPTS();
263-
ginEntryInsert(index, &buildstate->ginstate, attnum, entry, list, nlist, TRUE);
279+
ginEntryInsert(index, &buildstate->ginstate, attnum, entry,
280+
list, nlist, &buildstate->buildStats);
264281
}
265282

266283
MemoryContextReset(buildstate->tmpCtx);
@@ -292,6 +309,8 @@ ginbuild(PG_FUNCTION_ARGS)
292309
RelationGetRelationName(index));
293310

294311
initGinState(&buildstate.ginstate, index);
312+
buildstate.indtuples = 0;
313+
memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
295314

296315
/* initialize the meta page */
297316
MetaBuffer = GinNewBuffer(index);
@@ -331,8 +350,8 @@ ginbuild(PG_FUNCTION_ARGS)
331350
UnlockReleaseBuffer(RootBuffer);
332351
END_CRIT_SECTION();
333352

334-
/* build the index */
335-
buildstate.indtuples = 0;
353+
/* count the root as first entry page */
354+
buildstate.buildStats.nEntryPages++;
336355

337356
/*
338357
* create a temporary memory context that is reset once for each tuple
@@ -367,12 +386,19 @@ ginbuild(PG_FUNCTION_ARGS)
367386
{
368387
/* there could be many entries, so be willing to abort here */
369388
CHECK_FOR_INTERRUPTS();
370-
ginEntryInsert(index, &buildstate.ginstate, attnum, entry, list, nlist, TRUE);
389+
ginEntryInsert(index, &buildstate.ginstate, attnum, entry,
390+
list, nlist, &buildstate.buildStats);
371391
}
372392
MemoryContextSwitchTo(oldCtx);
373393

374394
MemoryContextDelete(buildstate.tmpCtx);
375395

396+
/*
397+
* Update metapage stats
398+
*/
399+
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
400+
ginUpdateStats(index, &buildstate.buildStats);
401+
376402
/*
377403
* Return statistics
378404
*/
@@ -401,7 +427,7 @@ ginHeapTupleInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datu
401427
return 0;
402428

403429
for (i = 0; i < nentries; i++)
404-
ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, FALSE);
430+
ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, NULL);
405431

406432
return nentries;
407433
}

0 commit comments

Comments
 (0)