Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit be14f88

Browse files
Fix deduplication "single value" strategy bug.
It was possible for deduplication's single value strategy to mistakenly believe that a very small duplicate tuple counts as one of the six large tuples that it aims to leave behind after the page finally splits. This could cause slightly suboptimal space utilization with very low cardinality indexes, though only under fairly narrow conditions.

To fix, be particular about what kind of tuple counts as a maxpostingsize-capped tuple. This avoids confusion in the event of a small tuple that gets "wedged" between two large tuples, where all tuples on the page are duplicates of the same value.

Discussion: https://postgr.es/m/CAH2-Wz=Y+sgSFc-O3LpiZX-POx2bC+okec2KafERHuzdVa7-rQ@mail.gmail.com
Backpatch: 13-, where deduplication was introduced (by commit 0d861bb)
1 parent f9e9704 commit be14f88

File tree

4 files changed

+32
-13
lines changed

4 files changed

+32
-13
lines changed

src/backend/access/nbtree/nbtdedup.c

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ _bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel,
6262
Page page = BufferGetPage(buf);
6363
BTPageOpaque opaque;
6464
Page newpage;
65-
int newpagendataitems = 0;
6665
OffsetNumber deletable[MaxIndexTuplesPerPage];
6766
BTDedupState state;
6867
int ndeletable = 0;
@@ -124,6 +123,7 @@ _bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel,
124123
*/
125124
state = (BTDedupState) palloc(sizeof(BTDedupStateData));
126125
state->deduplicate = true;
126+
state->nmaxitems = 0;
127127
state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK);
128128
/* Metadata about base tuple of current pending posting list */
129129
state->base = NULL;
@@ -204,26 +204,25 @@ _bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel,
204204
* reset the state and move on without modifying the page.
205205
*/
206206
pagesaving += _bt_dedup_finish_pending(newpage, state);
207-
newpagendataitems++;
208207

209208
if (singlevalstrat)
210209
{
211210
/*
212211
* Single value strategy's extra steps.
213212
*
214-
* Lower maxpostingsize for sixth and final item that might be
215-
* deduplicated by current deduplication pass. When sixth
216-
* item formed/observed, stop deduplicating items.
213+
* Lower maxpostingsize for sixth and final large posting list
214+
* tuple at the point where 5 maxpostingsize-capped tuples
215+
* have either been formed or observed.
217216
*
218-
* Note: It's possible that this will be reached even when
219-
* current deduplication pass has yet to merge together some
220-
* existing items. It doesn't matter whether or not the
221-
* current call generated the maxpostingsize-capped duplicate
222-
* tuples at the start of the page.
217+
* When a sixth maxpostingsize-capped item is formed/observed,
218+
* stop merging together tuples altogether. The few tuples
219+
* that remain at the end of the page won't be merged together
220+
* at all (at least not until after a future page split takes
221+
* place).
223222
*/
224-
if (newpagendataitems == 5)
223+
if (state->nmaxitems == 5)
225224
_bt_singleval_fillfactor(page, state, newitemsz);
226-
else if (newpagendataitems == 6)
225+
else if (state->nmaxitems == 6)
227226
{
228227
state->deduplicate = false;
229228
singlevalstrat = false; /* won't be back here */
@@ -237,7 +236,6 @@ _bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel,
237236

238237
/* Handle the last item */
239238
pagesaving += _bt_dedup_finish_pending(newpage, state);
240-
newpagendataitems++;
241239

242240
/*
243241
* If no items suitable for deduplication were found, newpage must be
@@ -404,7 +402,24 @@ _bt_dedup_save_htid(BTDedupState state, IndexTuple itup)
404402
(state->nhtids + nhtids) * sizeof(ItemPointerData));
405403

406404
if (mergedtupsz > state->maxpostingsize)
405+
{
406+
/*
407+
* Count this as an oversized item for single value strategy, though
408+
* only when there are more than 50 TIDs in the final posting list tuple. This
409+
* limit (which is fairly arbitrary) avoids confusion about how many
410+
* 1/6 of a page tuples have been encountered/created by the current
411+
* deduplication pass.
412+
*
413+
* Note: We deliberately don't consider which deduplication pass
414+
* merged together tuples to create this item (could be a previous
415+
* deduplication pass, or current pass). See _bt_do_singleval()
416+
* comments.
417+
*/
418+
if (state->nhtids > 50)
419+
state->nmaxitems++;
420+
407421
return false;
422+
}
408423

409424
/*
410425
* Save heap TIDs to pending posting list tuple -- itup can be merged into

src/backend/access/nbtree/nbtsort.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,7 @@ _bt_sort_dedup_finish_pending(BTWriteState *wstate, BTPageState *state,
10951095
pfree(postingtuple);
10961096
}
10971097

1098+
dstate->nmaxitems = 0;
10981099
dstate->nhtids = 0;
10991100
dstate->nitems = 0;
11001101
dstate->phystupsize = 0;
@@ -1310,6 +1311,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
13101311

13111312
dstate = (BTDedupState) palloc(sizeof(BTDedupStateData));
13121313
dstate->deduplicate = true; /* unused */
1314+
dstate->nmaxitems = 0; /* unused */
13131315
dstate->maxpostingsize = 0; /* set later */
13141316
/* Metadata about base tuple of current pending posting list */
13151317
dstate->base = NULL;

src/backend/access/nbtree/nbtxlog.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ btree_xlog_dedup(XLogReaderState *record)
483483

484484
state = (BTDedupState) palloc(sizeof(BTDedupStateData));
485485
state->deduplicate = true; /* unused */
486+
state->nmaxitems = 0; /* unused */
486487
/* Conservatively use larger maxpostingsize than primary */
487488
state->maxpostingsize = BTMaxItemSize(page);
488489
state->base = NULL;

src/include/access/nbtree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,7 @@ typedef struct BTDedupStateData
739739
{
740740
/* Deduplication status info for entire pass over page */
741741
bool deduplicate; /* Still deduplicating page? */
742+
int nmaxitems; /* Number of max-sized tuples so far */
742743
Size maxpostingsize; /* Limit on size of final tuple */
743744

744745
/* Metadata about base tuple of current pending posting list */

0 commit comments

Comments
 (0)