@@ -70,6 +70,9 @@ static void _bt_recsplitloc(FindSplitData *state,
static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
                                bool usemult);
static int  _bt_splitcmp(const void *arg1, const void *arg2);
+static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
+                                int leaffillfactor, bool *usemult);
+static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
                                     bool *newitemonleft);
static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
@@ -249,9 +252,10 @@ _bt_findsplitloc(Relation rel,
 * Start search for a split point among list of legal split points. Give
 * primary consideration to equalizing available free space in each half
 * of the split initially (start with default strategy), while applying
- * rightmost optimization where appropriate. Either of the two other
- * fallback strategies may be required for cases with a large number of
- * duplicates around the original/space-optimal split point.
+ * rightmost and split-after-new-item optimizations where appropriate.
+ * Either of the two other fallback strategies may be required for cases
+ * with a large number of duplicates around the original/space-optimal
+ * split point.
 *
 * Default strategy gives some weight to suffix truncation in deciding a
 * split point on leaf pages. It attempts to select a split point where a
@@ -273,6 +277,44 @@ _bt_findsplitloc(Relation rel,
        usemult = true;
        fillfactormult = leaffillfactor / 100.0;
    }
+   else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
+   {
+       /*
+        * New item inserted at rightmost point among a localized grouping on
+        * a leaf page -- apply "split after new item" optimization, either by
+        * applying leaf fillfactor multiplier, or by choosing the exact split
+        * point that leaves the new item as last on the left. (usemult is set
+        * for us.)
+        */
+       if (usemult)
+       {
+           /* fillfactormult should be set based on leaf fillfactor */
+           fillfactormult = leaffillfactor / 100.0;
+       }
+       else
+       {
+           /* find precise split point after newitemoff */
+           for (int i = 0; i < state.nsplits; i++)
+           {
+               SplitPoint *split = state.splits + i;
+
+               if (split->newitemonleft &&
+                   newitemoff == split->firstoldonright)
+               {
+                   pfree(state.splits);
+                   *newitemonleft = true;
+                   return newitemoff;
+               }
+           }
+
+           /*
+            * Cannot legally split after newitemoff; proceed with split
+            * without using fillfactor multiplier. This is defensive, and
+            * should never be needed in practice.
+            */
+           fillfactormult = 0.50;
+       }
+   }
    else
    {
        /* Other leaf page. 50:50 page split. */
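
For readers skimming the hunk above: here is a rough, standalone sketch (not part of the commit) of the "find precise split point after newitemoff" loop, using simplified stand-in types rather than the real nbtree FindSplitData/SplitPoint definitions; the names with a "Sketch" suffix are hypothetical.

#include <stdbool.h>
#include <stdint.h>

typedef uint16_t OffsetNumberSketch;    /* stand-in for OffsetNumber */

/* Simplified stand-in for the SplitPoint entries kept by the split search */
typedef struct
{
    bool        newitemonleft;          /* new tuple would land on the left half */
    OffsetNumberSketch firstoldonright; /* first pre-existing tuple on the right */
} SplitPointSketch;

/*
 * Return the index of the legal candidate that leaves the incoming tuple as
 * the very last tuple on the left half, or -1 when no candidate qualifies
 * (the defensive fallback in the diff, which then uses fillfactormult = 0.50).
 */
static int
find_split_after_new_item(const SplitPointSketch *splits, int nsplits,
                          OffsetNumberSketch newitemoff)
{
    for (int i = 0; i < nsplits; i++)
    {
        if (splits[i].newitemonleft &&
            newitemoff == splits[i].firstoldonright)
            return i;
    }
    return -1;
}
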
@@ -519,6 +561,172 @@ _bt_splitcmp(const void *arg1, const void *arg2)
    return 0;
}

+/*
+ * Subroutine to determine whether or not a non-rightmost leaf page should be
+ * split immediately after the would-be original page offset for the
+ * new/incoming tuple (or should have leaf fillfactor applied when new item is
+ * to the right on original page). This is appropriate when there is a
+ * pattern of localized monotonically increasing insertions into a composite
+ * index, where leading attribute values form local groupings, and we
+ * anticipate further insertions of the same/current grouping (new item's
+ * grouping) in the near future. This can be thought of as a variation on
+ * applying leaf fillfactor during rightmost leaf page splits, since cases
+ * that benefit will converge on packing leaf pages leaffillfactor% full over
+ * time.
+ *
+ * We may leave extra free space remaining on the rightmost page of a "most
+ * significant column" grouping of tuples if that grouping never ends up
+ * having future insertions that use the free space. That effect is
+ * self-limiting; a future grouping that becomes the "nearest on the right"
+ * grouping of the affected grouping usually puts the extra free space to good
+ * use.
+ *
+ * Caller uses optimization when routine returns true, though the exact action
+ * taken by caller varies. Caller uses original leaf page fillfactor in
+ * standard way rather than using the new item offset directly when *usemult
+ * was also set to true here. Otherwise, caller applies optimization by
+ * locating the legal split point that makes the new tuple the very last tuple
+ * on the left side of the split.
+ */
+static bool
+_bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
+                    int leaffillfactor, bool *usemult)
+{
+   int16       nkeyatts;
+   ItemId      itemid;
+   IndexTuple  tup;
+   int         keepnatts;
+
+   Assert(state->is_leaf && !state->is_rightmost);
+
+   nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
+
+   /* Single key indexes not considered here */
+   if (nkeyatts == 1)
+       return false;
+
+   /* Ascending insertion pattern never inferred when new item is first */
+   if (state->newitemoff == P_FIRSTKEY)
+       return false;
+
+   /*
+    * Only apply optimization on pages with equisized tuples, since ordinal
+    * keys are likely to be fixed-width. Testing if the new tuple is
+    * variable width directly might also work, but that fails to apply the
+    * optimization to indexes with a numeric_ops attribute.
+    *
+    * Conclude that page has equisized tuples when the new item is the same
+    * width as the smallest item observed during pass over page, and other
+    * non-pivot tuples must be the same width as well. (Note that the
+    * possibly-truncated existing high key isn't counted in
+    * olddataitemstotal, and must be subtracted from maxoff.)
+    */
+   if (state->newitemsz != state->minfirstrightsz)
+       return false;
+   if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
+       return false;
+
+   /*
+    * Avoid applying optimization when tuples are wider than a tuple
+    * consisting of two non-NULL int8/int64 attributes (or four non-NULL
+    * int4/int32 attributes)
+    */
+   if (state->newitemsz >
+       MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
+       sizeof(ItemIdData))
+       return false;
+
+   /*
+    * At least the first attribute's value must be equal to the corresponding
+    * value in previous tuple to apply optimization. New item cannot be a
+    * duplicate, either.
+    *
+    * Handle case where new item is to the right of all items on the existing
+    * page. This is suggestive of monotonically increasing insertions in
+    * itself, so the "heap TID adjacency" test is not applied here.
+    */
+   if (state->newitemoff > maxoff)
+   {
+       itemid = PageGetItemId(state->page, maxoff);
+       tup = (IndexTuple) PageGetItem(state->page, itemid);
+       keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
+
+       if (keepnatts > 1 && keepnatts <= nkeyatts)
+       {
+           *usemult = true;
+           return true;
+       }
+
+       return false;
+   }
+
+   /*
+    * "Low cardinality leading column, high cardinality suffix column"
+    * indexes with a random insertion pattern (e.g., an index with a boolean
+    * column, such as an index on '(book_is_in_print, book_isbn)') present us
+    * with a risk of consistently misapplying the optimization. We're
+    * willing to accept very occasional misapplication of the optimization,
+    * provided the cases where we get it wrong are rare and self-limiting.
+    *
+    * Heap TID adjacency strongly suggests that the item just to the left was
+    * inserted very recently, which limits overapplication of the
+    * optimization. Besides, all inappropriate cases triggered here will
+    * still split in the middle of the page on average.
+    */
+   itemid = PageGetItemId(state->page, OffsetNumberPrev(state->newitemoff));
+   tup = (IndexTuple) PageGetItem(state->page, itemid);
+   /* Do cheaper test first */
+   if (!_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
+       return false;
+   /* Check same conditions as rightmost item case, too */
+   keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
+
+   if (keepnatts > 1 && keepnatts <= nkeyatts)
+   {
+       double      interp = (double) state->newitemoff / ((double) maxoff + 1);
+       double      leaffillfactormult = (double) leaffillfactor / 100.0;
+
+       /*
+        * Don't allow caller to split after a new item when it will result in
+        * a split point to the right of the point that a leaf fillfactor
+        * split would use -- have caller apply leaf fillfactor instead
+        */
+       *usemult = interp > leaffillfactormult;
+
+       return true;
+   }
+
+   return false;
+}
+
+/*
+ * Subroutine for determining if two heap TIDS are "adjacent".
+ *
+ * Adjacent means that the high TID is very likely to have been inserted into
+ * heap relation immediately after the low TID, probably during the current
+ * transaction.
+ */
+static bool
+_bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
+{
+   BlockNumber lowblk,
+               highblk;
+
+   lowblk = ItemPointerGetBlockNumber(lowhtid);
+   highblk = ItemPointerGetBlockNumber(highhtid);
+
+   /* Make optimistic assumption of adjacency when heap blocks match */
+   if (lowblk == highblk)
+       return true;
+
+   /* When heap block one up, second offset should be FirstOffsetNumber */
+   if (lowblk + 1 == highblk &&
+       ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
+       return true;
+
+   return false;
+}
+
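
An illustrative, self-contained sketch (again not part of the commit) of the same adjacency rule, with hypothetical plain-integer stand-ins for BlockNumber/OffsetNumber and a few concrete cases; the low TID's offset is omitted because the rule never looks at it.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIRST_OFFSET 1          /* stands in for FirstOffsetNumber */

/* Same rule as _bt_adjacenthtid, expressed on plain (block, offset) pairs */
static bool
htid_adjacent(uint32_t lowblk, uint32_t highblk, uint16_t highoff)
{
    /* optimistic assumption: same heap block means adjacent */
    if (lowblk == highblk)
        return true;
    /* next heap block, and the high TID sits in that block's first slot */
    if (lowblk + 1 == highblk && highoff == FIRST_OFFSET)
        return true;
    return false;
}

int
main(void)
{
    printf("%d\n", htid_adjacent(42, 42, 8));   /* same block: 1 */
    printf("%d\n", htid_adjacent(42, 43, 1));   /* next block, first slot: 1 */
    printf("%d\n", htid_adjacent(42, 44, 1));   /* skipped a block: 0 */
    return 0;
}
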
/*
 * Subroutine to find the "best" split point among candidate split points.
 * The best split point is the split point with the lowest penalty among split