Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e5adcb7

Browse files
Refactor nbtree insertion scankeys.
Use dedicated struct to represent nbtree insertion scan keys. Having a dedicated struct makes the difference between search type scankeys and insertion scankeys a lot clearer, and simplifies the signature of several related functions. This is based on a suggestion by Andrey Lepikhov. Streamline how unique index insertions cache binary search progress. Cache the state of in-progress binary searches within _bt_check_unique() for later instead of having callers avoid repeating the binary search in an ad-hoc manner. This makes it easy to add a new optimization: _bt_check_unique() now falls out of its loop immediately in the common case where it's already clear that there couldn't possibly be a duplicate. The new _bt_check_unique() scheme makes it a lot easier to manage cached binary search effort afterwards, from within _bt_findinsertloc(). This is needed for the upcoming patch to make nbtree tuples unique by treating heap TID as a final tiebreaker column. Unique key binary searches need to restore lower and upper bounds. They cannot simply continue to use the >= lower bound as the offset to insert at, because the heap TID tiebreaker column must be used in comparisons for the restored binary search (unlike the original _bt_check_unique() binary search, where scankey's heap TID column must be omitted). Author: Peter Geoghegan, Heikki Linnakangas Reviewed-By: Heikki Linnakangas, Andrey Lepikhov Discussion: https://postgr.es/m/CAH2-WzmE6AhUdk9NdWBf4K3HjWXZBX3+umC7mH7+WDrKcRtsOw@mail.gmail.com
1 parent 550b9d2 commit e5adcb7

File tree

9 files changed

+529
-387
lines changed

9 files changed

+529
-387
lines changed

contrib/amcheck/verify_nbtree.c

+24-28
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127127
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
128128
BtreeLevel level);
129129
static void bt_target_page_check(BtreeCheckState *state);
130-
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
131-
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
132-
ScanKey targetkey);
130+
static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state);
131+
static void bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
132+
BlockNumber childblock);
133133
static void bt_downlink_missing_check(BtreeCheckState *state);
134134
static void bt_tuple_present_callback(Relation index, HeapTuple htup,
135135
Datum *values, bool *isnull,
@@ -139,14 +139,14 @@ static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
139139
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
140140
OffsetNumber offset);
141141
static inline bool invariant_leq_offset(BtreeCheckState *state,
142-
ScanKey key,
142+
BTScanInsert key,
143143
OffsetNumber upperbound);
144144
static inline bool invariant_geq_offset(BtreeCheckState *state,
145-
ScanKey key,
145+
BTScanInsert key,
146146
OffsetNumber lowerbound);
147147
static inline bool invariant_leq_nontarget_offset(BtreeCheckState *state,
148-
Page other,
149-
ScanKey key,
148+
BTScanInsert key,
149+
Page nontarget,
150150
OffsetNumber upperbound);
151151
static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum);
152152

@@ -838,8 +838,8 @@ bt_target_page_check(BtreeCheckState *state)
838838
{
839839
ItemId itemid;
840840
IndexTuple itup;
841-
ScanKey skey;
842841
size_t tupsize;
842+
BTScanInsert skey;
843843

844844
CHECK_FOR_INTERRUPTS();
845845

@@ -1030,7 +1030,7 @@ bt_target_page_check(BtreeCheckState *state)
10301030
*/
10311031
else if (offset == max)
10321032
{
1033-
ScanKey rightkey;
1033+
BTScanInsert rightkey;
10341034

10351035
/* Get item in next/right page */
10361036
rightkey = bt_right_page_check_scankey(state);
@@ -1082,7 +1082,7 @@ bt_target_page_check(BtreeCheckState *state)
10821082
{
10831083
BlockNumber childblock = BTreeInnerTupleGetDownLink(itup);
10841084

1085-
bt_downlink_check(state, childblock, skey);
1085+
bt_downlink_check(state, skey, childblock);
10861086
}
10871087
}
10881088

@@ -1111,11 +1111,12 @@ bt_target_page_check(BtreeCheckState *state)
11111111
* Note that !readonly callers must reverify that target page has not
11121112
* been concurrently deleted.
11131113
*/
1114-
static ScanKey
1114+
static BTScanInsert
11151115
bt_right_page_check_scankey(BtreeCheckState *state)
11161116
{
11171117
BTPageOpaque opaque;
11181118
ItemId rightitem;
1119+
IndexTuple firstitup;
11191120
BlockNumber targetnext;
11201121
Page rightpage;
11211122
OffsetNumber nline;
@@ -1303,8 +1304,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
13031304
* Return first real item scankey. Note that this relies on right page
13041305
* memory remaining allocated.
13051306
*/
1306-
return _bt_mkscankey(state->rel,
1307-
(IndexTuple) PageGetItem(rightpage, rightitem));
1307+
firstitup = (IndexTuple) PageGetItem(rightpage, rightitem);
1308+
return _bt_mkscankey(state->rel, firstitup);
13081309
}
13091310

13101311
/*
@@ -1317,8 +1318,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
13171318
* verification this way around is much more practical.
13181319
*/
13191320
static void
1320-
bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
1321-
ScanKey targetkey)
1321+
bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
1322+
BlockNumber childblock)
13221323
{
13231324
OffsetNumber offset;
13241325
OffsetNumber maxoffset;
@@ -1423,8 +1424,7 @@ bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
14231424
if (offset_is_negative_infinity(copaque, offset))
14241425
continue;
14251426

1426-
if (!invariant_leq_nontarget_offset(state, child,
1427-
targetkey, offset))
1427+
if (!invariant_leq_nontarget_offset(state, targetkey, child, offset))
14281428
ereport(ERROR,
14291429
(errcode(ERRCODE_INDEX_CORRUPTED),
14301430
errmsg("down-link lower bound invariant violated for index \"%s\"",
@@ -1864,13 +1864,12 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
18641864
* to corruption.
18651865
*/
18661866
static inline bool
1867-
invariant_leq_offset(BtreeCheckState *state, ScanKey key,
1867+
invariant_leq_offset(BtreeCheckState *state, BTScanInsert key,
18681868
OffsetNumber upperbound)
18691869
{
1870-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
18711870
int32 cmp;
18721871

1873-
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound);
1872+
cmp = _bt_compare(state->rel, key, state->target, upperbound);
18741873

18751874
return cmp <= 0;
18761875
}
@@ -1883,13 +1882,12 @@ invariant_leq_offset(BtreeCheckState *state, ScanKey key,
18831882
* to corruption.
18841883
*/
18851884
static inline bool
1886-
invariant_geq_offset(BtreeCheckState *state, ScanKey key,
1885+
invariant_geq_offset(BtreeCheckState *state, BTScanInsert key,
18871886
OffsetNumber lowerbound)
18881887
{
1889-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
18901888
int32 cmp;
18911889

1892-
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound);
1890+
cmp = _bt_compare(state->rel, key, state->target, lowerbound);
18931891

18941892
return cmp >= 0;
18951893
}
@@ -1905,14 +1903,12 @@ invariant_geq_offset(BtreeCheckState *state, ScanKey key,
19051903
* to corruption.
19061904
*/
19071905
static inline bool
1908-
invariant_leq_nontarget_offset(BtreeCheckState *state,
1909-
Page nontarget, ScanKey key,
1910-
OffsetNumber upperbound)
1906+
invariant_leq_nontarget_offset(BtreeCheckState *state, BTScanInsert key,
1907+
Page nontarget, OffsetNumber upperbound)
19111908
{
1912-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
19131909
int32 cmp;
19141910

1915-
cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound);
1911+
cmp = _bt_compare(state->rel, key, nontarget, upperbound);
19161912

19171913
return cmp <= 0;
19181914
}

src/backend/access/nbtree/README

+16-13
Original file line numberDiff line numberDiff line change
@@ -598,19 +598,22 @@ scankey point to comparison functions that return boolean, such as int4lt.
598598
There might be more than one scankey entry for a given index column, or
599599
none at all. (We require the keys to appear in index column order, but
600600
the order of multiple keys for a given column is unspecified.) An
601-
insertion scankey uses the same array-of-ScanKey data structure, but the
602-
sk_func pointers point to btree comparison support functions (ie, 3-way
603-
comparators that return int4 values interpreted as <0, =0, >0). In an
604-
insertion scankey there is exactly one entry per index column. Insertion
605-
scankeys are built within the btree code (eg, by _bt_mkscankey()) and are
606-
used to locate the starting point of a scan, as well as for locating the
607-
place to insert a new index tuple. (Note: in the case of an insertion
608-
scankey built from a search scankey, there might be fewer keys than
609-
index columns, indicating that we have no constraints for the remaining
610-
index columns.) After we have located the starting point of a scan, the
611-
original search scankey is consulted as each index entry is sequentially
612-
scanned to decide whether to return the entry and whether the scan can
613-
stop (see _bt_checkkeys()).
601+
insertion scankey ("BTScanInsert" data structure) uses a similar
602+
array-of-ScanKey data structure, but the sk_func pointers point to btree
603+
comparison support functions (ie, 3-way comparators that return int4 values
604+
interpreted as <0, =0, >0). In an insertion scankey there is at most one
605+
entry per index column. There is also other data about the rules used to
606+
locate where to begin the scan, such as whether or not the scan is a
607+
"nextkey" scan. Insertion scankeys are built within the btree code (eg, by
608+
_bt_mkscankey()) and are used to locate the starting point of a scan, as
609+
well as for locating the place to insert a new index tuple. (Note: in the
610+
case of an insertion scankey built from a search scankey or built from a
611+
truncated pivot tuple, there might be fewer keys than index columns,
612+
indicating that we have no constraints for the remaining index columns.)
613+
After we have located the starting point of a scan, the original search
614+
scankey is consulted as each index entry is sequentially scanned to decide
615+
whether to return the entry and whether the scan can stop (see
616+
_bt_checkkeys()).
614617

615618
We use term "pivot" index tuples to distinguish tuples which don't point
616619
to heap tuples, but rather used for tree navigation. Pivot tuples includes

0 commit comments

Comments
 (0)