Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a52c47b

Browse files
author
Alexander Korotkov
committed
Fix B-tree split algorithm
Instead of naive picking of counts, do detailed consideration of free space in pages.
1 parent 69e3431 commit a52c47b

File tree

3 files changed

+153
-13
lines changed

3 files changed

+153
-13
lines changed

contrib/in_memory/btree.c

Lines changed: 144 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include "btree_private.h"
2727
#include "fdw.h"
2828

29+
static int get_split_item_size(Page p, OffsetNumber newoffset,
30+
LocationIndex newitem_size, bool replace, OffsetNumber offset);
31+
static OffsetNumber find_split_location(Page p, OffsetNumber offset,
32+
LocationIndex tuplesize, bool replace, float target_ratio);
2933
static PageNumber btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset,
3034
bool *place_right, Pointer tupleheader, Pointer tuple,
3135
OffsetNumber tuplesize, bool replace, uint32 *state, CommitSeqNo *csn);
@@ -54,6 +58,117 @@ init_btree(BTreeDescr *desc)
5458
init_meta_page(GET_PAGE(desc->meta), 1);
5559
}
5660

61+
/*
62+
* Get size of item during split consideration.
63+
*/
64+
static int
65+
get_split_item_size(Page p, OffsetNumber newoffset, LocationIndex newitem_size,
66+
bool replace, OffsetNumber offset)
67+
{
68+
if (offset < newoffset)
69+
return BTREE_PAGE_GET_ITEM_SIZE(p, offset) + sizeof(OffsetNumber);
70+
else if (offset == newoffset)
71+
return newitem_size + sizeof(OffsetNumber);
72+
else if (replace)
73+
return BTREE_PAGE_GET_ITEM_SIZE(p, offset) + sizeof(OffsetNumber);
74+
else
75+
return BTREE_PAGE_GET_ITEM_SIZE(p, offset - 1) + sizeof(OffsetNumber);
76+
}
77+
78+
/*
79+
* Find appropriate location to split. Return number of tuples to be placed
80+
* to the left page.
81+
*/
82+
static OffsetNumber
83+
find_split_location(Page p, OffsetNumber offset, LocationIndex tuplesize,
84+
bool replace, float target_ratio)
85+
{
86+
int left_free_space;
87+
int right_free_space;
88+
int left_bound;
89+
int right_bound;
90+
int first_data_key = BTREE_FIRST_DATA_KEY(p);
91+
int count;
92+
bool left_bounded = false;
93+
bool right_bounded = false;
94+
bool leaf = PAGE_IS_LEAF(p);
95+
LocationIndex header_size = leaf ? OLeafTupleHeaderSize : OInternalTupleHeaderSize;
96+
LocationIndex newitem_size = sizeof(OffsetNumber) + header_size + tuplesize;
97+
LocationIndex item_size;
98+
99+
/*
100+
* Covert ratio of fillfactor to ratio of free space.
101+
*/
102+
target_ratio = target_ratio / (1.0 - target_ratio);
103+
104+
count = BTREE_PAGE_ITEMS_COUNT(p) - first_data_key + (replace ? 0 : 1);
105+
left_free_space = right_free_space = IN_MEMORY_BLCKSZ - offsetof(BTreePageHeader, items);
106+
107+
/*
108+
* Left and right pages initially contain one item each. Left page also
109+
* reserves space for high key. For leafs, We assume that high key
110+
* couldn't be wider than than source tuple.
111+
*/
112+
left_bound = 1;
113+
left_free_space -= get_split_item_size(p, offset, newitem_size,
114+
replace, first_data_key);
115+
left_free_space -= get_split_item_size(p, offset, newitem_size,
116+
replace, first_data_key + 1);
117+
118+
right_bound = count - 1;
119+
right_free_space -= get_split_item_size(p, offset, newitem_size,
120+
replace, first_data_key + count - 1);
121+
if (!PAGE_IS_RIGHTMOST(p))
122+
right_free_space -= BTREE_PAGE_GET_ITEM_SIZE(p, BTREE_HIKEY) + sizeof(OffsetNumber);
123+
124+
Assert(left_free_space >= 0 && right_free_space >= 0);
125+
126+
/*
127+
* Iterate shifting left bound upper and right bound lower until those
128+
* bounds meet each other.
129+
*/
130+
while (left_bound < right_bound)
131+
{
132+
if (right_bounded || (!left_bounded &&
133+
(float) left_free_space * target_ratio > (float) right_free_space))
134+
{
135+
Assert(!left_bounded);
136+
item_size = get_split_item_size(p, offset, newitem_size, replace,
137+
first_data_key + left_bound + 1);
138+
if (left_free_space >= item_size)
139+
{
140+
left_free_space -= item_size;
141+
left_bound++;
142+
}
143+
else
144+
{
145+
left_bounded = true;
146+
}
147+
}
148+
else
149+
{
150+
Assert(!right_bounded);
151+
item_size = get_split_item_size(p, offset, newitem_size, replace,
152+
first_data_key + right_bound - 1);
153+
if (right_free_space >= item_size)
154+
{
155+
right_free_space -= item_size;
156+
right_bound--;
157+
}
158+
else
159+
{
160+
right_bounded = true;
161+
}
162+
}
163+
}
164+
165+
Assert(left_bound == right_bound);
166+
return left_bound;
167+
}
168+
169+
/*
170+
* Split B-tree page into two.
171+
*/
57172
static PageNumber
58173
btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
59174
Pointer tupleheader, Pointer tuple, LocationIndex tuplesize,
@@ -85,7 +200,10 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
85200
{
86201
right_count = count / 2;
87202
}
88-
left_count = (count - right_count) + BTREE_FIRST_DATA_KEY(p);
203+
left_count = find_split_location(p, *offset, tuplesize, replace,
204+
was_rightmost ? 0.9 : 0.5);
205+
right_count = count - left_count;
206+
left_count += BTREE_FIRST_DATA_KEY(p);
89207
if (*offset < left_count)
90208
{
91209
*place_right = false;
@@ -162,27 +280,20 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
162280
new_header->csn = *csn;
163281
new_header->undoPos = undo_pos;
164282

165-
/* Insert tuple */
283+
/* Insert new tuple to the right page if needed */
166284
if (*place_right)
167285
{
168286
if (!replace)
169287
add_page_item(new_page, *offset, MAXALIGN(tuplesize) + header_size);
170288
else
171289
resize_page_item(new_page, *offset, MAXALIGN(tuplesize) + header_size);
172290
tuple_ptr = BTREE_PAGE_GET_ITEM(new_page, *offset);
291+
memcpy(tuple_ptr, tupleheader, header_size);
292+
tuple_ptr += header_size;
293+
memcpy(tuple_ptr, tuple, tuplesize);
173294
}
174-
else
175-
{
176-
if (!replace)
177-
add_page_item(p, *offset, MAXALIGN(tuplesize) + header_size);
178-
else
179-
resize_page_item(p, *offset, MAXALIGN(tuplesize) + header_size);
180-
tuple_ptr = BTREE_PAGE_GET_ITEM(p, *offset);
181-
}
182-
memcpy(tuple_ptr, tupleheader, header_size);
183-
tuple_ptr += header_size;
184-
memcpy(tuple_ptr, tuple, tuplesize);
185295

296+
/* Update high key of left page */
186297
first_data_key = BTREE_FIRST_DATA_KEY(new_page);
187298
rightbound_key = BTREE_PAGE_GET_ITEM(new_page, first_data_key) + header_size;
188299
if (leaf)
@@ -197,13 +308,33 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
197308
}
198309

199310
if (was_rightmost)
311+
{
312+
if (!(*place_right))
313+
(*offset)++;
200314
add_page_item(p, BTREE_HIKEY, MAXALIGN(rightbound_key_size));
315+
}
201316
else
317+
{
202318
resize_page_item(p, BTREE_HIKEY, MAXALIGN(rightbound_key_size));
319+
}
203320

204321
memcpy(BTREE_PAGE_GET_ITEM(p, BTREE_HIKEY),
205322
rightbound_key, rightbound_key_size);
206323

324+
/* Insert new tuple to the left page if needed */
325+
if (!(*place_right))
326+
{
327+
if (!replace)
328+
add_page_item(p, *offset, MAXALIGN(tuplesize) + header_size);
329+
else
330+
resize_page_item(p, *offset, MAXALIGN(tuplesize) + header_size);
331+
tuple_ptr = BTREE_PAGE_GET_ITEM(p, *offset);
332+
memcpy(tuple_ptr, tupleheader, header_size);
333+
tuple_ptr += header_size;
334+
memcpy(tuple_ptr, tuple, tuplesize);
335+
}
336+
337+
207338
#ifdef NOT_USED
208339
/* Remove leftmost key from the page */
209340
if (!leaf)

contrib/in_memory/expected/in_memory.out

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5231,3 +5231,7 @@ SELECT * FROM im_test1;
52315231
3 | xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
52325232
(3 rows)
52335233

5234+
-- split location bug
5235+
TRUNCATE im_test1;
5236+
INSERT INTO im_test1 (SELECT id, id || repeat('x', 25) FROM generate_series(2, 16, 1) id);
5237+
INSERT INTO im_test1 VALUES(1, repeat('x', 280));

contrib/in_memory/sql/in_memory.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,3 +720,8 @@ DELETE FROM im_test1 WHERE id = 3;
720720
INSERT INTO im_test1 SELECT id, repeat('x', 280) FROM generate_series(3, 4) id;
721721
ROLLBACK;
722722
SELECT * FROM im_test1;
723+
724+
-- split location bug
725+
TRUNCATE im_test1;
726+
INSERT INTO im_test1 (SELECT id, id || repeat('x', 25) FROM generate_series(2, 16, 1) id);
727+
INSERT INTO im_test1 VALUES(1, repeat('x', 280));

0 commit comments

Comments
 (0)