26
26
#include "btree_private.h"
27
27
#include "fdw.h"
28
28
29
+ static int get_split_item_size (Page p , OffsetNumber newoffset ,
30
+ LocationIndex newitem_size , bool replace , OffsetNumber offset );
31
+ static OffsetNumber find_split_location (Page p , OffsetNumber offset ,
32
+ LocationIndex tuplesize , bool replace , float target_ratio );
29
33
static PageNumber btree_split (BTreeDescr * desc , Page p , OffsetNumber * offset ,
30
34
bool * place_right , Pointer tupleheader , Pointer tuple ,
31
35
OffsetNumber tuplesize , bool replace , uint32 * state , CommitSeqNo * csn );
@@ -54,6 +58,117 @@ init_btree(BTreeDescr *desc)
54
58
init_meta_page (GET_PAGE (desc -> meta ), 1 );
55
59
}
56
60
61
+ /*
62
+ * Get size of item during split consideration.
63
+ */
64
+ static int
65
+ get_split_item_size (Page p , OffsetNumber newoffset , LocationIndex newitem_size ,
66
+ bool replace , OffsetNumber offset )
67
+ {
68
+ if (offset < newoffset )
69
+ return BTREE_PAGE_GET_ITEM_SIZE (p , offset ) + sizeof (OffsetNumber );
70
+ else if (offset == newoffset )
71
+ return newitem_size + sizeof (OffsetNumber );
72
+ else if (replace )
73
+ return BTREE_PAGE_GET_ITEM_SIZE (p , offset ) + sizeof (OffsetNumber );
74
+ else
75
+ return BTREE_PAGE_GET_ITEM_SIZE (p , offset - 1 ) + sizeof (OffsetNumber );
76
+ }
77
+
78
+ /*
79
+ * Find appropriate location to split. Return number of tuples to be placed
80
+ * to the left page.
81
+ */
82
+ static OffsetNumber
83
+ find_split_location (Page p , OffsetNumber offset , LocationIndex tuplesize ,
84
+ bool replace , float target_ratio )
85
+ {
86
+ int left_free_space ;
87
+ int right_free_space ;
88
+ int left_bound ;
89
+ int right_bound ;
90
+ int first_data_key = BTREE_FIRST_DATA_KEY (p );
91
+ int count ;
92
+ bool left_bounded = false;
93
+ bool right_bounded = false;
94
+ bool leaf = PAGE_IS_LEAF (p );
95
+ LocationIndex header_size = leaf ? OLeafTupleHeaderSize : OInternalTupleHeaderSize ;
96
+ LocationIndex newitem_size = sizeof (OffsetNumber ) + header_size + tuplesize ;
97
+ LocationIndex item_size ;
98
+
99
+ /*
100
+ * Covert ratio of fillfactor to ratio of free space.
101
+ */
102
+ target_ratio = target_ratio / (1.0 - target_ratio );
103
+
104
+ count = BTREE_PAGE_ITEMS_COUNT (p ) - first_data_key + (replace ? 0 : 1 );
105
+ left_free_space = right_free_space = IN_MEMORY_BLCKSZ - offsetof(BTreePageHeader , items );
106
+
107
+ /*
108
+ * Left and right pages initially contain one item each. Left page also
109
+ * reserves space for high key. For leafs, We assume that high key
110
+ * couldn't be wider than than source tuple.
111
+ */
112
+ left_bound = 1 ;
113
+ left_free_space -= get_split_item_size (p , offset , newitem_size ,
114
+ replace , first_data_key );
115
+ left_free_space -= get_split_item_size (p , offset , newitem_size ,
116
+ replace , first_data_key + 1 );
117
+
118
+ right_bound = count - 1 ;
119
+ right_free_space -= get_split_item_size (p , offset , newitem_size ,
120
+ replace , first_data_key + count - 1 );
121
+ if (!PAGE_IS_RIGHTMOST (p ))
122
+ right_free_space -= BTREE_PAGE_GET_ITEM_SIZE (p , BTREE_HIKEY ) + sizeof (OffsetNumber );
123
+
124
+ Assert (left_free_space >= 0 && right_free_space >= 0 );
125
+
126
+ /*
127
+ * Iterate shifting left bound upper and right bound lower until those
128
+ * bounds meet each other.
129
+ */
130
+ while (left_bound < right_bound )
131
+ {
132
+ if (right_bounded || (!left_bounded &&
133
+ (float ) left_free_space * target_ratio > (float ) right_free_space ))
134
+ {
135
+ Assert (!left_bounded );
136
+ item_size = get_split_item_size (p , offset , newitem_size , replace ,
137
+ first_data_key + left_bound + 1 );
138
+ if (left_free_space >= item_size )
139
+ {
140
+ left_free_space -= item_size ;
141
+ left_bound ++ ;
142
+ }
143
+ else
144
+ {
145
+ left_bounded = true;
146
+ }
147
+ }
148
+ else
149
+ {
150
+ Assert (!right_bounded );
151
+ item_size = get_split_item_size (p , offset , newitem_size , replace ,
152
+ first_data_key + right_bound - 1 );
153
+ if (right_free_space >= item_size )
154
+ {
155
+ right_free_space -= item_size ;
156
+ right_bound -- ;
157
+ }
158
+ else
159
+ {
160
+ right_bounded = true;
161
+ }
162
+ }
163
+ }
164
+
165
+ Assert (left_bound == right_bound );
166
+ return left_bound ;
167
+ }
168
+
169
+ /*
170
+ * Split B-tree page into two.
171
+ */
57
172
static PageNumber
58
173
btree_split (BTreeDescr * desc , Page p , OffsetNumber * offset , bool * place_right ,
59
174
Pointer tupleheader , Pointer tuple , LocationIndex tuplesize ,
@@ -85,7 +200,10 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
85
200
{
86
201
right_count = count / 2 ;
87
202
}
88
- left_count = (count - right_count ) + BTREE_FIRST_DATA_KEY (p );
203
+ left_count = find_split_location (p , * offset , tuplesize , replace ,
204
+ was_rightmost ? 0.9 : 0.5 );
205
+ right_count = count - left_count ;
206
+ left_count += BTREE_FIRST_DATA_KEY (p );
89
207
if (* offset < left_count )
90
208
{
91
209
* place_right = false;
@@ -162,33 +280,26 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
162
280
new_header -> csn = * csn ;
163
281
new_header -> undoPos = undo_pos ;
164
282
165
- /* Insert tuple */
283
+ /* Insert new tuple to the right page if needed */
166
284
if (* place_right )
167
285
{
168
286
if (!replace )
169
- add_page_item (new_page , * offset , tuplesize + header_size );
287
+ add_page_item (new_page , * offset , MAXALIGN ( tuplesize ) + header_size );
170
288
else
171
- resize_page_item (new_page , * offset , tuplesize + header_size );
289
+ resize_page_item (new_page , * offset , MAXALIGN ( tuplesize ) + header_size );
172
290
tuple_ptr = BTREE_PAGE_GET_ITEM (new_page , * offset );
291
+ memcpy (tuple_ptr , tupleheader , header_size );
292
+ tuple_ptr += header_size ;
293
+ memcpy (tuple_ptr , tuple , tuplesize );
173
294
}
174
- else
175
- {
176
- if (!replace )
177
- add_page_item (p , * offset , tuplesize + header_size );
178
- else
179
- resize_page_item (p , * offset , tuplesize + header_size );
180
- tuple_ptr = BTREE_PAGE_GET_ITEM (p , * offset );
181
- }
182
- memcpy (tuple_ptr , tupleheader , header_size );
183
- tuple_ptr += header_size ;
184
- memcpy (tuple_ptr , tuple , tuplesize );
185
295
296
+ /* Update high key of left page */
186
297
first_data_key = BTREE_FIRST_DATA_KEY (new_page );
187
298
rightbound_key = BTREE_PAGE_GET_ITEM (new_page , first_data_key ) + header_size ;
188
299
if (leaf )
189
300
{
190
301
rightbound_key = desc -> tuple_make_key (desc , rightbound_key );
191
- rightbound_key_size = MAXALIGN ( desc -> key_len (desc , rightbound_key ) );
302
+ rightbound_key_size = desc -> key_len (desc , rightbound_key );
192
303
}
193
304
else
194
305
{
@@ -197,13 +308,33 @@ btree_split(BTreeDescr *desc, Page p, OffsetNumber *offset, bool *place_right,
197
308
}
198
309
199
310
if (was_rightmost )
200
- add_page_item (p , BTREE_HIKEY , rightbound_key_size );
311
+ {
312
+ if (!(* place_right ))
313
+ (* offset )++ ;
314
+ add_page_item (p , BTREE_HIKEY , MAXALIGN (rightbound_key_size ));
315
+ }
201
316
else
202
- resize_page_item (p , BTREE_HIKEY , rightbound_key_size );
317
+ {
318
+ resize_page_item (p , BTREE_HIKEY , MAXALIGN (rightbound_key_size ));
319
+ }
203
320
204
321
memcpy (BTREE_PAGE_GET_ITEM (p , BTREE_HIKEY ),
205
322
rightbound_key , rightbound_key_size );
206
323
324
+ /* Insert new tuple to the left page if needed */
325
+ if (!(* place_right ))
326
+ {
327
+ if (!replace )
328
+ add_page_item (p , * offset , MAXALIGN (tuplesize ) + header_size );
329
+ else
330
+ resize_page_item (p , * offset , MAXALIGN (tuplesize ) + header_size );
331
+ tuple_ptr = BTREE_PAGE_GET_ITEM (p , * offset );
332
+ memcpy (tuple_ptr , tupleheader , header_size );
333
+ tuple_ptr += header_size ;
334
+ memcpy (tuple_ptr , tuple , tuplesize );
335
+ }
336
+
337
+
207
338
#ifdef NOT_USED
208
339
/* Remove leftmost key from the page */
209
340
if (!leaf )
@@ -275,7 +406,8 @@ insert_to_leaf(BTreeStack *stack,
275
406
276
407
while (true)
277
408
{
278
- PageFitsItemResult fit = page_fits_item (p , offset , tuplen + tupheaderlen , replace );
409
+ LocationIndex itemsize = MAXALIGN (tuplen ) + tupheaderlen ;
410
+ PageFitsItemResult fit = page_fits_item (p , offset , itemsize , replace );
279
411
280
412
if (fit != PageNotFit )
281
413
{
@@ -286,26 +418,49 @@ insert_to_leaf(BTreeStack *stack,
286
418
{
287
419
compact_page (desc , p , & state );
288
420
offset = btree_page_binary_search (desc , p , tuple , BTreeTuple );
421
+
422
+ /*
423
+ * If we were going to replace the tuple, then we should check
424
+ * it's not gone after compaction.
425
+ */
426
+ if (replace )
427
+ {
428
+ Assert (PAGE_IS_LEAF (p )); /* We place tuples only on leaf pages */
429
+
430
+ if (offset < header -> itemsCount )
431
+ {
432
+ Pointer curTuple = BTREE_PAGE_GET_ITEM (p , offset ) +
433
+ OLeafTupleHeaderSize ;
434
+ if (desc -> cmp (desc , tuple , BTreeTuple , curTuple , BTreeTuple ))
435
+ replace = false;
436
+ }
437
+ else
438
+ {
439
+ replace = false;
440
+ }
441
+ }
289
442
}
290
443
291
444
state = modify_page (p , state );
292
445
if (!replace )
293
446
{
294
- add_page_item (p , offset , tuplen + tupheaderlen );
447
+ add_page_item (p , offset , itemsize );
295
448
296
449
/* Increment number of tuples in progress */
297
450
if (level == 0 )
298
451
header -> nInProgress ++ ;
299
452
}
300
453
else
301
454
{
455
+ int old_item_size = BTREE_PAGE_GET_ITEM_SIZE (p , offset );
456
+
302
457
/*
303
458
* We should be able to undo this action without memory
304
459
* allocation. Thus we shouldn't cut space occupied by this
305
460
* page item.
306
461
*/
307
- if (tuplen + tupheaderlen > BTREE_PAGE_GET_ITEM_SIZE ( p , offset ) )
308
- resize_page_item (p , offset , tuplen + tupheaderlen );
462
+ if (itemsize > old_item_size )
463
+ resize_page_item (p , offset , itemsize );
309
464
310
465
if (level == 0 )
311
466
{
@@ -324,8 +479,8 @@ insert_to_leaf(BTreeStack *stack,
324
479
* tuple.
325
480
*/
326
481
if (prev -> deleted )
327
- header -> nVacatedBytes -= BTREE_PAGE_GET_ITEM_SIZE ( p , offset )
328
- + sizeof (OffsetNumber );
482
+ header -> nVacatedBytes -= ( old_item_size
483
+ + sizeof (OffsetNumber ) );
329
484
}
330
485
}
331
486
}
@@ -656,6 +811,7 @@ btree_modify_internal(BTreeDescr *desc, BTreeModifyAction action,
656
811
}
657
812
658
813
undo_pos = preserve_undo_record (OLeafTupleHeaderSize );
814
+
659
815
if (!UndoPosIsValid (undo_pos ))
660
816
{
661
817
unlock_page (p , state );
@@ -697,6 +853,12 @@ btree_modify_internal(BTreeDescr *desc, BTreeModifyAction action,
697
853
{
698
854
/* If we deleted it ourselves, revert this deletion */
699
855
apply_undo (desc , p , offset , false);
856
+
857
+ /*
858
+ * We've already reverted the previous delete, so don't
859
+ * consider it as a self-update anymore.
860
+ */
861
+ self_update = NoSelfUpdate ;
700
862
}
701
863
replace = true;
702
864
}
@@ -747,7 +909,7 @@ btree_modify_internal(BTreeDescr *desc, BTreeModifyAction action,
747
909
}
748
910
}
749
911
750
- tuplen = MAXALIGN ( desc -> tuple_len (desc , tuple ) );
912
+ tuplen = desc -> tuple_len (desc , tuple );
751
913
insert_to_leaf (& stack , tuple , tuplen , & leaf_header , replace , self_update , undo_pos );
752
914
753
915
return result ;
0 commit comments