Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6db4b49

Browse files
committed
Fix wrong validation of top-parent pointer during page deletion in Btree.
After the introduction of using the t_tid of inner tuples or page high keys to store a tuple's number of attributes, validating a tuple's ItemPointer with ItemPointerIsValid became incorrect; only the block number of the ItemPointer needs to be validated. Missing this caused incorrect page deletion; fix that. A test is added. BTW, the current contrib/amcheck doesn't fail on an index corrupted in this way. Also introduce the BTreeTupleGetTopParent/BTreeTupleSetTopParent macros to improve code readability and to avoid possible confusion with the page high key: the high key is used to store the top-parent link for the branch to remove. Bug found by Michael Paquier, but the bug doesn't exist in previous versions because t_tid was set to P_HIKEY. Author: Teodor Sigaev Reviewer: Peter Geoghegan Discussion: https://www.postgresql.org/message-id/flat/20180419052436.GA16000%40paquier.xyz
1 parent 6a7b2ce commit 6db4b49

File tree

6 files changed

+46
-23
lines changed

6 files changed

+46
-23
lines changed

src/backend/access/nbtree/nbtpage.c

+8-13
Original file line numberDiff line numberDiff line change
@@ -1602,10 +1602,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
16021602
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
16031603
trunctuple.t_info = sizeof(IndexTupleData);
16041604
if (target != leafblkno)
1605-
ItemPointerSetBlockNumber(&trunctuple.t_tid, target);
1605+
BTreeTupleSetTopParent(&trunctuple, target);
16061606
else
1607-
ItemPointerSetInvalid(&trunctuple.t_tid);
1608-
BTreeTupleSetNAtts(&trunctuple, 0);
1607+
BTreeTupleSetTopParent(&trunctuple, InvalidBlockNumber);
16091608

16101609
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
16111610
false, false) == InvalidOffsetNumber)
@@ -1690,7 +1689,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
16901689
BTPageOpaque opaque;
16911690
bool rightsib_is_rightmost;
16921691
int targetlevel;
1693-
ItemPointer leafhikey;
1692+
IndexTuple leafhikey;
16941693
BlockNumber nextchild;
16951694

16961695
page = BufferGetPage(leafbuf);
@@ -1702,7 +1701,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
17021701
* Remember some information about the leaf page.
17031702
*/
17041703
itemid = PageGetItemId(page, P_HIKEY);
1705-
leafhikey = &((IndexTuple) PageGetItem(page, itemid))->t_tid;
1704+
leafhikey = (IndexTuple) PageGetItem(page, itemid);
17061705
leafleftsib = opaque->btpo_prev;
17071706
leafrightsib = opaque->btpo_next;
17081707

@@ -1714,9 +1713,10 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
17141713
* parent in the branch. Set 'target' and 'buf' to reference the page
17151714
* actually being unlinked.
17161715
*/
1717-
if (ItemPointerIsValid(leafhikey))
1716+
target = BTreeTupleGetTopParent(leafhikey);
1717+
1718+
if (target != InvalidBlockNumber)
17181719
{
1719-
target = ItemPointerGetBlockNumberNoCheck(leafhikey);
17201720
Assert(target != leafblkno);
17211721

17221722
/* fetch the block number of the topmost parent's left sibling */
@@ -1919,12 +1919,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
19191919
* branch.
19201920
*/
19211921
if (target != leafblkno)
1922-
{
1923-
if (nextchild == InvalidBlockNumber)
1924-
ItemPointerSetInvalid(leafhikey);
1925-
else
1926-
ItemPointerSetBlockNumber(leafhikey, nextchild);
1927-
}
1922+
BTreeTupleSetTopParent(leafhikey, nextchild);
19281923

19291924
/*
19301925
* Mark the page itself deleted. It can be recycled when all current

src/backend/access/nbtree/nbtxlog.c

+2-10
Original file line numberDiff line numberDiff line change
@@ -800,11 +800,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
800800
*/
801801
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
802802
trunctuple.t_info = sizeof(IndexTupleData);
803-
if (xlrec->topparent != InvalidBlockNumber)
804-
ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
805-
else
806-
ItemPointerSetInvalid(&trunctuple.t_tid);
807-
BTreeTupleSetNAtts(&trunctuple, 0);
803+
BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
808804

809805
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
810806
false, false) == InvalidOffsetNumber)
@@ -912,11 +908,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
912908
/* Add a dummy hikey item */
913909
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
914910
trunctuple.t_info = sizeof(IndexTupleData);
915-
if (xlrec->topparent != InvalidBlockNumber)
916-
ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
917-
else
918-
ItemPointerSetInvalid(&trunctuple.t_tid);
919-
BTreeTupleSetNAtts(&trunctuple, 0);
911+
BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
920912

921913
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
922914
false, false) == InvalidOffsetNumber)

src/include/access/nbtree.h

+14
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,20 @@ typedef struct BTMetaPageData
226226
#define BTreeInnerTupleSetDownLink(itup, blkno) \
227227
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
228228

229+
/*
230+
* Get/set leaf page highkey's link. During the second phase of deletion, the
231+
* target leaf page's high key may point to an ancestor page (at all other
232+
* times, the leaf level high key's link is not used). See the nbtree README
233+
* for full details.
234+
*/
235+
#define BTreeTupleGetTopParent(itup) \
236+
ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
237+
#define BTreeTupleSetTopParent(itup, blkno) \
238+
do { \
239+
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
240+
BTreeTupleSetNAtts((itup), 0); \
241+
} while(0)
242+
229243
/*
230244
* Get/set number of attributes within B-tree index tuple. Asserts should be
231245
* removed when BT_RESERVED_OFFSET_MASK bits will be used.

src/test/regress/expected/create_index.out

+10
Original file line numberDiff line numberDiff line change
@@ -3052,6 +3052,16 @@ explain (costs off)
30523052
Filter: (NOT b)
30533053
(4 rows)
30543054

3055+
--
3056+
-- Test for multilevel page deletion
3057+
--
3058+
CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
3059+
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
3060+
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
3061+
DELETE FROM delete_test_table WHERE a > 40000;
3062+
VACUUM delete_test_table;
3063+
DELETE FROM delete_test_table WHERE a > 10;
3064+
VACUUM delete_test_table;
30553065
--
30563066
-- REINDEX (VERBOSE)
30573067
--

src/test/regress/expected/sanity_check.out

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ d_star|f
3838
date_tbl|f
3939
default_tbl|f
4040
defaultexpr_tbl|f
41+
delete_test_table|t
4142
dept|f
4243
dupindexcols|t
4344
e_star|f

src/test/regress/sql/create_index.sql

+11
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,17 @@ explain (costs off)
10611061
explain (costs off)
10621062
select * from boolindex where not b order by i limit 10;
10631063

1064+
--
1065+
-- Test for multilevel page deletion
1066+
--
1067+
CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
1068+
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
1069+
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
1070+
DELETE FROM delete_test_table WHERE a > 40000;
1071+
VACUUM delete_test_table;
1072+
DELETE FROM delete_test_table WHERE a > 10;
1073+
VACUUM delete_test_table;
1074+
10641075
--
10651076
-- REINDEX (VERBOSE)
10661077
--

0 commit comments

Comments
 (0)