Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6a1ea02

Browse files
committed
Fix locking when fixing an incomplete split of a GIN internal page
ginFinishSplit() expects the caller to hold an exclusive lock on the buffer, but when finishing an earlier "leftover" incomplete split of an internal page, the caller held a shared lock. That caused an assertion failure in MarkBufferDirty(). Without assertions, it could lead to corruption if two backends tried to complete the split at the same time. On master, add a test case using the new injection point facility. Report and analysis by Fei Changhong. Backpatch the fix to all supported versions. Reviewed-by: Fei Changhong, Michael Paquier Discussion: https://www.postgresql.org/message-id/tencent_A3CE810F59132D8E230475A5F0F7A08C8307@qq.com
1 parent 6d4565a commit 6a1ea02

File tree

7 files changed

+418
-20
lines changed

7 files changed

+418
-20
lines changed

src/backend/access/gin/ginbtree.c

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "access/xloginsert.h"
2020
#include "miscadmin.h"
2121
#include "storage/predicate.h"
22+
#include "utils/injection_point.h"
2223
#include "utils/memutils.h"
2324
#include "utils/rel.h"
2425

@@ -28,6 +29,8 @@ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
2829
Buffer childbuf, GinStatsData *buildStats);
2930
static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
3031
bool freestack, GinStatsData *buildStats);
32+
static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
33+
GinStatsData *buildStats, int access);
3134

3235
/*
3336
* Lock buffer by needed method for search.
@@ -108,7 +111,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
108111
* encounter on the way.
109112
*/
110113
if (!searchMode && GinPageIsIncompleteSplit(page))
111-
ginFinishSplit(btree, stack, false, NULL);
114+
ginFinishOldSplit(btree, stack, NULL, access);
112115

113116
/*
114117
* ok, page is correctly locked, we should check to move right ..,
@@ -128,7 +131,7 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
128131
page = BufferGetPage(stack->buffer);
129132

130133
if (!searchMode && GinPageIsIncompleteSplit(page))
131-
ginFinishSplit(btree, stack, false, NULL);
134+
ginFinishOldSplit(btree, stack, NULL, access);
132135
}
133136

134137
if (GinPageIsLeaf(page)) /* we found, return locked page */
@@ -164,8 +167,11 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
164167
* Step right from current page.
165168
*
166169
* The next page is locked first, before releasing the current page. This is
167-
* crucial to protect from concurrent page deletion (see comment in
168-
* ginDeletePage).
170+
* crucial to prevent concurrent VACUUM from deleting a page that we are about
171+
* to step to. (The lock-coupling isn't strictly necessary when we are
172+
* traversing the tree to find an insert location, because page deletion grabs
173+
* a cleanup lock on the root to prevent any concurrent inserts. See Page
174+
* deletion section in the README. But there's no harm in doing it always.)
169175
*/
170176
Buffer
171177
ginStepRight(Buffer buffer, Relation index, int lockmode)
@@ -262,7 +268,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
262268
ptr->parent = root;
263269
ptr->off = InvalidOffsetNumber;
264270

265-
ginFinishSplit(btree, ptr, false, NULL);
271+
ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
266272
}
267273

268274
leftmostBlkno = btree->getLeftMostChild(btree, page);
@@ -291,7 +297,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
291297
ptr->parent = root;
292298
ptr->off = InvalidOffsetNumber;
293299

294-
ginFinishSplit(btree, ptr, false, NULL);
300+
ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
295301
}
296302
}
297303

@@ -670,22 +676,20 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
670676
bool done;
671677
bool first = true;
672678

673-
/*
674-
* freestack == false when we encounter an incompletely split page during
675-
* a scan, while freestack == true is used in the normal scenario that a
676-
* split is finished right after the initial insert.
677-
*/
678-
if (!freestack)
679-
elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
680-
stack->blkno, RelationGetRelationName(btree->index));
681-
682679
/* this loop crawls up the stack until the insertion is complete */
683680
do
684681
{
685682
GinBtreeStack *parent = stack->parent;
686683
void *insertdata;
687684
BlockNumber updateblkno;
688685

686+
#ifdef USE_INJECTION_POINTS
687+
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
688+
INJECTION_POINT("gin-leave-leaf-split-incomplete");
689+
else
690+
INJECTION_POINT("gin-leave-internal-split-incomplete");
691+
#endif
692+
689693
/* search parent to lock */
690694
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
691695

@@ -699,7 +703,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
699703
* would fail.
700704
*/
701705
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
702-
ginFinishSplit(btree, parent, false, buildStats);
706+
ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
703707

704708
/* move right if it's needed */
705709
page = BufferGetPage(parent->buffer);
@@ -723,7 +727,7 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
723727
page = BufferGetPage(parent->buffer);
724728

725729
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
726-
ginFinishSplit(btree, parent, false, buildStats);
730+
ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
727731
}
728732

729733
/* insert the downlink */
@@ -759,6 +763,43 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
759763
freeGinBtreeStack(stack);
760764
}
761765

766+
/*
767+
* An entry point to ginFinishSplit() that is used when we stumble upon an
768+
* existing incompletely split page in the tree, as opposed to completing a
769+
* split that we just made outselves. The difference is that stack->buffer may
770+
* be merely share-locked on entry, and will be upgraded to exclusive mode.
771+
*
772+
* Note: Upgrading the lock momentarily releases it. Doing that in a scan
773+
* would not be OK, because a concurrent VACUUM might delete the page while
774+
* we're not holding the lock. It's OK in an insert, though, because VACUUM
775+
* has a different mechanism that prevents it from running concurrently with
776+
* inserts. (Namely, it holds a cleanup lock on the root.)
777+
*/
778+
static void
779+
ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
780+
{
781+
INJECTION_POINT("gin-finish-incomplete-split");
782+
elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
783+
stack->blkno, RelationGetRelationName(btree->index));
784+
785+
if (access == GIN_SHARE)
786+
{
787+
LockBuffer(stack->buffer, GIN_UNLOCK);
788+
LockBuffer(stack->buffer, GIN_EXCLUSIVE);
789+
790+
if (!GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
791+
{
792+
/*
793+
* Someone else already completed the split while we were not
794+
* holding the lock.
795+
*/
796+
return;
797+
}
798+
}
799+
800+
ginFinishSplit(btree, stack, false, buildStats);
801+
}
802+
762803
/*
763804
* Insert a value to tree described by stack.
764805
*
@@ -779,7 +820,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
779820

780821
/* If the leaf page was incompletely split, finish the split first */
781822
if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
782-
ginFinishSplit(btree, stack, false, buildStats);
823+
ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
783824

784825
done = ginPlaceToPage(btree, stack,
785826
insertdata, InvalidBlockNumber,

src/test/modules/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ SUBDIRS = \
4040

4141

4242
ifeq ($(enable_injection_points),yes)
43-
SUBDIRS += injection_points
43+
SUBDIRS += injection_points gin
4444
else
45-
ALWAYS_SUBDIRS += injection_points
45+
ALWAYS_SUBDIRS += injection_points gin
4646
endif
4747

4848
ifeq ($(with_ssl),openssl)

src/test/modules/gin/Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# src/test/modules/gin/Makefile
2+
3+
EXTRA_INSTALL = src/test/modules/injection_points
4+
5+
REGRESS = gin_incomplete_splits
6+
7+
ifdef USE_PGXS
8+
PG_CONFIG = pg_config
9+
PGXS := $(shell $(PG_CONFIG) --pgxs)
10+
include $(PGXS)
11+
else
12+
subdir = src/test/modules/gin
13+
top_builddir = ../../../..
14+
include $(top_builddir)/src/Makefile.global
15+
include $(top_srcdir)/contrib/contrib-global.mk
16+
endif
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
--
2+
-- The test uses a GIN index over int[]. The table contains arrays
3+
-- with integers from 1 to :next_i. Each integer occurs exactly once,
4+
-- no gaps or duplicates, although the index does contain some
5+
-- duplicate elements because some of the inserting transactions are
6+
-- rolled back during the test. The exact contents of the table depend
7+
-- on the physical layout of the index, which in turn depends at least
8+
-- on the block size, so instead of check for the exact contents, we
9+
-- check those invariants. :next_i psql variable is maintained at all
10+
-- times to hold the last inserted integer + 1.
11+
--
12+
-- This uses injection points to cause errors that leave some page
13+
-- splits in "incomplete" state
14+
create extension injection_points;
15+
-- Use the index for all the queries
16+
set enable_seqscan=off;
17+
-- Print a NOTICE whenever an incomplete split gets fixed
18+
SELECT injection_points_attach('gin-finish-incomplete-split', 'notice');
19+
injection_points_attach
20+
-------------------------
21+
22+
(1 row)
23+
24+
--
25+
-- First create the test table and some helper functions
26+
--
27+
create table gin_incomplete_splits(i int4[]) with (autovacuum_enabled = off);
28+
create index on gin_incomplete_splits using gin (i) with (fastupdate = off);
29+
-- Creates an array with all integers from $1 (inclusive) $2 (exclusive)
30+
create function range_array(int, int) returns int[] language sql immutable as $$
31+
select array_agg(g) from generate_series($1, $2 - 1) g
32+
$$;
33+
-- Inserts an array with 'n' rows to the test table. Pass :next_i as
34+
-- the first argument, returns the new value for :next_i.
35+
create function insert_n(first_i int, n int) returns int language plpgsql as $$
36+
begin
37+
insert into gin_incomplete_splits values (range_array(first_i, first_i+n));
38+
return first_i + n;
39+
end;
40+
$$;
41+
-- Inserts to the table until an insert fails. Like insert_n(), returns the
42+
-- new value for :next_i.
43+
create function insert_until_fail(next_i int, step int default 1) returns int language plpgsql as $$
44+
declare
45+
i integer;
46+
begin
47+
-- Insert arrays with 'step' elements each, until an error occurs.
48+
loop
49+
begin
50+
select insert_n(next_i, step) into next_i;
51+
exception when others then
52+
raise notice 'failed with: %', sqlerrm;
53+
exit;
54+
end;
55+
56+
-- The caller is expected to set an injection point that eventuall
57+
-- causes an error. But bail out if still no error after 10000
58+
-- attempts, so that we don't get stuck in an infinite loop.
59+
i := i + 1;
60+
if i = 10000 then
61+
raise 'no error on inserts after ';
62+
end if;
63+
end loop;
64+
65+
return next_i;
66+
end;
67+
$$;
68+
-- Check the invariants.
69+
create function verify(next_i int) returns bool language plpgsql as $$
70+
declare
71+
a integer[];
72+
elem integer;
73+
c integer;
74+
begin
75+
-- Perform a scan over the trailing part of the index, where the
76+
-- possible incomplete splits are. (We don't check the whole table,
77+
-- because that'd be pretty slow.)
78+
c := 0;
79+
-- Find all arrays that overlap with the last 200 inserted integers. Or
80+
-- the next 100, which shouldn't exist.
81+
for a in select i from gin_incomplete_splits where i && range_array(next_i - 200, next_i + 100)
82+
loop
83+
-- count all elements in those arrays in the window.
84+
foreach elem in ARRAY a loop
85+
if elem >= next_i then
86+
raise 'unexpected value % in array', elem;
87+
end if;
88+
if elem >= next_i - 200 then
89+
c := c + 1;
90+
end if;
91+
end loop;
92+
end loop;
93+
if c <> 200 then
94+
raise 'unexpected count % ', c;
95+
end if;
96+
97+
return true;
98+
end;
99+
$$;
100+
-- Insert one array to get started.
101+
select insert_n(1, 1000) as next_i
102+
\gset
103+
select verify(:next_i);
104+
verify
105+
--------
106+
t
107+
(1 row)
108+
109+
--
110+
-- Test incomplete leaf split
111+
--
112+
SELECT injection_points_attach('gin-leave-leaf-split-incomplete', 'error');
113+
injection_points_attach
114+
-------------------------
115+
116+
(1 row)
117+
118+
select insert_until_fail(:next_i) as next_i
119+
\gset
120+
NOTICE: failed with: error triggered for injection point gin-leave-leaf-split-incomplete
121+
SELECT injection_points_detach('gin-leave-leaf-split-incomplete');
122+
injection_points_detach
123+
-------------------------
124+
125+
(1 row)
126+
127+
-- Verify that a scan works even though there's an incomplete split
128+
select verify(:next_i);
129+
verify
130+
--------
131+
t
132+
(1 row)
133+
134+
-- Insert some more rows, finishing the split
135+
select insert_n(:next_i, 10) as next_i
136+
\gset
137+
NOTICE: notice triggered for injection point gin-finish-incomplete-split
138+
-- Verify that a scan still works
139+
select verify(:next_i);
140+
verify
141+
--------
142+
t
143+
(1 row)
144+
145+
--
146+
-- Test incomplete internal page split
147+
--
148+
SELECT injection_points_attach('gin-leave-internal-split-incomplete', 'error');
149+
injection_points_attach
150+
-------------------------
151+
152+
(1 row)
153+
154+
select insert_until_fail(:next_i, 100) as next_i
155+
\gset
156+
NOTICE: failed with: error triggered for injection point gin-leave-internal-split-incomplete
157+
SELECT injection_points_detach('gin-leave-internal-split-incomplete');
158+
injection_points_detach
159+
-------------------------
160+
161+
(1 row)
162+
163+
-- Verify that a scan works even though there's an incomplete split
164+
select verify(:next_i);
165+
verify
166+
--------
167+
t
168+
(1 row)
169+
170+
-- Insert some more rows, finishing the split
171+
select insert_n(:next_i, 10) as next_i
172+
\gset
173+
NOTICE: notice triggered for injection point gin-finish-incomplete-split
174+
-- Verify that a scan still works
175+
select verify(:next_i);
176+
verify
177+
--------
178+
t
179+
(1 row)
180+

src/test/modules/gin/meson.build

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) 2022-2024, PostgreSQL Global Development Group
2+
3+
if not get_option('injection_points')
4+
subdir_done()
5+
endif
6+
7+
tests += {
8+
'name': 'gin',
9+
'sd': meson.current_source_dir(),
10+
'bd': meson.current_build_dir(),
11+
'regress': {
12+
'sql': [
13+
'gin_incomplete_splits',
14+
],
15+
},
16+
}

0 commit comments

Comments
 (0)