Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 43d1ed6

Browse files
committed
Predicate locking in GIN index
Predicate locks are used on per page basis only if fastupdate = off, in opposite case predicate lock on pending list will effectively lock whole index, to reduce locking overhead, just lock a relation. Entry and posting trees are essentially B-tree, so locks are acquired on leaf pages only. Author: Shubham Barai with some editorization by me and Dmitry Ivanov Review by: Alexander Korotkov, Dmitry Ivanov, Fedor Sigaev Discussion: https://www.postgresql.org/message-id/flat/CALxAEPt5sWW+EwTaKUGFL5_XFcZ0MuGBcyJ70oqbWqr42YKR8Q@mail.gmail.com
1 parent 019fa57 commit 43d1ed6

File tree

11 files changed

+1054
-18
lines changed

11 files changed

+1054
-18
lines changed

src/backend/access/gin/ginbtree.c

+22
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "access/gin_private.h"
1818
#include "access/ginxlog.h"
1919
#include "access/xloginsert.h"
20+
#include "storage/predicate.h"
2021
#include "miscadmin.h"
2122
#include "utils/memutils.h"
2223
#include "utils/rel.h"
@@ -515,6 +516,19 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
515516
btree->fillRoot(btree, newrootpg,
516517
BufferGetBlockNumber(lbuffer), newlpage,
517518
BufferGetBlockNumber(rbuffer), newrpage);
519+
520+
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
521+
{
522+
523+
PredicateLockPageSplit(btree->index,
524+
BufferGetBlockNumber(stack->buffer),
525+
BufferGetBlockNumber(lbuffer));
526+
527+
PredicateLockPageSplit(btree->index,
528+
BufferGetBlockNumber(stack->buffer),
529+
BufferGetBlockNumber(rbuffer));
530+
}
531+
518532
}
519533
else
520534
{
@@ -524,6 +538,14 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
524538
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
525539
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
526540
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
541+
542+
if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
543+
{
544+
545+
PredicateLockPageSplit(btree->index,
546+
BufferGetBlockNumber(stack->buffer),
547+
BufferGetBlockNumber(rbuffer));
548+
}
527549
}
528550

529551
/*

src/backend/access/gin/gindatapage.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "access/xloginsert.h"
2020
#include "lib/ilist.h"
2121
#include "miscadmin.h"
22+
#include "storage/predicate.h"
2223
#include "utils/rel.h"
2324

2425
/*
@@ -1759,7 +1760,7 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
17591760
*/
17601761
BlockNumber
17611762
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
1762-
GinStatsData *buildStats)
1763+
GinStatsData *buildStats, Buffer entrybuffer)
17631764
{
17641765
BlockNumber blkno;
17651766
Buffer buffer;
@@ -1810,6 +1811,12 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
18101811
page = BufferGetPage(buffer);
18111812
blkno = BufferGetBlockNumber(buffer);
18121813

1814+
/*
1815+
* Copy a predicate lock from entry tree leaf (containing posting list)
1816+
* to posting tree.
1817+
*/
1818+
PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
1819+
18131820
START_CRIT_SECTION();
18141821

18151822
PageRestoreTempPage(tmppage, page);
@@ -1904,6 +1911,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
19041911
btree.itemptr = insertdata.items[insertdata.curitem];
19051912
stack = ginFindLeafPage(&btree, false, NULL);
19061913

1914+
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
19071915
ginInsertValue(&btree, stack, &insertdata, buildStats);
19081916
}
19091917
}

src/backend/access/gin/ginget.c

+69-7
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#include "access/gin_private.h"
1818
#include "access/relscan.h"
1919
#include "miscadmin.h"
20+
#include "storage/predicate.h"
2021
#include "utils/datum.h"
2122
#include "utils/memutils.h"
23+
#include "utils/rel.h"
2224

2325
/* GUC parameter */
2426
int GinFuzzySearchLimit = 0;
@@ -33,11 +35,25 @@ typedef struct pendingPosition
3335
} pendingPosition;
3436

3537

38+
/*
39+
* Place predicate lock on GIN page if needed.
40+
*/
41+
static void
42+
GinPredicateLockPage(Relation index, BlockNumber blkno, Snapshot snapshot)
43+
{
44+
/*
45+
* When fast update is on then no need in locking pages, because we
46+
* anyway need to lock the whole index.
47+
*/
48+
if (!GinGetUseFastUpdate(index))
49+
PredicateLockPage(index, blkno, snapshot);
50+
}
51+
3652
/*
3753
* Goes to the next page if current offset is outside of bounds
3854
*/
3955
static bool
40-
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
56+
moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
4157
{
4258
Page page = BufferGetPage(stack->buffer);
4359

@@ -52,6 +68,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
5268
stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
5369
stack->blkno = BufferGetBlockNumber(stack->buffer);
5470
stack->off = FirstOffsetNumber;
71+
GinPredicateLockPage(btree->index, stack->blkno, snapshot);
5572
}
5673

5774
return true;
@@ -73,6 +90,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
7390
/* Descend to the leftmost leaf page */
7491
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
7592
buffer = stack->buffer;
93+
7694
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
7795

7896
freeGinBtreeStack(stack);
@@ -82,6 +100,11 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
82100
*/
83101
for (;;)
84102
{
103+
/*
104+
* Predicate lock each leaf page in posting tree
105+
*/
106+
GinPredicateLockPage(index, BufferGetBlockNumber(buffer), snapshot);
107+
85108
page = BufferGetPage(buffer);
86109
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
87110
{
@@ -131,6 +154,12 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
131154
attnum = scanEntry->attnum;
132155
attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
133156

157+
/*
158+
* Predicate lock entry leaf page, following pages will be locked by
159+
* moveRightIfItNeeded()
160+
*/
161+
GinPredicateLockPage(btree->index, stack->buffer, snapshot);
162+
134163
for (;;)
135164
{
136165
Page page;
@@ -141,7 +170,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
141170
/*
142171
* stack->off points to the interested entry, buffer is already locked
143172
*/
144-
if (moveRightIfItNeeded(btree, stack) == false)
173+
if (moveRightIfItNeeded(btree, stack, snapshot) == false)
145174
return true;
146175

147176
page = BufferGetPage(stack->buffer);
@@ -250,7 +279,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
250279
Datum newDatum;
251280
GinNullCategory newCategory;
252281

253-
if (moveRightIfItNeeded(btree, stack) == false)
282+
if (moveRightIfItNeeded(btree, stack, snapshot) == false)
254283
elog(ERROR, "lost saved point in index"); /* must not happen !!! */
255284

256285
page = BufferGetPage(stack->buffer);
@@ -323,6 +352,7 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
323352
ginstate);
324353
stackEntry = ginFindLeafPage(&btreeEntry, true, snapshot);
325354
page = BufferGetPage(stackEntry->buffer);
355+
326356
/* ginFindLeafPage() will have already checked snapshot age. */
327357
needUnlock = true;
328358

@@ -370,6 +400,10 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
370400
{
371401
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
372402

403+
/* Predicate lock visited entry leaf page */
404+
GinPredicateLockPage(ginstate->index,
405+
BufferGetBlockNumber(stackEntry->buffer), snapshot);
406+
373407
if (GinIsPostingTree(itup))
374408
{
375409
BlockNumber rootPostingTree = GinGetPostingTree(itup);
@@ -391,6 +425,12 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
391425
rootPostingTree, snapshot);
392426
entry->buffer = stack->buffer;
393427

428+
/*
429+
* Predicate lock visited posting tree page, following pages
430+
* will be locked by moveRightIfItNeeded or entryLoadMoreItems
431+
*/
432+
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
433+
394434
/*
395435
* We keep buffer pinned because we need to prevent deletion of
396436
* page during scan. See GIN's vacuum implementation. RefCount is
@@ -493,7 +533,7 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
493533

494534
for (i = 0; i < key->nentries - 1; i++)
495535
{
496-
/* Pass all entries <= i as FALSE, and the rest as MAYBE */
536+
/* Pass all entries <= i as false, and the rest as MAYBE */
497537
for (j = 0; j <= i; j++)
498538
key->entryRes[entryIndexes[j]] = GIN_FALSE;
499539
for (j = i + 1; j < key->nentries; j++)
@@ -633,6 +673,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
633673
entry->btree.fullScan = false;
634674
stack = ginFindLeafPage(&entry->btree, true, snapshot);
635675

676+
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(stack->buffer), snapshot);
677+
636678
/* we don't need the stack, just the buffer. */
637679
entry->buffer = stack->buffer;
638680
IncrBufferRefCount(entry->buffer);
@@ -677,6 +719,10 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
677719
entry->buffer = ginStepRight(entry->buffer,
678720
ginstate->index,
679721
GIN_SHARE);
722+
723+
GinPredicateLockPage(ginstate->index, BufferGetBlockNumber(entry->buffer), snapshot);
724+
725+
680726
page = BufferGetPage(entry->buffer);
681727
}
682728
stepright = true;
@@ -1038,8 +1084,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
10381084
* lossy page even when none of the other entries match.
10391085
*
10401086
* Our strategy is to call the tri-state consistent function, with the
1041-
* lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1042-
* returns FALSE, none of the lossy items alone are enough for a match, so
1087+
* lossy-page entries set to MAYBE, and all the other entries false. If it
1088+
* returns false, none of the lossy items alone are enough for a match, so
10431089
* we don't need to return a lossy-page pointer. Otherwise, return a
10441090
* lossy-page pointer to indicate that the whole heap page must be
10451091
* checked. (On subsequent calls, we'll do nothing until minItem is past
@@ -1700,7 +1746,8 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
17001746
}
17011747

17021748
/*
1703-
* Collect all matched rows from pending list into bitmap
1749+
* Collect all matched rows from pending list into bitmap. Also function
1750+
* takes PendingLockRelation if it's needed.
17041751
*/
17051752
static void
17061753
scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
@@ -1730,9 +1777,24 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
17301777
{
17311778
/* No pending list, so proceed with normal scan */
17321779
UnlockReleaseBuffer(metabuffer);
1780+
1781+
/*
1782+
* If fast update is enabled, we acquire a predicate lock on the entire
1783+
* relation as fast update postpones the insertion of tuples into index
1784+
* structure due to which we can't detect rw conflicts.
1785+
*/
1786+
if (GinGetUseFastUpdate(scan->indexRelation))
1787+
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
1788+
17331789
return;
17341790
}
17351791

1792+
/*
1793+
* Pending list is not empty, we need to lock the index doesn't despite on
1794+
* fastupdate state
1795+
*/
1796+
PredicateLockRelation(scan->indexRelation, scan->xs_snapshot);
1797+
17361798
pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
17371799
LockBuffer(pos.pendingBuffer, GIN_SHARE);
17381800
pos.firstOffset = FirstOffsetNumber;

src/backend/access/gin/gininsert.c

+32-6
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "storage/bufmgr.h"
2323
#include "storage/smgr.h"
2424
#include "storage/indexfsm.h"
25+
#include "storage/predicate.h"
2526
#include "utils/memutils.h"
2627
#include "utils/rel.h"
2728

@@ -48,7 +49,7 @@ static IndexTuple
4849
addItemPointersToLeafTuple(GinState *ginstate,
4950
IndexTuple old,
5051
ItemPointerData *items, uint32 nitem,
51-
GinStatsData *buildStats)
52+
GinStatsData *buildStats, Buffer buffer)
5253
{
5354
OffsetNumber attnum;
5455
Datum key;
@@ -99,7 +100,8 @@ addItemPointersToLeafTuple(GinState *ginstate,
99100
postingRoot = createPostingTree(ginstate->index,
100101
oldItems,
101102
oldNPosting,
102-
buildStats);
103+
buildStats,
104+
buffer);
103105

104106
/* Now insert the TIDs-to-be-added into the posting tree */
105107
ginInsertItemPointers(ginstate->index, postingRoot,
@@ -127,7 +129,7 @@ static IndexTuple
127129
buildFreshLeafTuple(GinState *ginstate,
128130
OffsetNumber attnum, Datum key, GinNullCategory category,
129131
ItemPointerData *items, uint32 nitem,
130-
GinStatsData *buildStats)
132+
GinStatsData *buildStats, Buffer buffer)
131133
{
132134
IndexTuple res = NULL;
133135
GinPostingList *compressedList;
@@ -157,7 +159,7 @@ buildFreshLeafTuple(GinState *ginstate,
157159
* Initialize a new posting tree with the TIDs.
158160
*/
159161
postingRoot = createPostingTree(ginstate->index, items, nitem,
160-
buildStats);
162+
buildStats, buffer);
161163

162164
/* And save the root link in the result tuple */
163165
GinSetPostingTree(res, postingRoot);
@@ -217,17 +219,19 @@ ginEntryInsert(GinState *ginstate,
217219
return;
218220
}
219221

222+
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
220223
/* modify an existing leaf entry */
221224
itup = addItemPointersToLeafTuple(ginstate, itup,
222-
items, nitem, buildStats);
225+
items, nitem, buildStats, stack->buffer);
223226

224227
insertdata.isDelete = true;
225228
}
226229
else
227230
{
231+
GinCheckForSerializableConflictIn(btree.index, NULL, stack->buffer);
228232
/* no match, so construct a new leaf entry */
229233
itup = buildFreshLeafTuple(ginstate, attnum, key, category,
230-
items, nitem, buildStats);
234+
items, nitem, buildStats, stack->buffer);
231235
}
232236

233237
/* Insert the new or modified leaf tuple */
@@ -513,6 +517,18 @@ gininsert(Relation index, Datum *values, bool *isnull,
513517

514518
memset(&collector, 0, sizeof(GinTupleCollector));
515519

520+
/*
521+
* With fastupdate on each scan and each insert begin with access to
522+
* pending list, so it effectively lock entire index. In this case
523+
* we aquire predicate lock and check for conflicts over index relation,
524+
* and hope that it will reduce locking overhead.
525+
*
526+
* Do not use GinCheckForSerializableConflictIn() here, because
527+
* it will do nothing (it does actual work only with fastupdate off).
528+
* Check for conflicts for entire index.
529+
*/
530+
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
531+
516532
for (i = 0; i < ginstate->origTupdesc->natts; i++)
517533
ginHeapTupleFastCollect(ginstate, &collector,
518534
(OffsetNumber) (i + 1),
@@ -523,6 +539,16 @@ gininsert(Relation index, Datum *values, bool *isnull,
523539
}
524540
else
525541
{
542+
GinStatsData stats;
543+
544+
/*
545+
* Fastupdate is off but if pending list isn't empty then we need to
546+
* check conflicts with PredicateLockRelation in scanPendingInsert().
547+
*/
548+
ginGetStats(index, &stats);
549+
if (stats.nPendingPages > 0)
550+
CheckForSerializableConflictIn(index, NULL, InvalidBuffer);
551+
526552
for (i = 0; i < ginstate->origTupdesc->natts; i++)
527553
ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
528554
values[i], isnull[i],

0 commit comments

Comments
 (0)