
Commit 8193d64

Check for conflicting queries during replay of gistvacuumpage()
013ebc0 implements so-called GiST microvacuum: gistgettuple() marks index tuples as dead when kill_prior_tuple is set, and later, when a new tuple insertion claims page space, those dead index tuples are physically deleted from the page. When this deletion is replayed on a standby, it might conflict with read-only queries, but 013ebc0 doesn't handle this. That may lead to the disappearance of some tuples from read-only snapshots on the standby.

This commit implements resolution of conflicts between replay of GiST microvacuum and standby queries. On master we introduce a new WAL record type, XLOG_GIST_DELETE, which carries the necessary information. On stable releases we have to be tricky to keep WAL compatibility: the information required for conflict processing is simply appended to the data of the XLOG_GIST_PAGE_UPDATE record, so a PostgreSQL version that doesn't know about conflict processing will just ignore it.

Reported-by: Andres Freund
Diagnosed-by: Andres Freund
Discussion: https://postgr.es/m/20181212224524.scafnlyjindmrbe6%40alap3.anarazel.de
Author: Alexander Korotkov
Backpatch-through: 9.6
1 parent 053ad56
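As an aside for readers of this page, the WAL-compatibility trick described in the message above comes down to a layout convention: the fixed-size XLOG_GIST_PAGE_UPDATE payload stays as it was, and the heap relation's RelFileNode plus the array of deleted offsets are simply appended after it, so a replaying server decides whether conflict processing is needed purely from the record's data length (the XLogRecGetDataLen(record) > sizeof(gistxlogPageUpdate) test in the gistxlog.c diff below). The standalone C sketch that follows models only that layout idea; FileNodeId, GistPageUpdateHdr, build_payload and replay_payload are illustrative stand-ins, not the actual PostgreSQL definitions, and the real record is of course built with XLogRegisterData() and parsed during redo as the diff shows.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for the real PostgreSQL types (illustrative only). */
typedef struct { uint32_t spc, db, rel; } FileNodeId;                /* ~ RelFileNode */
typedef struct { uint16_t ntodelete; uint16_t ntoinsert; } GistPageUpdateHdr; /* ~ gistxlogPageUpdate */

/*
 * Build the record payload the way the patched gistXLogUpdate() does it:
 * the fixed-size header first, then (optionally) the heap file node and
 * the deleted offsets appended after it.  An old binary reads only
 * sizeof(header) bytes and ignores the rest.
 */
static size_t
build_payload(char *buf, const uint16_t *todelete, uint16_t ntodelete,
              const FileNodeId *hnode)
{
    GistPageUpdateHdr hdr = {ntodelete, 0};
    size_t      len = 0;

    memcpy(buf + len, &hdr, sizeof(hdr));
    len += sizeof(hdr);
    if (hnode != NULL)          /* conflict info is optional */
    {
        memcpy(buf + len, hnode, sizeof(*hnode));
        len += sizeof(*hnode);
        memcpy(buf + len, todelete, sizeof(uint16_t) * ntodelete);
        len += sizeof(uint16_t) * ntodelete;
    }
    return len;
}

/* Replay side: the presence of conflict info is inferred from the length alone. */
static void
replay_payload(const char *buf, size_t len)
{
    GistPageUpdateHdr hdr;

    memcpy(&hdr, buf, sizeof(hdr));
    if (len > sizeof(hdr))      /* extra data => conflict processing needed */
    {
        FileNodeId  hnode;
        const uint16_t *offs;

        memcpy(&hnode, buf + sizeof(hdr), sizeof(hnode));
        offs = (const uint16_t *) (buf + sizeof(hdr) + sizeof(hnode));
        printf("conflict info: heap rel %u, %u dead offset(s), first = %u\n",
               (unsigned) hnode.rel, (unsigned) hdr.ntodelete, (unsigned) offs[0]);
    }
    else
        printf("no conflict info appended; nothing extra to do\n");
}

int
main(void)
{
    char        buf[256];
    uint16_t    dead[] = {3, 7, 11};
    FileNodeId  heap = {1663, 16384, 16385};
    size_t      len = build_payload(buf, dead, 3, &heap);

    replay_payload(buf, len);
    return 0;
}

Compiled with any C99 compiler, the sketch prints the heap relation and the dead offsets when the extra payload is present, and reports that there is nothing extra to do when it is absent, which is exactly how an older minor release stays WAL-compatible with the backpatched record.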

5 files changed: +220 −16 lines

src/backend/access/gist/gist.c

Lines changed: 15 additions & 9 deletions
@@ -38,7 +38,8 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                  bool unlockbuf, bool unlockleftchild);
 static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                 GISTSTATE *giststate, List *splitinfo, bool releasebuf);
-static void gistvacuumpage(Relation rel, Page page, Buffer buffer);
+static void gistvacuumpage(Relation rel, Page page, Buffer buffer,
+               Relation heapRel);


 #define ROTATEDIST(d) do { \
@@ -172,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
                          values, isnull, true /* size is currently bogus */ );
     itup->t_tid = *ht_ctid;

-    gistdoinsert(r, itup, 0, giststate);
+    gistdoinsert(r, itup, 0, giststate, heapRel);

     /* cleanup */
     MemoryContextSwitchTo(oldCxt);
@@ -218,7 +219,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                 BlockNumber *newblkno,
                 Buffer leftchildbuf,
                 List **splitinfo,
-                bool markfollowright)
+                bool markfollowright,
+                Relation heapRel)
 {
     BlockNumber blkno = BufferGetBlockNumber(buffer);
     Page        page = BufferGetPage(buffer);
@@ -259,7 +261,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
      */
     if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
     {
-        gistvacuumpage(rel, page, buffer);
+        gistvacuumpage(rel, page, buffer, heapRel);
         is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
     }

@@ -556,7 +558,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,

         recptr = gistXLogUpdate(buffer,
                                 deloffs, ndeloffs, itup, ntup,
-                                leftchildbuf);
+                                leftchildbuf, NULL);

         PageSetLSN(page, recptr);
     }
@@ -604,7 +606,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  * so it does not bother releasing palloc'd allocations.
  */
 void
-gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
+gistdoinsert(Relation r, IndexTuple itup, Size freespace,
+             GISTSTATE *giststate, Relation heapRel)
 {
     ItemId      iid;
     IndexTuple  idxtuple;
@@ -616,6 +619,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
     memset(&state, 0, sizeof(GISTInsertState));
     state.freespace = freespace;
     state.r = r;
+    state.heapRel = heapRel;

     /* Start from the root */
     firststack.blkno = GIST_ROOT_BLKNO;
@@ -1232,7 +1236,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                            oldoffnum, NULL,
                            leftchild,
                            &splitinfo,
-                           true);
+                           true,
+                           state->heapRel);

     /*
      * Before recursing up in case the page was split, release locks on the
@@ -1543,7 +1548,7 @@ freeGISTstate(GISTSTATE *giststate)
  * Function assumes that buffer is exclusively locked.
  */
 static void
-gistvacuumpage(Relation rel, Page page, Buffer buffer)
+gistvacuumpage(Relation rel, Page page, Buffer buffer, Relation heapRel)
 {
     OffsetNumber deletable[MaxIndexTuplesPerPage];
     int         ndeletable = 0;
@@ -1591,7 +1596,8 @@ gistvacuumpage(Relation rel, Page page, Buffer buffer)

         recptr = gistXLogUpdate(buffer,
                                 deletable, ndeletable,
-                                NULL, 0, InvalidBuffer);
+                                NULL, 0, InvalidBuffer,
+                                &heapRel->rd_node);

         PageSetLSN(page, recptr);
     }

src/backend/access/gist/gistbuild.c

Lines changed: 5 additions & 2 deletions
@@ -56,6 +56,7 @@ typedef enum
 typedef struct
 {
     Relation    indexrel;
+    Relation    heaprel;
     GISTSTATE  *giststate;

     int64       indtuples;      /* number of tuples indexed */
@@ -122,6 +123,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     int         fillfactor;

     buildstate.indexrel = index;
+    buildstate.heaprel = heap;
     if (index->rd_options)
     {
         /* Get buffering mode from the options string */
@@ -484,7 +486,7 @@ gistBuildCallback(Relation index,
          * locked, we call gistdoinsert directly.
          */
         gistdoinsert(index, itup, buildstate->freespace,
-                     buildstate->giststate);
+                     buildstate->giststate, buildstate->heaprel);
     }

     /* Update tuple count and total size. */
@@ -690,7 +692,8 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
                            itup, ntup, oldoffnum, &placed_to_blk,
                            InvalidBuffer,
                            &splitinfo,
-                           false);
+                           false,
+                           buildstate->heaprel);

     /*
      * If this is a root split, update the root path item kept in memory. This

src/backend/access/gist/gistvacuum.c

Lines changed: 2 additions & 1 deletion
@@ -224,7 +224,8 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,

             recptr = gistXLogUpdate(buffer,
                                     todelete, ntodelete,
-                                    NULL, 0, InvalidBuffer);
+                                    NULL, 0, InvalidBuffer,
+                                    NULL);
             PageSetLSN(page, recptr);
         }
         else

src/backend/access/gist/gistxlog.c

Lines changed: 192 additions & 1 deletion
@@ -16,8 +16,12 @@
 #include "access/bufmask.h"
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
+#include "access/heapam_xlog.h"
+#include "access/transam.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
+#include "miscadmin.h"
+#include "storage/procarray.h"
 #include "utils/memutils.h"

 static MemoryContext opCtx;     /* working memory for operations */
@@ -60,6 +64,155 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
     UnlockReleaseBuffer(buffer);
 }

+/*
+ * Get the latestRemovedXid from the heap pages pointed at by the index
+ * tuples being deleted.  See also btree_xlog_delete_get_latestRemovedXid,
+ * on which this function is based.
+ */
+static TransactionId
+gistRedoPageUpdateRecordGetLatestRemovedXid(XLogReaderState *record)
+{
+    gistxlogPageUpdate *xlrec = (gistxlogPageUpdate *) XLogRecGetData(record);
+    OffsetNumber *todelete;
+    Buffer      ibuffer,
+                hbuffer;
+    Page        ipage,
+                hpage;
+    RelFileNode rnode,
+               *hnode;
+    BlockNumber blkno;
+    ItemId      iitemid,
+                hitemid;
+    IndexTuple  itup;
+    HeapTupleHeader htuphdr;
+    BlockNumber hblkno;
+    OffsetNumber hoffnum;
+    TransactionId latestRemovedXid = InvalidTransactionId;
+    int         i;
+
+    /*
+     * If there's nothing running on the standby we don't need to derive a
+     * full latestRemovedXid value, so use a fast path out of here.  This
+     * returns InvalidTransactionId, and so will conflict with all HS
+     * transactions; but since we just worked out that that's zero people,
+     * it's OK.
+     *
+     * XXX There is a race condition here, which is that a new backend might
+     * start just after we look.  If so, it cannot need to conflict, but this
+     * coding will result in throwing a conflict anyway.
+     */
+    if (CountDBBackends(InvalidOid) == 0)
+        return latestRemovedXid;
+
+    /*
+     * In what follows, we have to examine the previous state of the index
+     * page, as well as the heap page(s) it points to.  This is only valid if
+     * WAL replay has reached a consistent database state; which means that
+     * the preceding check is not just an optimization, but is *necessary*. We
+     * won't have let in any user sessions before we reach consistency.
+     */
+    if (!reachedConsistency)
+        elog(PANIC, "gistRedoDeleteRecordGetLatestRemovedXid: cannot operate with inconsistent data");
+
+    /*
+     * Get index page.  If the DB is consistent, this should not fail, nor
+     * should any of the heap page fetches below.  If one does, we return
+     * InvalidTransactionId to cancel all HS transactions.  That's probably
+     * overkill, but it's safe, and certainly better than panicking here.
+     */
+    XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+    ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
+    if (!BufferIsValid(ibuffer))
+        return InvalidTransactionId;
+    LockBuffer(ibuffer, BUFFER_LOCK_EXCLUSIVE);
+    ipage = (Page) BufferGetPage(ibuffer);
+
+    /*
+     * Loop through the deleted index items to obtain the TransactionId from
+     * the heap items they point to.
+     */
+    hnode = (RelFileNode *) ((char *) xlrec + sizeof(gistxlogPageUpdate));
+    todelete = (OffsetNumber *) ((char *) hnode + sizeof(RelFileNode));
+
+    for (i = 0; i < xlrec->ntodelete; i++)
+    {
+        /*
+         * Identify the index tuple about to be deleted
+         */
+        iitemid = PageGetItemId(ipage, todelete[i]);
+        itup = (IndexTuple) PageGetItem(ipage, iitemid);
+
+        /*
+         * Locate the heap page that the index tuple points at
+         */
+        hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
+        hbuffer = XLogReadBufferExtended(*hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
+        if (!BufferIsValid(hbuffer))
+        {
+            UnlockReleaseBuffer(ibuffer);
+            return InvalidTransactionId;
+        }
+        LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
+        hpage = (Page) BufferGetPage(hbuffer);
+
+        /*
+         * Look up the heap tuple header that the index tuple points at by
+         * using the heap node supplied with the xlrec. We can't use
+         * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
+         * Note that we are not looking at tuple data here, just headers.
+         */
+        hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
+        hitemid = PageGetItemId(hpage, hoffnum);
+
+        /*
+         * Follow any redirections until we find something useful.
+         */
+        while (ItemIdIsRedirected(hitemid))
+        {
+            hoffnum = ItemIdGetRedirect(hitemid);
+            hitemid = PageGetItemId(hpage, hoffnum);
+            CHECK_FOR_INTERRUPTS();
+        }
+
+        /*
+         * If the heap item has storage, then read the header and use that to
+         * set latestRemovedXid.
+         *
+         * Some LP_DEAD items may not be accessible, so we ignore them.
+         */
+        if (ItemIdHasStorage(hitemid))
+        {
+            htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid);
+
+            HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid);
+        }
+        else if (ItemIdIsDead(hitemid))
+        {
+            /*
+             * Conjecture: if hitemid is dead then it had xids before the xids
+             * marked on LP_NORMAL items. So we just ignore this item and move
+             * onto the next, for the purposes of calculating
+             * latestRemovedxids.
+             */
+        }
+        else
+            Assert(!ItemIdIsUsed(hitemid));
+
+        UnlockReleaseBuffer(hbuffer);
+    }
+
+    UnlockReleaseBuffer(ibuffer);
+
+    /*
+     * If all heap tuples were LP_DEAD then we will be returning
+     * InvalidTransactionId here, which avoids conflicts. This matches
+     * existing logic which assumes that LP_DEAD tuples must already be older
+     * than the latestRemovedXid on the cleanup record that set them as
+     * LP_DEAD, hence must already have generated a conflict.
+     */
+    return latestRemovedXid;
+}
+
 /*
  * redo any page update (except page split)
  */
@@ -71,6 +224,34 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
     Buffer      buffer;
     Page        page;

+    /*
+     * If we have any conflict processing to do, it must happen before we
+     * update the page.
+     *
+     * Support for conflict processing in GiST has been backpatched.  This is
+     * why we have to use tricky way of saving WAL-compatibility between minor
+     * versions.  Information required for conflict processing is just
+     * appended to data of XLOG_GIST_PAGE_UPDATE record.  So, PostgreSQL
+     * version, which doesn't know about conflict processing, will just ignore
+     * that.
+     *
+     * GiST delete records can conflict with standby queries.  You might think
+     * that vacuum records would conflict as well, but we've handled that
+     * already.  XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
+     * cleaned by the vacuum of the heap and so we can resolve any conflicts
+     * just once when that arrives.  After that we know that no conflicts
+     * exist from individual gist vacuum records on that index.
+     */
+    if (InHotStandby && XLogRecGetDataLen(record) > sizeof(gistxlogPageUpdate))
+    {
+        TransactionId latestRemovedXid = gistRedoPageUpdateRecordGetLatestRemovedXid(record);
+        RelFileNode rnode;
+
+        XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+
+        ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
+    }
+
     if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
     {
         char       *begin;
@@ -457,7 +638,7 @@ XLogRecPtr
 gistXLogUpdate(Buffer buffer,
                OffsetNumber *todelete, int ntodelete,
                IndexTuple *itup, int ituplen,
-               Buffer leftchildbuf)
+               Buffer leftchildbuf, RelFileNode *hnode)
 {
     gistxlogPageUpdate xlrec;
     int         i;
@@ -469,6 +650,16 @@ gistXLogUpdate(Buffer buffer,
     XLogBeginInsert();
     XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));

+    /*
+     * Append the information required for standby conflict processing if it
+     * is provided by caller.
+     */
+    if (hnode)
+    {
+        XLogRegisterData((char *) hnode, sizeof(RelFileNode));
+        XLogRegisterData((char *) todelete, sizeof(OffsetNumber) * ntodelete);
+    }
+
     XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);

0 commit comments