
Commit 1ccc1e0

Remove retry loop in heap_page_prune().
The retry loop is needed because heap_page_prune() calls HeapTupleSatisfiesVacuum() and then lazy_scan_prune() does the same thing again, and they might get different answers due to concurrent clog updates. But this patch makes heap_page_prune() return the HeapTupleSatisfiesVacuum() results that it computed back to the caller, which allows lazy_scan_prune() to avoid needing to recompute those values in the first place. That's nice both because it eliminates the need for a retry loop and also because it's cheaper.

Melanie Plageman, reviewed by David Geier, Andres Freund, and me.

Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com
1 parent e64c733 commit 1ccc1e0
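To make the shape of the change concrete, here is a minimal standalone sketch of the pattern the commit adopts: the pruning pass computes visibility once per offset, records it in an int8 array inside the result struct (with -1 meaning "not computed"), and the later vacuum-style scan reuses those stored answers instead of calling the visibility check a second time. Everything in it is invented for illustration (the visibility_t enum, prune_result_t, check_visibility(), and the page-size constant); it is not PostgreSQL code, but it mirrors the division of labor between heap_page_prune() and lazy_scan_prune() established by this commit.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for HTSV_Result: the possible answers from a visibility check. */
typedef enum
{
    TUPLE_DEAD,
    TUPLE_LIVE,
    TUPLE_RECENTLY_DEAD
} visibility_t;

#define MAX_ITEMS_PER_PAGE 8    /* stand-in for MaxHeapTuplesPerPage */

/*
 * Stand-in for PruneResult: the pruning pass reports what it did, plus the
 * visibility status it computed for each offset.  Offsets are 1-based, so
 * the array gets one extra slot; -1 would mark slots with no computed status.
 */
typedef struct
{
    int     ndeleted;
    int8_t  visibility[MAX_ITEMS_PER_PAGE + 1];
} prune_result_t;

/* Fake visibility check, playing the role of HeapTupleSatisfiesVacuum(). */
static visibility_t
check_visibility(int offnum)
{
    return (offnum % 3 == 0) ? TUPLE_DEAD : TUPLE_LIVE;
}

/* "Pruning" pass: the visibility check runs exactly once per offset. */
static void
prune_page(int nitems, prune_result_t *result)
{
    result->ndeleted = 0;
    for (int off = 1; off <= nitems; off++)
    {
        result->visibility[off] = (int8_t) check_visibility(off);
        if (result->visibility[off] == TUPLE_DEAD)
            result->ndeleted++;
    }
}

/*
 * "Vacuum" pass: reuses the stored answers.  Because it never re-runs the
 * check, it cannot disagree with the pruning pass, so no retry is needed.
 */
static void
scan_page(int nitems, const prune_result_t *result)
{
    for (int off = 1; off <= nitems; off++)
    {
        switch ((visibility_t) result->visibility[off])
        {
            case TUPLE_LIVE:
                printf("offset %d: live\n", off);
                break;
            default:
                printf("offset %d: dead or recently dead\n", off);
                break;
        }
    }
}

int
main(void)
{
    prune_result_t result;

    prune_page(6, &result);
    scan_page(6, &result);
    printf("tuples pruned: %d\n", result.ndeleted);
    return 0;
}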

3 files changed (+55, -49 lines)


src/backend/access/heap/pruneheap.c

Lines changed: 17 additions & 19 deletions
@@ -53,16 +53,6 @@ typedef struct
 	 * 1. Otherwise every access would need to subtract 1.
 	 */
 	bool		marked[MaxHeapTuplesPerPage + 1];
-
-	/*
-	 * Tuple visibility is only computed once for each tuple, for correctness
-	 * and efficiency reasons; see comment in heap_page_prune() for details.
-	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
-	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
-	 *
-	 * Same indexing as ->marked.
-	 */
-	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
 											   Buffer buffer);
 static int	heap_prune_chain(Buffer buffer,
 							 OffsetNumber rootoffnum,
+							 int8 *htsv,
 							 PruneState *prstate);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
@@ -240,6 +231,10 @@ heap_page_prune(Relation relation, Buffer buffer,
 	prstate.nredirected = prstate.ndead = prstate.nunused = 0;
 	memset(prstate.marked, 0, sizeof(prstate.marked));
 
+	/*
+	 * presult->htsv is not initialized here because all ntuple spots in the
+	 * array will be set either to a valid HTSV_Result value or -1.
+	 */
 	presult->ndeleted = 0;
 	presult->nnewlpdead = 0;
 
@@ -276,7 +271,7 @@ heap_page_prune(Relation relation, Buffer buffer,
 		/* Nothing to do if slot doesn't contain a tuple */
 		if (!ItemIdIsNormal(itemid))
 		{
-			prstate.htsv[offnum] = -1;
+			presult->htsv[offnum] = -1;
 			continue;
 		}
 
@@ -292,8 +287,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 		if (off_loc)
 			*off_loc = offnum;
 
-		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-														   buffer);
+		presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+															buffer);
 	}
 
 	/* Scan the page */
@@ -317,7 +312,8 @@ heap_page_prune(Relation relation, Buffer buffer,
 			continue;
 
 		/* Process this item or chain of items */
-		presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
+		presult->ndeleted += heap_prune_chain(buffer, offnum,
+											  presult->htsv, &prstate);
 	}
 
 	/* Clear the offset information once we have processed the given page. */
@@ -446,6 +442,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
+ * Tuple visibility information is provided in htsv.
+ *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
@@ -473,7 +471,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
  * Returns the number of tuples (to be) deleted from the page.
  */
 static int
-heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
+heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
+				 int8 *htsv, PruneState *prstate)
 {
 	int			ndeleted = 0;
 	Page		dp = (Page) BufferGetPage(buffer);
@@ -494,7 +493,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 	 */
 	if (ItemIdIsNormal(rootlp))
 	{
-		Assert(prstate->htsv[rootoffnum] != -1);
+		Assert(htsv[rootoffnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
 		if (HeapTupleHeaderIsHeapOnly(htup))
@@ -517,7 +516,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 		 * either here or while following a chain below.  Whichever path
 		 * gets there first will mark the tuple unused.
 		 */
-		if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+		if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
 			!HeapTupleHeaderIsHotUpdated(htup))
 		{
 			heap_prune_record_unused(prstate, rootoffnum);
@@ -585,7 +584,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 			break;
 
 		Assert(ItemIdIsNormal(lp));
-		Assert(prstate->htsv[offnum] != -1);
 		htup = (HeapTupleHeader) PageGetItem(dp, lp);
 
 		/*
@@ -605,7 +603,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 		 */
 		tupdead = recent_dead = false;
 
-		switch ((HTSV_Result) prstate->htsv[offnum])
+		switch (htsv_get_valid_status(htsv[offnum]))
 		{
 			case HEAPTUPLE_DEAD:
 				tupdead = true;

src/backend/access/heap/vacuumlazy.c

Lines changed: 13 additions & 30 deletions
@@ -1524,12 +1524,13 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
  * but nevertheless were left with storage after pruning.
  *
- * The approach we take now is to restart pruning when the race condition is
- * detected.  This allows heap_page_prune() to prune the tuples inserted by
- * the now-aborted transaction.  This is a little crude, but it guarantees
- * that any items that make it into the dead_items array are simple LP_DEAD
- * line pointers, and that every remaining item with tuple storage is
- * considered as a candidate for freezing.
+ * As of Postgres 17, we circumvent this problem altogether by reusing the
+ * result of heap_page_prune()'s visibility check. Without the second call to
+ * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
+ * disagreement. We'll just handle such tuples as if they had become fully dead
+ * right after this operation completes instead of in the middle of it. Note that
+ * any tuple that becomes dead after the call to heap_page_prune() can't need to
+ * be frozen, because it was visible to another session when vacuum started.
  */
 static void
 lazy_scan_prune(LVRelState *vacrel,
@@ -1542,8 +1543,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	OffsetNumber offnum,
 				maxoff;
 	ItemId		itemid;
-	HeapTupleData tuple;
-	HTSV_Result res;
 	PruneResult presult;
 	int			tuples_frozen,
 				lpdead_items,
@@ -1563,8 +1562,6 @@ lazy_scan_prune(LVRelState *vacrel,
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 
-retry:
-
 	/* Initialize (or reset) page-level state */
 	pagefrz.freeze_required = false;
 	pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
@@ -1600,6 +1597,7 @@ lazy_scan_prune(LVRelState *vacrel,
 		 offnum <= maxoff;
 		 offnum = OffsetNumberNext(offnum))
 	{
+		HeapTupleHeader htup;
 		bool		totally_frozen;
 
 		/*
@@ -1642,22 +1640,7 @@ lazy_scan_prune(LVRelState *vacrel,
 
 		Assert(ItemIdIsNormal(itemid));
 
-		ItemPointerSet(&(tuple.t_self), blkno, offnum);
-		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-		tuple.t_len = ItemIdGetLength(itemid);
-		tuple.t_tableOid = RelationGetRelid(rel);
-
-		/*
-		 * DEAD tuples are almost always pruned into LP_DEAD line pointers by
-		 * heap_page_prune(), but it's possible that the tuple state changed
-		 * since heap_page_prune() looked.  Handle that here by restarting.
-		 * (See comments at the top of function for a full explanation.)
-		 */
-		res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
-									   buf);
-
-		if (unlikely(res == HEAPTUPLE_DEAD))
-			goto retry;
+		htup = (HeapTupleHeader) PageGetItem(page, itemid);
 
 		/*
 		 * The criteria for counting a tuple as live in this block need to
@@ -1678,7 +1661,7 @@ lazy_scan_prune(LVRelState *vacrel,
 		 * (Cases where we bypass index vacuuming will violate this optimistic
 		 * assumption, but the overall impact of that should be negligible.)
 		 */
-		switch (res)
+		switch (htsv_get_valid_status(presult.htsv[offnum]))
 		{
 			case HEAPTUPLE_LIVE:
 
@@ -1700,7 +1683,7 @@ lazy_scan_prune(LVRelState *vacrel,
 				{
 					TransactionId xmin;
 
-					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+					if (!HeapTupleHeaderXminCommitted(htup))
 					{
 						prunestate->all_visible = false;
 						break;
@@ -1710,7 +1693,7 @@ lazy_scan_prune(LVRelState *vacrel,
 					 * The inserter definitely committed. But is it old enough
 					 * that everyone sees it as committed?
 					 */
-					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+					xmin = HeapTupleHeaderGetXmin(htup);
 					if (!TransactionIdPrecedes(xmin,
 											   vacrel->cutoffs.OldestXmin))
 					{
@@ -1764,7 +1747,7 @@ lazy_scan_prune(LVRelState *vacrel,
 		prunestate->hastup = true;	/* page makes rel truncation unsafe */
 
 		/* Tuple with storage -- consider need to freeze */
-		if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+		if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
 									  &frozen[tuples_frozen], &totally_frozen))
 		{
 			/* Save prepared freeze plan for later */

src/include/access/heapam.h

Lines changed: 25 additions & 0 deletions
@@ -198,8 +198,33 @@ typedef struct PruneResult
 {
 	int			ndeleted;		/* Number of tuples deleted from the page */
 	int			nnewlpdead;		/* Number of newly LP_DEAD items */
+
+	/*
+	 * Tuple visibility is only computed once for each tuple, for correctness
+	 * and efficiency reasons; see comment in heap_page_prune() for details.
+	 * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+	 * indicate no visibility has been computed, e.g. for LP_DEAD items.
+	 *
+	 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+	 * 1. Otherwise every access would need to subtract 1.
+	 */
+	int8		htsv[MaxHeapTuplesPerPage + 1];
 } PruneResult;
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneResult.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+	Assert(status >= HEAPTUPLE_DEAD &&
+		   status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+	return (HTSV_Result) status;
+}
+
 /* ----------------
  *		function prototypes for heap access method
  *
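A note on the representation: PruneResult.htsv is int8[] rather than HTSV_Result[] precisely so that -1 can mark slots whose visibility was never computed, and htsv_get_valid_status() turns an accidental read of such a slot into an assertion failure rather than a silently bogus enum value. The toy program below, using invented names (fake_htsv, get_valid, NOT_COMPUTED), demonstrates that guard outside PostgreSQL; only the shape of the check mirrors the helper added above.

#include <assert.h>
#include <stdint.h>

/* Stand-in for HTSV_Result. */
typedef enum
{
    FAKE_DEAD,
    FAKE_LIVE,
    FAKE_RECENTLY_DEAD
} fake_htsv;

#define NOT_COMPUTED (-1)       /* sentinel value, not a member of fake_htsv */

/* Shaped like htsv_get_valid_status(): reject anything outside the enum. */
static inline fake_htsv
get_valid(int status)
{
    assert(status >= FAKE_DEAD && status <= FAKE_RECENTLY_DEAD);
    return (fake_htsv) status;
}

int
main(void)
{
    int8_t  slots[4] = {NOT_COMPUTED, FAKE_LIVE, FAKE_DEAD, NOT_COMPUTED};

    (void) get_valid(slots[1]); /* fine: a real status was stored here */
    (void) get_valid(slots[2]); /* fine */

    /* Uncommenting this trips the assertion: slot 3 was never computed. */
    /* (void) get_valid(slots[3]); */

    return 0;
}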
