Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9256822

Browse files
Use streaming read I/O in VACUUM's first phase
Make vacuum's first phase, which prunes and freezes tuples and records dead TIDs, use the read stream API by converting heap_vac_scan_next_block() to a read stream callback. Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com
1 parent 32acad7 commit 9256822

File tree

1 file changed

+94
-58
lines changed

1 file changed

+94
-58
lines changed

src/backend/access/heap/vacuumlazy.c

+94-58
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@
153153
#include "storage/bufmgr.h"
154154
#include "storage/freespace.h"
155155
#include "storage/lmgr.h"
156+
#include "storage/read_stream.h"
156157
#include "utils/lsyscache.h"
157158
#include "utils/pg_rusage.h"
158159
#include "utils/timestamp.h"
@@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo
423424
static void lazy_scan_heap(LVRelState *vacrel);
424425
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
425426
VacuumParams *params);
426-
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
427-
uint8 *blk_info);
427+
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
428+
void *callback_private_data,
429+
void *per_buffer_data);
428430
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
429431
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
430432
BlockNumber blkno, Page page,
@@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
11741176
static void
11751177
lazy_scan_heap(LVRelState *vacrel)
11761178
{
1179+
ReadStream *stream;
11771180
BlockNumber rel_pages = vacrel->rel_pages,
1178-
blkno,
1181+
blkno = 0,
11791182
next_fsm_block_to_vacuum = 0;
1180-
uint8 blk_info = 0;
1183+
void *per_buffer_data = NULL;
11811184
BlockNumber orig_eager_scan_success_limit =
11821185
vacrel->eager_scan_remaining_successes; /* for logging */
11831186
Buffer vmbuffer = InvalidBuffer;
@@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel)
12011204
vacrel->next_unskippable_eager_scanned = false;
12021205
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
12031206

1204-
while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info))
1207+
/* Set up the read stream for vacuum's first pass through the heap */
1208+
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1209+
vacrel->bstrategy,
1210+
vacrel->rel,
1211+
MAIN_FORKNUM,
1212+
heap_vac_scan_next_block,
1213+
vacrel,
1214+
sizeof(uint8));
1215+
1216+
while (true)
12051217
{
12061218
Buffer buf;
12071219
Page page;
1220+
uint8 blk_info = 0;
12081221
bool has_lpdead_items;
12091222
bool vm_page_frozen = false;
12101223
bool got_cleanup_lock = false;
12111224

1212-
vacrel->scanned_pages++;
1213-
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1214-
vacrel->eager_scanned_pages++;
1215-
1216-
/* Report as block scanned, update error traceback information */
1217-
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1218-
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1219-
blkno, InvalidOffsetNumber);
1220-
12211225
vacuum_delay_point(false);
12221226

12231227
/*
@@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel)
12291233
* one-pass strategy, and the two-pass strategy with the index_cleanup
12301234
* param set to 'off'.
12311235
*/
1232-
if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1236+
if (vacrel->scanned_pages > 0 &&
1237+
vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
12331238
lazy_check_wraparound_failsafe(vacrel);
12341239

12351240
/*
@@ -1258,28 +1263,45 @@ lazy_scan_heap(LVRelState *vacrel)
12581263

12591264
/*
12601265
* Vacuum the Free Space Map to make newly-freed space visible on
1261-
* upper-level FSM pages. Note we have not yet processed blkno.
1266+
* upper-level FSM pages. Note that blkno is the previously
1267+
* processed block.
12621268
*/
12631269
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1264-
blkno);
1270+
blkno + 1);
12651271
next_fsm_block_to_vacuum = blkno;
12661272

12671273
/* Report that we are once again scanning the heap */
12681274
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
12691275
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
12701276
}
12711277

1278+
buf = read_stream_next_buffer(stream, &per_buffer_data);
1279+
1280+
/* The relation is exhausted. */
1281+
if (!BufferIsValid(buf))
1282+
break;
1283+
1284+
blk_info = *((uint8 *) per_buffer_data);
1285+
CheckBufferIsPinnedOnce(buf);
1286+
page = BufferGetPage(buf);
1287+
blkno = BufferGetBlockNumber(buf);
1288+
1289+
vacrel->scanned_pages++;
1290+
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1291+
vacrel->eager_scanned_pages++;
1292+
1293+
/* Report as block scanned, update error traceback information */
1294+
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1295+
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1296+
blkno, InvalidOffsetNumber);
1297+
12721298
/*
12731299
* Pin the visibility map page in case we need to mark the page
12741300
* all-visible. In most cases this will be very cheap, because we'll
12751301
* already have the correct page pinned anyway.
12761302
*/
12771303
visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
12781304

1279-
buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1280-
vacrel->bstrategy);
1281-
page = BufferGetPage(buf);
1282-
12831305
/*
12841306
* We need a buffer cleanup lock to prune HOT chains and defragment
12851307
* the page in lazy_scan_prune. But when it's not possible to acquire
@@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel)
14391461
if (BufferIsValid(vmbuffer))
14401462
ReleaseBuffer(vmbuffer);
14411463

1442-
/* report that everything is now scanned */
1443-
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1464+
/*
1465+
* Report that everything is now scanned. We never skip scanning the last
1466+
* block in the relation, so we can pass rel_pages here.
1467+
*/
1468+
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1469+
rel_pages);
14441470

14451471
/* now we can compute the new value for pg_class.reltuples */
14461472
vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
@@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel)
14551481
Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
14561482
vacrel->missed_dead_tuples;
14571483

1484+
read_stream_end(stream);
1485+
14581486
/*
14591487
* Do index vacuuming (call each index's ambulkdelete routine), then do
14601488
* related heap vacuuming
@@ -1465,49 +1493,56 @@ lazy_scan_heap(LVRelState *vacrel)
14651493
/*
14661494
* Vacuum the remainder of the Free Space Map. We must do this whether or
14671495
* not there were indexes, and whether or not we bypassed index vacuuming.
1496+
* We can pass rel_pages here because we never skip scanning the last
1497+
* block of the relation.
14681498
*/
1469-
if (blkno > next_fsm_block_to_vacuum)
1470-
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1499+
if (rel_pages > next_fsm_block_to_vacuum)
1500+
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
14711501

14721502
/* report all blocks vacuumed */
1473-
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
1503+
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
14741504

14751505
/* Do final index cleanup (call each index's amvacuumcleanup routine) */
14761506
if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
14771507
lazy_cleanup_all_indexes(vacrel);
14781508
}
14791509

14801510
/*
1481-
* heap_vac_scan_next_block() -- get next block for vacuum to process
1482-
*
1483-
* lazy_scan_heap() calls here every time it needs to get the next block to
1484-
* prune and vacuum. The function uses the visibility map, vacuum options,
1485-
* and various thresholds to skip blocks which do not need to be processed and
1486-
* sets blkno to the next block to process.
1487-
*
1488-
* The block number of the next block to process is set in *blkno and its
1489-
* visibility status and whether or not it was eager scanned is set in
1490-
* *blk_info.
1491-
*
1492-
* The return value is false if there are no further blocks to process.
1493-
*
1494-
* vacrel is an in/out parameter here. Vacuum options and information about
1495-
* the relation are read. vacrel->skippedallvis is set if we skip a block
1496-
* that's all-visible but not all-frozen, to ensure that we don't update
1497-
* relfrozenxid in that case. vacrel also holds information about the next
1498-
* unskippable block, as bookkeeping for this function.
1511+
* heap_vac_scan_next_block() -- read stream callback to get the next block
1512+
* for vacuum to process
1513+
*
1514+
* Every time lazy_scan_heap() needs a new block to process during its first
1515+
* phase, it invokes read_stream_next_buffer() with a stream set up to call
1516+
* heap_vac_scan_next_block() to get the next block.
1517+
*
1518+
* heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1519+
* various thresholds to skip blocks which do not need to be processed and
1520+
* returns the next block to process or InvalidBlockNumber if there are no
1521+
* remaining blocks.
1522+
*
1523+
* The visibility status of the next block to process and whether or not it
1524+
* was eager scanned is set in the per_buffer_data.
1525+
*
1526+
* callback_private_data contains a reference to the LVRelState, passed to the
1527+
* read stream API during stream setup. The LVRelState is an in/out parameter
1528+
* here (locally named `vacrel`). Vacuum options and information about the
1529+
* relation are read from it. vacrel->skippedallvis is set if we skip a block
1530+
* that's all-visible but not all-frozen (to ensure that we don't update
1531+
* relfrozenxid in that case). vacrel also holds information about the next
1532+
* unskippable block -- as bookkeeping for this function.
14991533
*/
1500-
static bool
1501-
heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
1502-
uint8 *blk_info)
1534+
static BlockNumber
1535+
heap_vac_scan_next_block(ReadStream *stream,
1536+
void *callback_private_data,
1537+
void *per_buffer_data)
15031538
{
15041539
BlockNumber next_block;
1540+
LVRelState *vacrel = callback_private_data;
1541+
uint8 blk_info = 0;
15051542

15061543
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
15071544
next_block = vacrel->current_block + 1;
15081545

1509-
*blk_info = 0;
1510-
15111546
/* Have we reached the end of the relation? */
15121547
if (next_block >= vacrel->rel_pages)
15131548
{
@@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
15161551
ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
15171552
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
15181553
}
1519-
*blkno = vacrel->rel_pages;
1520-
return false;
1554+
return InvalidBlockNumber;
15211555
}
15221556

15231557
/*
@@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
15661600
* but chose not to. We know that they are all-visible in the VM,
15671601
* otherwise they would've been unskippable.
15681602
*/
1569-
*blkno = vacrel->current_block = next_block;
1570-
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1571-
return true;
1603+
vacrel->current_block = next_block;
1604+
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1605+
*((uint8 *) per_buffer_data) = blk_info;
1606+
return vacrel->current_block;
15721607
}
15731608
else
15741609
{
@@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
15781613
*/
15791614
Assert(next_block == vacrel->next_unskippable_block);
15801615

1581-
*blkno = vacrel->current_block = next_block;
1616+
vacrel->current_block = next_block;
15821617
if (vacrel->next_unskippable_allvis)
1583-
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1618+
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
15841619
if (vacrel->next_unskippable_eager_scanned)
1585-
*blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1586-
return true;
1620+
blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1621+
*((uint8 *) per_buffer_data) = blk_info;
1622+
return vacrel->current_block;
15871623
}
15881624
}
15891625

0 commit comments

Comments
 (0)