Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6d05086

Browse files
committed
Speed up truncations of relation forks.
When a relation is truncated, shared_buffers needs to be scanned so that any buffers for the relation forks are invalidated in it. Previously, shared_buffers was scanned once for each relation fork, i.e., MAIN, FSM and VM, when VACUUM truncated off any empty pages at the end of the relation or TRUNCATE truncated the relation in place. Since shared_buffers needed to be scanned multiple times, it could take a long time to finish those commands, especially when shared_buffers was large. This commit changes the logic so that shared_buffers is scanned only once for those three relation forks. Author: Kirk Jamison Reviewed-by: Masahiko Sawada, Thomas Munro, Alvaro Herrera, Takayuki Tsunakawa and Fujii Masao Discussion: https://postgr.es/m/D09B13F772D2274BB348A310EE3027C64E2067@g01jpexmbkw24
1 parent 2e5c83a commit 6d05086

File tree

10 files changed

+172
-86
lines changed

10 files changed

+172
-86
lines changed

contrib/pg_visibility/pg_visibility.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,8 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
383383
{
384384
Oid relid = PG_GETARG_OID(0);
385385
Relation rel;
386+
ForkNumber fork;
387+
BlockNumber block;
386388

387389
rel = relation_open(relid, AccessExclusiveLock);
388390

@@ -392,7 +394,12 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
392394
RelationOpenSmgr(rel);
393395
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
394396

395-
visibilitymap_truncate(rel, 0);
397+
block = visibilitymap_prepare_truncate(rel, 0);
398+
if (BlockNumberIsValid(block))
399+
{
400+
fork = VISIBILITYMAP_FORKNUM;
401+
smgrtruncate(rel->rd_smgr, &fork, 1, &block);
402+
}
396403

397404
if (RelationNeedsWAL(rel))
398405
{
@@ -418,7 +425,7 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS)
418425
* here and when we sent the messages at our eventual commit. However,
419426
* we're currently only sending a non-transactional smgr invalidation,
420427
* which will have been posted to shared memory immediately from within
421-
* visibilitymap_truncate. Therefore, there should be no race here.
428+
* smgrtruncate. Therefore, there should be no race here.
422429
*
423430
* The reason why it's desirable to release the lock early here is because
424431
* of the possibility that someone will need to use this to blow away many

src/backend/access/heap/visibilitymap.c

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
* visibilitymap_set - set a bit in a previously pinned page
1818
* visibilitymap_get_status - get status of bits
1919
* visibilitymap_count - count number of bits set in visibility map
20-
* visibilitymap_truncate - truncate the visibility map
20+
* visibilitymap_prepare_truncate -
21+
* prepare for truncation of the visibility map
2122
*
2223
* NOTES
2324
*
@@ -430,16 +431,18 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
430431
}
431432

432433
/*
433-
* visibilitymap_truncate - truncate the visibility map
434-
*
435-
* The caller must hold AccessExclusiveLock on the relation, to ensure that
436-
* other backends receive the smgr invalidation event that this function sends
437-
* before they access the VM again.
434+
* visibilitymap_prepare_truncate -
435+
* prepare for truncation of the visibility map
438436
*
439437
* nheapblocks is the new size of the heap.
438+
*
439+
* Returns the number of blocks in the new visibility map.
440+
* If it's InvalidBlockNumber, there is nothing to truncate;
441+
* otherwise the caller is responsible for calling smgrtruncate()
442+
* to truncate the visibility map pages.
440443
*/
441-
void
442-
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
444+
BlockNumber
445+
visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
443446
{
444447
BlockNumber newnblocks;
445448

@@ -459,7 +462,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
459462
* nothing to truncate.
460463
*/
461464
if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
462-
return;
465+
return InvalidBlockNumber;
463466

464467
/*
465468
* Unless the new size is exactly at a visibility map page boundary, the
@@ -480,7 +483,7 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
480483
if (!BufferIsValid(mapBuffer))
481484
{
482485
/* nothing to do, the file was already smaller */
483-
return;
486+
return InvalidBlockNumber;
484487
}
485488

486489
page = BufferGetPage(mapBuffer);
@@ -528,20 +531,10 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
528531
if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
529532
{
530533
/* nothing to do, the file was already smaller than requested size */
531-
return;
534+
return InvalidBlockNumber;
532535
}
533536

534-
/* Truncate the unused VM pages, and send smgr inval message */
535-
smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);
536-
537-
/*
538-
* We might as well update the local smgr_vm_nblocks setting. smgrtruncate
539-
* sent an smgr cache inval message, which will cause other backends to
540-
* invalidate their copy of smgr_vm_nblocks, and this one too at the next
541-
* command boundary. But this ensures it isn't outright wrong until then.
542-
*/
543-
if (rel->rd_smgr)
544-
rel->rd_smgr->smgr_vm_nblocks = newnblocks;
537+
return newnblocks;
545538
}
546539

547540
/*

src/backend/catalog/storage.c

Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
231231
{
232232
bool fsm;
233233
bool vm;
234+
bool need_fsm_vacuum = false;
235+
ForkNumber forks[MAX_FORKNUM];
236+
BlockNumber blocks[MAX_FORKNUM];
237+
int nforks = 0;
234238

235239
/* Open it at the smgr level if not already done */
236240
RelationOpenSmgr(rel);
@@ -242,15 +246,35 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
242246
rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
243247
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
244248

245-
/* Truncate the FSM first if it exists */
249+
/* Prepare for truncation of MAIN fork of the relation */
250+
forks[nforks] = MAIN_FORKNUM;
251+
blocks[nforks] = nblocks;
252+
nforks++;
253+
254+
/* Prepare for truncation of the FSM if it exists */
246255
fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
247256
if (fsm)
248-
FreeSpaceMapTruncateRel(rel, nblocks);
257+
{
258+
blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
259+
if (BlockNumberIsValid(blocks[nforks]))
260+
{
261+
forks[nforks] = FSM_FORKNUM;
262+
nforks++;
263+
need_fsm_vacuum = true;
264+
}
265+
}
249266

250-
/* Truncate the visibility map too if it exists. */
267+
/* Prepare for truncation of the visibility map too if it exists */
251268
vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
252269
if (vm)
253-
visibilitymap_truncate(rel, nblocks);
270+
{
271+
blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
272+
if (BlockNumberIsValid(blocks[nforks]))
273+
{
274+
forks[nforks] = VISIBILITYMAP_FORKNUM;
275+
nforks++;
276+
}
277+
}
254278

255279
/*
256280
* We WAL-log the truncation before actually truncating, which means
@@ -290,8 +314,16 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
290314
XLogFlush(lsn);
291315
}
292316

293-
/* Do the real work */
294-
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
317+
/* Do the real work to truncate relation forks */
318+
smgrtruncate(rel->rd_smgr, forks, nforks, blocks);
319+
320+
/*
321+
* Update upper-level FSM pages to account for the truncation.
322+
* This is important because the just-truncated pages were likely
323+
* marked as all-free, and would be preferentially selected.
324+
*/
325+
if (need_fsm_vacuum)
326+
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
295327
}
296328

297329
/*
@@ -588,6 +620,10 @@ smgr_redo(XLogReaderState *record)
588620
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
589621
SMgrRelation reln;
590622
Relation rel;
623+
ForkNumber forks[MAX_FORKNUM];
624+
BlockNumber blocks[MAX_FORKNUM];
625+
int nforks = 0;
626+
bool need_fsm_vacuum = false;
591627

592628
reln = smgropen(xlrec->rnode, InvalidBackendId);
593629

@@ -616,23 +652,54 @@ smgr_redo(XLogReaderState *record)
616652
*/
617653
XLogFlush(lsn);
618654

655+
/* Prepare for truncation of MAIN fork */
619656
if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
620657
{
621-
smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
658+
forks[nforks] = MAIN_FORKNUM;
659+
blocks[nforks] = xlrec->blkno;
660+
nforks++;
622661

623662
/* Also tell xlogutils.c about it */
624663
XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
625664
}
626665

627-
/* Truncate FSM and VM too */
666+
/* Prepare for truncation of FSM and VM too */
628667
rel = CreateFakeRelcacheEntry(xlrec->rnode);
629668

630669
if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
631670
smgrexists(reln, FSM_FORKNUM))
632-
FreeSpaceMapTruncateRel(rel, xlrec->blkno);
671+
{
672+
blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
673+
if (BlockNumberIsValid(blocks[nforks]))
674+
{
675+
forks[nforks] = FSM_FORKNUM;
676+
nforks++;
677+
need_fsm_vacuum = true;
678+
}
679+
}
633680
if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
634681
smgrexists(reln, VISIBILITYMAP_FORKNUM))
635-
visibilitymap_truncate(rel, xlrec->blkno);
682+
{
683+
blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
684+
if (BlockNumberIsValid(blocks[nforks]))
685+
{
686+
forks[nforks] = VISIBILITYMAP_FORKNUM;
687+
nforks++;
688+
}
689+
}
690+
691+
/* Do the real work to truncate relation forks */
692+
if (nforks > 0)
693+
smgrtruncate(reln, forks, nforks, blocks);
694+
695+
/*
696+
* Update upper-level FSM pages to account for the truncation.
697+
* This is important because the just-truncated pages were likely
698+
* marked as all-free, and would be preferentially selected.
699+
*/
700+
if (need_fsm_vacuum)
701+
FreeSpaceMapVacuumRange(rel, xlrec->blkno,
702+
InvalidBlockNumber);
636703

637704
FreeFakeRelcacheEntry(rel);
638705
}

src/backend/storage/buffer/bufmgr.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2901,7 +2901,7 @@ BufferGetLSNAtomic(Buffer buffer)
29012901
* DropRelFileNodeBuffers
29022902
*
29032903
* This function removes from the buffer pool all the pages of the
2904-
* specified relation fork that have block numbers >= firstDelBlock.
2904+
* specified relation forks that have block numbers >= firstDelBlock.
29052905
* (In particular, with firstDelBlock = 0, all pages are removed.)
29062906
* Dirty pages are simply dropped, without bothering to write them
29072907
* out first. Therefore, this is NOT rollback-able, and so should be
@@ -2924,16 +2924,21 @@ BufferGetLSNAtomic(Buffer buffer)
29242924
* --------------------------------------------------------------------
29252925
*/
29262926
void
2927-
DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
2928-
BlockNumber firstDelBlock)
2927+
DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
2928+
int nforks, BlockNumber *firstDelBlock)
29292929
{
29302930
int i;
2931+
int j;
29312932

29322933
/* If it's a local relation, it's localbuf.c's problem. */
29332934
if (RelFileNodeBackendIsTemp(rnode))
29342935
{
29352936
if (rnode.backend == MyBackendId)
2936-
DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock);
2937+
{
2938+
for (j = 0; j < nforks; j++)
2939+
DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
2940+
firstDelBlock[j]);
2941+
}
29372942
return;
29382943
}
29392944

@@ -2962,11 +2967,18 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
29622967
continue;
29632968

29642969
buf_state = LockBufHdr(bufHdr);
2965-
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
2966-
bufHdr->tag.forkNum == forkNum &&
2967-
bufHdr->tag.blockNum >= firstDelBlock)
2968-
InvalidateBuffer(bufHdr); /* releases spinlock */
2969-
else
2970+
2971+
for (j = 0; j < nforks; j++)
2972+
{
2973+
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
2974+
bufHdr->tag.forkNum == forkNum[j] &&
2975+
bufHdr->tag.blockNum >= firstDelBlock[j])
2976+
{
2977+
InvalidateBuffer(bufHdr); /* releases spinlock */
2978+
break;
2979+
}
2980+
}
2981+
if (j >= nforks)
29702982
UnlockBufHdr(bufHdr, buf_state);
29712983
}
29722984
}

src/backend/storage/freespace/freespace.c

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -247,16 +247,18 @@ GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
247247
}
248248

249249
/*
250-
* FreeSpaceMapTruncateRel - adjust for truncation of a relation.
251-
*
252-
* The caller must hold AccessExclusiveLock on the relation, to ensure that
253-
* other backends receive the smgr invalidation event that this function sends
254-
* before they access the FSM again.
250+
* FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
255251
*
256252
* nblocks is the new size of the heap.
253+
*
254+
* Returns the number of blocks in the new FSM.
255+
* If it's InvalidBlockNumber, there is nothing to truncate;
256+
* otherwise the caller is responsible for calling smgrtruncate()
257+
* to truncate the FSM pages, and FreeSpaceMapVacuumRange()
258+
* to update upper-level pages in the FSM.
257259
*/
258-
void
259-
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
260+
BlockNumber
261+
FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
260262
{
261263
BlockNumber new_nfsmblocks;
262264
FSMAddress first_removed_address;
@@ -270,7 +272,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
270272
* truncate.
271273
*/
272274
if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
273-
return;
275+
return InvalidBlockNumber;
274276

275277
/* Get the location in the FSM of the first removed heap block */
276278
first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
@@ -285,7 +287,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
285287
{
286288
buf = fsm_readbuf(rel, first_removed_address, false);
287289
if (!BufferIsValid(buf))
288-
return; /* nothing to do; the FSM was already smaller */
290+
return InvalidBlockNumber; /* nothing to do; the FSM was already smaller */
289291
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
290292

291293
/* NO EREPORT(ERROR) from here till changes are logged */
@@ -315,28 +317,10 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
315317
{
316318
new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
317319
if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
318-
return; /* nothing to do; the FSM was already smaller */
320+
return InvalidBlockNumber; /* nothing to do; the FSM was already smaller */
319321
}
320322

321-
/* Truncate the unused FSM pages, and send smgr inval message */
322-
smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);
323-
324-
/*
325-
* We might as well update the local smgr_fsm_nblocks setting.
326-
* smgrtruncate sent an smgr cache inval message, which will cause other
327-
* backends to invalidate their copy of smgr_fsm_nblocks, and this one too
328-
* at the next command boundary. But this ensures it isn't outright wrong
329-
* until then.
330-
*/
331-
if (rel->rd_smgr)
332-
rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
333-
334-
/*
335-
* Update upper-level FSM pages to account for the truncation. This is
336-
* important because the just-truncated pages were likely marked as
337-
* all-free, and would be preferentially selected.
338-
*/
339-
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
323+
return new_nfsmblocks;
340324
}
341325

342326
/*

0 commit comments

Comments
 (0)