Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 279628a

Browse files
committed
Accelerate end-of-transaction dropping of relations
When relations are dropped, at end of transaction we need to remove the files and clean the buffer pool of buffers containing pages of those relations. Previously we would scan the buffer pool once per relation to clean up buffers. When there are many relations to drop, the repeated scans make this process slow; so we now instead pass a list of relations to drop and scan the pool once, checking each buffer against the passed list. When the number of relations is larger than a threshold (which as of this patch is being set to 20 relations) we sort the array before starting, and bsearch the array; when it's smaller, we simply scan the array linearly each time, because that's faster. The exact optimal threshold value depends on many factors, but the difference is not likely to be significant enough to justify making it user-settable.

This has been measured to be a significant win (a 15x win when dropping 100,000 relations; an extreme case, but reportedly a real one).

Author: Tomas Vondra, some tweaks by me
Reviewed by: Robert Haas, Shigeru Hanada, Andres Freund, Álvaro Herrera
1 parent 0b63291 commit 279628a

File tree

5 files changed

+206
-14
lines changed

5 files changed

+206
-14
lines changed

src/backend/catalog/storage.c

+24-2
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,10 @@ smgrDoPendingDeletes(bool isCommit)
312312
PendingRelDelete *pending;
313313
PendingRelDelete *prev;
314314
PendingRelDelete *next;
315+
int nrels = 0,
316+
i = 0,
317+
maxrels = 8;
318+
SMgrRelation *srels = palloc(maxrels * sizeof(SMgrRelation));
315319

316320
prev = NULL;
317321
for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -335,14 +339,32 @@ smgrDoPendingDeletes(bool isCommit)
335339
SMgrRelation srel;
336340

337341
srel = smgropen(pending->relnode, pending->backend);
338-
smgrdounlink(srel, false);
339-
smgrclose(srel);
342+
343+
/* extend the array if needed (double the size) */
344+
if (maxrels <= nrels)
345+
{
346+
maxrels *= 2;
347+
srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
348+
}
349+
350+
srels[nrels++] = srel;
340351
}
341352
/* must explicitly free the list entry */
342353
pfree(pending);
343354
/* prev does not change */
344355
}
345356
}
357+
358+
if (nrels > 0)
359+
{
360+
smgrdounlinkall(srels, nrels, false);
361+
362+
for (i = 0; i < nrels; i++)
363+
smgrclose(srels[i]);
364+
}
365+
366+
pfree(srels);
367+
346368
}
347369

348370
/*

src/backend/storage/buffer/bufmgr.c

+99-10
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
#define BUF_WRITTEN 0x01
6363
#define BUF_REUSABLE 0x02
6464

65+
#define DROP_RELS_BSEARCH_THRESHOLD 20
6566

6667
/* GUC variables */
6768
bool zero_damaged_pages = false;
@@ -107,6 +108,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
107108
bool *foundPtr);
108109
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
109110
static void AtProcExit_Buffers(int code, Datum arg);
111+
static int rnode_comparator(const void *p1, const void *p2);
110112

111113

112114
/*
@@ -2086,43 +2088,103 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
20862088
}
20872089

20882090
/* ---------------------------------------------------------------------
2089-
* DropRelFileNodeAllBuffers
2091+
* DropRelFileNodesAllBuffers
20902092
*
20912093
* This function removes from the buffer pool all the pages of all
2092-
* forks of the specified relation. It's equivalent to calling
2093-
* DropRelFileNodeBuffers once per fork with firstDelBlock = 0.
2094+
* forks of the specified relations. It's equivalent to calling
2095+
* DropRelFileNodeBuffers once per fork per relation with
2096+
* firstDelBlock = 0.
20942097
* --------------------------------------------------------------------
20952098
*/
20962099
void
2097-
DropRelFileNodeAllBuffers(RelFileNodeBackend rnode)
2100+
DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
20982101
{
2099-
int i;
2102+
int i,
2103+
n = 0;
2104+
RelFileNode *nodes;
2105+
bool use_bsearch;
2106+
2107+
if (nnodes == 0)
2108+
return;
2109+
2110+
nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
21002111

21012112
/* If it's a local relation, it's localbuf.c's problem. */
2102-
if (RelFileNodeBackendIsTemp(rnode))
2113+
for (i = 0; i < nnodes; i++)
21032114
{
2104-
if (rnode.backend == MyBackendId)
2105-
DropRelFileNodeAllLocalBuffers(rnode.node);
2115+
if (RelFileNodeBackendIsTemp(rnodes[i]))
2116+
{
2117+
if (rnodes[i].backend == MyBackendId)
2118+
DropRelFileNodeAllLocalBuffers(rnodes[i].node);
2119+
}
2120+
else
2121+
nodes[n++] = rnodes[i].node;
2122+
}
2123+
2124+
/*
2125+
* If there are no non-local relations, then we're done. Release the memory
2126+
* and return.
2127+
*/
2128+
if (n == 0)
2129+
{
2130+
pfree(nodes);
21062131
return;
21072132
}
21082133

2134+
/*
2135+
* For low number of relations to drop just use a simple walk through, to
2136+
* save the bsearch overhead. The threshold to use is rather a guess than a
2137+
* exactly determined value, as it depends on many factors (CPU and RAM
2138+
* speeds, amount of shared buffers etc.).
2139+
*/
2140+
use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD;
2141+
2142+
/* sort the list of rnodes if necessary */
2143+
if (use_bsearch)
2144+
pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
2145+
21092146
for (i = 0; i < NBuffers; i++)
21102147
{
2148+
RelFileNode *rnode = NULL;
21112149
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
21122150

21132151
/*
21142152
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
21152153
* and saves some cycles.
21162154
*/
2117-
if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
2155+
2156+
if (!use_bsearch)
2157+
{
2158+
int j;
2159+
2160+
for (j = 0; j < n; j++)
2161+
{
2162+
if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
2163+
{
2164+
rnode = &nodes[j];
2165+
break;
2166+
}
2167+
}
2168+
}
2169+
else
2170+
{
2171+
rnode = bsearch((const void *) &(bufHdr->tag.rnode),
2172+
nodes, n, sizeof(RelFileNode),
2173+
rnode_comparator);
2174+
}
2175+
2176+
/* buffer doesn't belong to any of the given relfilenodes; skip it */
2177+
if (rnode == NULL)
21182178
continue;
21192179

21202180
LockBufHdr(bufHdr);
2121-
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
2181+
if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
21222182
InvalidateBuffer(bufHdr); /* releases spinlock */
21232183
else
21242184
UnlockBufHdr(bufHdr);
21252185
}
2186+
2187+
pfree(nodes);
21262188
}
21272189

21282190
/* ---------------------------------------------------------------------
@@ -2953,3 +3015,30 @@ local_buffer_write_error_callback(void *arg)
29533015
pfree(path);
29543016
}
29553017
}
3018+
3019+
/*
3020+
* RelFileNode qsort/bsearch comparator; see RelFileNodeEquals.
3021+
*/
3022+
static int
3023+
rnode_comparator(const void *p1, const void *p2)
3024+
{
3025+
RelFileNode n1 = *(RelFileNode *) p1;
3026+
RelFileNode n2 = *(RelFileNode *) p2;
3027+
3028+
if (n1.relNode < n2.relNode)
3029+
return -1;
3030+
else if (n1.relNode > n2.relNode)
3031+
return 1;
3032+
3033+
if (n1.dbNode < n2.dbNode)
3034+
return -1;
3035+
else if (n1.dbNode > n2.dbNode)
3036+
return 1;
3037+
3038+
if (n1.spcNode < n2.spcNode)
3039+
return -1;
3040+
else if (n1.spcNode > n2.spcNode)
3041+
return 1;
3042+
else
3043+
return 0;
3044+
}

src/backend/storage/smgr/smgr.c

+81-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
390390
* Get rid of any remaining buffers for the relation. bufmgr will just
391391
* drop them without bothering to write the contents.
392392
*/
393-
DropRelFileNodeAllBuffers(rnode);
393+
DropRelFileNodesAllBuffers(&rnode, 1);
394394

395395
/*
396396
* It'd be nice to tell the stats collector to forget it immediately, too.
@@ -419,6 +419,86 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
419419
(*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
420420
}
421421

422+
/*
423+
* smgrdounlinkall() -- Immediately unlink all forks of all given relations
424+
*
425+
* All forks of all given relations are removed from the store. This
426+
* should not be used during transactional operations, since it can't be
427+
* undone.
428+
*
429+
* If isRedo is true, it is okay for the underlying file(s) to be gone
430+
* already.
431+
*
432+
* This is equivalent to calling smgrdounlink for each relation, but it's
433+
* significantly quicker so should be preferred when possible.
434+
*/
435+
void
436+
smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
437+
{
438+
int i = 0;
439+
RelFileNodeBackend *rnodes;
440+
ForkNumber forknum;
441+
442+
if (nrels == 0)
443+
return;
444+
445+
/*
446+
* create an array which contains all relations to be dropped, and
447+
* close each relation's forks at the smgr level while at it
448+
*/
449+
rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
450+
for (i = 0; i < nrels; i++)
451+
{
452+
RelFileNodeBackend rnode = rels[i]->smgr_rnode;
453+
int which = rels[i]->smgr_which;
454+
455+
rnodes[i] = rnode;
456+
457+
/* Close the forks at smgr level */
458+
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
459+
(*(smgrsw[which].smgr_close)) (rels[i], forknum);
460+
}
461+
462+
/*
463+
* Get rid of any remaining buffers for the relations. bufmgr will just
464+
* drop them without bothering to write the contents.
465+
*/
466+
DropRelFileNodesAllBuffers(rnodes, nrels);
467+
468+
/*
469+
* It'd be nice to tell the stats collector to forget them immediately, too.
470+
* But we can't because we don't know the OIDs.
471+
*/
472+
473+
/*
474+
* Send a shared-inval message to force other backends to close any
475+
* dangling smgr references they may have for these rels. We should do
476+
* this before starting the actual unlinking, in case we fail partway
477+
* through that step. Note that the sinval messages will eventually come
478+
* back to this backend, too, and thereby provide a backstop that we closed
479+
* our own smgr rel.
480+
*/
481+
for (i = 0; i < nrels; i++)
482+
CacheInvalidateSmgr(rnodes[i]);
483+
484+
/*
485+
* Delete the physical file(s).
486+
*
487+
* Note: smgr_unlink must treat deletion failure as a WARNING, not an
488+
* ERROR, because we've already decided to commit or abort the current
489+
* xact.
490+
*/
491+
492+
for (i = 0; i < nrels; i++)
493+
{
494+
int which = rels[i]->smgr_which;
495+
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
496+
(*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo);
497+
}
498+
499+
pfree(rnodes);
500+
}
501+
422502
/*
423503
* smgrdounlinkfork() -- Immediately unlink one fork of a relation.
424504
*

src/include/storage/bufmgr.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ extern void FlushRelationBuffers(Relation rel);
188188
extern void FlushDatabaseBuffers(Oid dbid);
189189
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
190190
ForkNumber forkNum, BlockNumber firstDelBlock);
191-
extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
191+
extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
192192
extern void DropDatabaseBuffers(Oid dbid);
193193

194194
#define RelationGetNumberOfBlocks(reln) \

src/include/storage/smgr.h

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ extern void smgrcloseall(void);
8585
extern void smgrclosenode(RelFileNodeBackend rnode);
8686
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
8787
extern void smgrdounlink(SMgrRelation reln, bool isRedo);
88+
extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
8889
extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
8990
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
9091
BlockNumber blocknum, char *buffer, bool skipFsync);

0 commit comments

Comments
 (0)