Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8db05ba

Browse files
committed
Repair old performance bug in tuplesort.c/logtape.c. In the case where
we are doing the final merge pass on-the-fly, and not writing the data back onto a 'tape', the number of free blocks in the tape set will become large, leading to a lot of time wasted in ltsReleaseBlock(). There is really no need to track the free blocks anymore in this state, so add a simple shutoff switch. Per report from Stefan Kaltenbrunner.
1 parent e6107da commit 8db05ba

File tree

3 files changed

+65
-17
lines changed

3 files changed

+65
-17
lines changed

src/backend/utils/sort/logtape.c

+28-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
* Portions Copyright (c) 1994, Regents of the University of California
6565
*
6666
* IDENTIFICATION
67-
* $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.19 2006/03/05 15:58:49 momjian Exp $
67+
* $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.20 2006/03/07 19:06:49 tgl Exp $
6868
*
6969
*-------------------------------------------------------------------------
7070
*/
@@ -146,7 +146,12 @@ struct LogicalTapeSet
146146
* When there are no such blocks, we extend the underlying file. Note
147147
* that the block numbers in freeBlocks are always in *decreasing* order,
148148
* so that removing the last entry gives us the lowest free block.
149+
*
150+
* If forgetFreeSpace is true then any freed blocks are simply forgotten
151+
* rather than being remembered in freeBlocks[]. See notes for
152+
* LogicalTapeSetForgetFreeSpace().
149153
*/
154+
bool forgetFreeSpace; /* are we remembering free blocks? */
150155
long *freeBlocks; /* resizable array */
151156
int nFreeBlocks; /* # of currently free blocks */
152157
int freeBlocksLen; /* current allocated length of freeBlocks[] */
@@ -247,6 +252,12 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
247252
int ndx;
248253
long *ptr;
249254

255+
/*
256+
* Do nothing if we're no longer interested in remembering free space.
257+
*/
258+
if (lts->forgetFreeSpace)
259+
return;
260+
250261
/*
251262
* Enlarge freeBlocks array if full.
252263
*/
@@ -491,6 +502,7 @@ LogicalTapeSetCreate(int ntapes)
491502
(ntapes - 1) * sizeof(LogicalTape));
492503
lts->pfile = BufFileCreateTemp(false);
493504
lts->nFileBlocks = 0L;
505+
lts->forgetFreeSpace = false;
494506
lts->freeBlocksLen = 32; /* reasonable initial guess */
495507
lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
496508
lts->nFreeBlocks = 0;
@@ -546,6 +558,21 @@ LogicalTapeSetClose(LogicalTapeSet *lts)
546558
pfree(lts);
547559
}
548560

561+
/*
562+
* Mark a logical tape set as not needing management of free space anymore.
563+
*
564+
* This should be called if the caller does not intend to write any more data
565+
* into the tape set, but is reading from un-frozen tapes. Since no more
566+
* writes are planned, remembering free blocks is no longer useful. Setting
567+
* this flag lets us avoid wasting time and space in ltsReleaseBlock(), which
568+
* is not designed to handle large numbers of free blocks.
569+
*/
570+
void
571+
LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
572+
{
573+
lts->forgetFreeSpace = true;
574+
}
575+
549576
/*
550577
* Dump the dirty buffer of a logical tape.
551578
*/

src/backend/utils/sort/tuplesort.c

+35-15
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
* Portions Copyright (c) 1994, Regents of the University of California
9292
*
9393
* IDENTIFICATION
94-
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.62 2006/03/05 15:58:49 momjian Exp $
94+
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.63 2006/03/07 19:06:50 tgl Exp $
9595
*
9696
*-------------------------------------------------------------------------
9797
*/
@@ -1384,7 +1384,8 @@ mergeruns(Tuplesortstate *state)
13841384
/*
13851385
* If we produced only one initial run (quite likely if the total data
13861386
* volume is between 1X and 2X workMem), we can just use that tape as the
1387-
* finished output, rather than doing a useless merge.
1387+
* finished output, rather than doing a useless merge. (This obvious
1388+
* optimization is not in Knuth's algorithm.)
13881389
*/
13891390
if (state->currentRun == 1)
13901391
{
@@ -1401,33 +1402,51 @@ mergeruns(Tuplesortstate *state)
14011402

14021403
for (;;)
14031404
{
1404-
/* Step D5: merge runs onto tape[T] until tape[P] is empty */
1405-
while (state->tp_runs[state->tapeRange - 1] ||
1406-
state->tp_dummy[state->tapeRange - 1])
1405+
/*
1406+
* At this point we know that tape[T] is empty. If there's just one
1407+
* (real or dummy) run left on each input tape, then only one merge
1408+
* pass remains. If we don't have to produce a materialized sorted
1409+
* tape, we can stop at this point and do the final merge on-the-fly.
1410+
*/
1411+
if (!state->randomAccess)
14071412
{
1408-
bool allDummy = true;
14091413
bool allOneRun = true;
14101414

1415+
Assert(state->tp_runs[state->tapeRange] == 0);
14111416
for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
14121417
{
1413-
if (state->tp_dummy[tapenum] == 0)
1414-
allDummy = false;
14151418
if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1)
1419+
{
14161420
allOneRun = false;
1421+
break;
1422+
}
14171423
}
1418-
1419-
/*
1420-
* If we don't have to produce a materialized sorted tape, quit as
1421-
* soon as we're down to one real/dummy run per tape.
1422-
*/
1423-
if (!state->randomAccess && allOneRun)
1424+
if (allOneRun)
14241425
{
1425-
Assert(!allDummy);
1426+
/* Tell logtape.c we won't be writing anymore */
1427+
LogicalTapeSetForgetFreeSpace(state->tapeset);
14261428
/* Initialize for the final merge pass */
14271429
beginmerge(state);
14281430
state->status = TSS_FINALMERGE;
14291431
return;
14301432
}
1433+
}
1434+
1435+
/* Step D5: merge runs onto tape[T] until tape[P] is empty */
1436+
while (state->tp_runs[state->tapeRange - 1] ||
1437+
state->tp_dummy[state->tapeRange - 1])
1438+
{
1439+
bool allDummy = true;
1440+
1441+
for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
1442+
{
1443+
if (state->tp_dummy[tapenum] == 0)
1444+
{
1445+
allDummy = false;
1446+
break;
1447+
}
1448+
}
1449+
14311450
if (allDummy)
14321451
{
14331452
state->tp_dummy[state->tapeRange]++;
@@ -1437,6 +1456,7 @@ mergeruns(Tuplesortstate *state)
14371456
else
14381457
mergeonerun(state);
14391458
}
1459+
14401460
/* Step D6: decrease level */
14411461
if (--state->Level == 0)
14421462
break;

src/include/utils/logtape.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.14 2006/03/05 15:59:07 momjian Exp $
11+
* $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.15 2006/03/07 19:06:50 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -26,6 +26,7 @@ typedef struct LogicalTapeSet LogicalTapeSet;
2626

2727
extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes);
2828
extern void LogicalTapeSetClose(LogicalTapeSet *lts);
29+
extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts);
2930
extern size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
3031
void *ptr, size_t size);
3132
extern void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,

0 commit comments

Comments
 (0)