Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c02fdc9

Browse files
committed
Logical Tape Set: use min heap for freelist.
Previously, the freelist of blocks was tracked as an occasionally-sorted array. A min heap is more resilient to larger freelists or more frequent changes between reading and writing. Discussion: https://postgr.es/m/97c46a59c27f3c38e486ca170fcbc618d97ab049.camel%40j-davis.com
1 parent cac8ce4 commit c02fdc9

File tree

1 file changed

+104
-56
lines changed

1 file changed

+104
-56
lines changed

src/backend/utils/sort/logtape.c

+104-56
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,8 @@
4949
* when reading, and read multiple blocks from the same tape in one go,
5050
* whenever the buffer becomes empty.
5151
*
52-
* To support the above policy of writing to the lowest free block,
53-
* ltsGetFreeBlock sorts the list of free block numbers into decreasing
54-
* order each time it is asked for a block and the list isn't currently
55-
* sorted. This is an efficient way to handle it because we expect cycles
56-
* of releasing many blocks followed by re-using many blocks, due to
57-
* the larger read buffer.
52+
* To support the above policy of writing to the lowest free block, the
53+
* freelist is a min heap.
5854
*
5955
* Since all the bookkeeping and buffer memory is allocated with palloc(),
6056
* and the underlying file(s) are made with OpenTemporaryFile, all resources
@@ -170,7 +166,7 @@ struct LogicalTapeSet
170166
/*
171167
* File size tracking. nBlocksWritten is the size of the underlying file,
172168
* in BLCKSZ blocks. nBlocksAllocated is the number of blocks allocated
173-
* by ltsGetFreeBlock(), and it is always greater than or equal to
169+
* by ltsReleaseBlock(), and it is always greater than or equal to
174170
* nBlocksWritten. Blocks between nBlocksAllocated and nBlocksWritten are
175171
* blocks that have been allocated for a tape, but have not been written
176172
* to the underlying file yet. nHoleBlocks tracks the total number of
@@ -188,17 +184,11 @@ struct LogicalTapeSet
188184
* If forgetFreeSpace is true then any freed blocks are simply forgotten
189185
* rather than being remembered in freeBlocks[]. See notes for
190186
* LogicalTapeSetForgetFreeSpace().
191-
*
192-
* If blocksSorted is true then the block numbers in freeBlocks are in
193-
* *decreasing* order, so that removing the last entry gives us the lowest
194-
* free block. We re-sort the blocks whenever a block is demanded; this
195-
* should be reasonably efficient given the expected usage pattern.
196187
*/
197188
bool forgetFreeSpace; /* are we remembering free blocks? */
198-
bool blocksSorted; /* is freeBlocks[] currently in order? */
199-
long *freeBlocks; /* resizable array */
200-
int nFreeBlocks; /* # of currently free blocks */
201-
int freeBlocksLen; /* current allocated length of freeBlocks[] */
189+
long *freeBlocks; /* resizable array holding minheap */
190+
long nFreeBlocks; /* # of currently free blocks */
191+
Size freeBlocksLen; /* current allocated length of freeBlocks[] */
202192

203193
/* The array of logical tapes. */
204194
int nTapes; /* # of logical tapes in set */
@@ -321,46 +311,88 @@ ltsReadFillBuffer(LogicalTapeSet *lts, LogicalTape *lt)
321311
return (lt->nbytes > 0);
322312
}
323313

324-
/*
325-
* qsort comparator for sorting freeBlocks[] into decreasing order.
326-
*/
327-
static int
328-
freeBlocks_cmp(const void *a, const void *b)
314+
static inline void
315+
swap_nodes(long *heap, unsigned long a, unsigned long b)
316+
{
317+
unsigned long swap;
318+
319+
swap = heap[a];
320+
heap[a] = heap[b];
321+
heap[b] = swap;
322+
}
323+
324+
static inline unsigned long
325+
left_offset(unsigned long i)
326+
{
327+
return 2 * i + 1;
328+
}
329+
330+
static inline unsigned long
331+
right_offset(unsigned i)
332+
{
333+
return 2 * i + 2;
334+
}
335+
336+
static inline unsigned long
337+
parent_offset(unsigned long i)
329338
{
330-
long ablk = *((const long *) a);
331-
long bblk = *((const long *) b);
332-
333-
/* can't just subtract because long might be wider than int */
334-
if (ablk < bblk)
335-
return 1;
336-
if (ablk > bblk)
337-
return -1;
338-
return 0;
339+
return (i - 1) / 2;
339340
}
340341

341342
/*
342-
* Select a currently unused block for writing to.
343+
* Select the lowest currently unused block by taking the first element from
344+
* the freelist min heap.
343345
*/
344346
static long
345347
ltsGetFreeBlock(LogicalTapeSet *lts)
346348
{
347-
/*
348-
* If there are multiple free blocks, we select the one appearing last in
349-
* freeBlocks[] (after sorting the array if needed). If there are none,
350-
* assign the next block at the end of the file.
351-
*/
352-
if (lts->nFreeBlocks > 0)
349+
long *heap = lts->freeBlocks;
350+
long blocknum;
351+
int heapsize;
352+
unsigned long pos;
353+
354+
/* freelist empty; allocate a new block */
355+
if (lts->nFreeBlocks == 0)
356+
return lts->nBlocksAllocated++;
357+
358+
if (lts->nFreeBlocks == 1)
353359
{
354-
if (!lts->blocksSorted)
355-
{
356-
qsort((void *) lts->freeBlocks, lts->nFreeBlocks,
357-
sizeof(long), freeBlocks_cmp);
358-
lts->blocksSorted = true;
359-
}
360-
return lts->freeBlocks[--lts->nFreeBlocks];
360+
lts->nFreeBlocks--;
361+
return lts->freeBlocks[0];
361362
}
362-
else
363-
return lts->nBlocksAllocated++;
363+
364+
/* take top of minheap */
365+
blocknum = heap[0];
366+
367+
/* replace with end of minheap array */
368+
heap[0] = heap[--lts->nFreeBlocks];
369+
370+
/* sift down */
371+
pos = 0;
372+
heapsize = lts->nFreeBlocks;
373+
while (true)
374+
{
375+
unsigned long left = left_offset(pos);
376+
unsigned long right = right_offset(pos);
377+
unsigned long min_child;
378+
379+
if (left < heapsize && right < heapsize)
380+
min_child = (heap[left] < heap[right]) ? left : right;
381+
else if (left < heapsize)
382+
min_child = left;
383+
else if (right < heapsize)
384+
min_child = right;
385+
else
386+
break;
387+
388+
if (heap[min_child] >= heap[pos])
389+
break;
390+
391+
swap_nodes(heap, min_child, pos);
392+
pos = min_child;
393+
}
394+
395+
return blocknum;
364396
}
365397

366398
/*
@@ -369,7 +401,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
369401
static void
370402
ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
371403
{
372-
int ndx;
404+
long *heap;
405+
unsigned long pos;
373406

374407
/*
375408
* Do nothing if we're no longer interested in remembering free space.
@@ -382,19 +415,35 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
382415
*/
383416
if (lts->nFreeBlocks >= lts->freeBlocksLen)
384417
{
418+
/*
419+
* If the freelist becomes very large, just return and leak this free
420+
* block.
421+
*/
422+
if (lts->freeBlocksLen * 2 > MaxAllocSize)
423+
return;
424+
385425
lts->freeBlocksLen *= 2;
386426
lts->freeBlocks = (long *) repalloc(lts->freeBlocks,
387427
lts->freeBlocksLen * sizeof(long));
388428
}
389429

390-
/*
391-
* Add blocknum to array, and mark the array unsorted if it's no longer in
392-
* decreasing order.
393-
*/
394-
ndx = lts->nFreeBlocks++;
395-
lts->freeBlocks[ndx] = blocknum;
396-
if (ndx > 0 && lts->freeBlocks[ndx - 1] < blocknum)
397-
lts->blocksSorted = false;
430+
heap = lts->freeBlocks;
431+
pos = lts->nFreeBlocks;
432+
433+
/* place entry at end of minheap array */
434+
heap[pos] = blocknum;
435+
lts->nFreeBlocks++;
436+
437+
/* sift up */
438+
while (pos != 0)
439+
{
440+
unsigned long parent = parent_offset(pos);
441+
if (heap[parent] < heap[pos])
442+
break;
443+
444+
swap_nodes(heap, parent, pos);
445+
pos = parent;
446+
}
398447
}
399448

400449
/*
@@ -524,7 +573,6 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
524573
lts->nBlocksWritten = 0L;
525574
lts->nHoleBlocks = 0L;
526575
lts->forgetFreeSpace = false;
527-
lts->blocksSorted = true; /* a zero-length array is sorted ... */
528576
lts->freeBlocksLen = 32; /* reasonable initial guess */
529577
lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
530578
lts->nFreeBlocks = 0;

0 commit comments

Comments
 (0)