Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8bf7496

Browse files
committed
Reduce the number of pallocs() in BRIN
Instead of allocating memory in brin_deform_tuple and brin_copy_tuple over and over during a scan, allow reuse of previously allocated memory. This is said to make for a measurable performance improvement.

Author: Jinyu Zhang, Álvaro Herrera
Reviewed by: Tomas Vondra
Discussion: https://postgr.es/m/495deb78.4186.1500dacaa63.Coremail.beijing_pg@163.com
1 parent e8fdbd5 commit 8bf7496

File tree

5 files changed

+78
-42
lines changed

5 files changed

+78
-42
lines changed

contrib/pageinspect/brinfuncs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ brin_page_items(PG_FUNCTION_ARGS)
226226
if (ItemIdIsUsed(itemId))
227227
{
228228
dtup = brin_deform_tuple(bdesc,
229-
(BrinTuple *) PageGetItem(page, itemId));
229+
(BrinTuple *) PageGetItem(page, itemId),
230+
NULL);
230231
attno = 1;
231232
unusedItem = false;
232233
}

src/backend/access/brin/brin.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
217217
MemoryContextSwitchTo(tupcxt);
218218
}
219219

220-
dtup = brin_deform_tuple(bdesc, brtup);
220+
dtup = brin_deform_tuple(bdesc, brtup, NULL);
221221

222222
/*
223223
* Compare the key values of the new tuple to the stored index values;
@@ -268,7 +268,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
268268
* re-acquiring the lock.
269269
*/
270270
origsz = ItemIdGetLength(lp);
271-
origtup = brin_copy_tuple(brtup, origsz);
271+
origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
272272

273273
/*
274274
* Before releasing the lock, check if we can attempt a same-page
@@ -363,6 +363,9 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
363363
FmgrInfo *consistentFn;
364364
MemoryContext oldcxt;
365365
MemoryContext perRangeCxt;
366+
BrinMemTuple *dtup;
367+
BrinTuple *btup = NULL;
368+
Size btupsz = 0;
366369

367370
opaque = (BrinOpaque *) scan->opaque;
368371
bdesc = opaque->bo_bdesc;
@@ -384,6 +387,9 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
384387
*/
385388
consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
386389

390+
/* allocate an initial in-memory tuple, out of the per-range memcxt */
391+
dtup = brin_new_memtuple(bdesc);
392+
387393
/*
388394
* Setup and use a per-range memory context, which is reset every time we
389395
* loop below. This avoids having to free the tuples within the loop.
@@ -401,6 +407,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
401407
for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
402408
{
403409
bool addrange;
410+
bool gottuple = false;
404411
BrinTuple *tup;
405412
OffsetNumber off;
406413
Size size;
@@ -414,23 +421,22 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
414421
scan->xs_snapshot);
415422
if (tup)
416423
{
417-
tup = brin_copy_tuple(tup, size);
424+
gottuple = true;
425+
btup = brin_copy_tuple(tup, size, btup, &btupsz);
418426
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
419427
}
420428

421429
/*
422430
* For page ranges with no indexed tuple, we must return the whole
423431
* range; otherwise, compare it to the scan keys.
424432
*/
425-
if (tup == NULL)
433+
if (!gottuple)
426434
{
427435
addrange = true;
428436
}
429437
else
430438
{
431-
BrinMemTuple *dtup;
432-
433-
dtup = brin_deform_tuple(bdesc, tup);
439+
dtup = brin_deform_tuple(bdesc, btup, dtup);
434440
if (dtup->bt_placeholder)
435441
{
436442
/*
@@ -1210,7 +1216,7 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
12101216
/* the placeholder tuple must exist */
12111217
if (phtup == NULL)
12121218
elog(ERROR, "missing placeholder tuple");
1213-
phtup = brin_copy_tuple(phtup, phsz);
1219+
phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
12141220
LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
12151221

12161222
/* merge it into the tuple from the heap scan */
@@ -1358,7 +1364,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
13581364
"brin union",
13591365
ALLOCSET_DEFAULT_SIZES);
13601366
oldcxt = MemoryContextSwitchTo(cxt);
1361-
db = brin_deform_tuple(bdesc, b);
1367+
db = brin_deform_tuple(bdesc, b, NULL);
13621368
MemoryContextSwitchTo(oldcxt);
13631369

13641370
for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)

src/backend/access/brin/brin_pageops.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,8 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
548548
OffsetNumber off;
549549
OffsetNumber maxoff;
550550
Page page;
551+
BrinTuple *btup = NULL;
552+
Size btupsz = 0;
551553

552554
page = BufferGetPage(buf);
553555

@@ -567,7 +569,7 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
567569
{
568570
sz = ItemIdGetLength(lp);
569571
tup = (BrinTuple *) PageGetItem(page, lp);
570-
tup = brin_copy_tuple(tup, sz);
572+
tup = brin_copy_tuple(tup, sz, btup, &btupsz);
571573

572574
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
573575

src/backend/access/brin/brin_tuple.c

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -311,17 +311,26 @@ brin_free_tuple(BrinTuple *tuple)
311311
}
312312

313313
/*
314-
* Create a palloc'd copy of a BrinTuple.
314+
* Given a brin tuple of size len, create a copy of it. If 'dest' is not
315+
* NULL, its size is destsz, and can be used as output buffer; if the tuple
316+
* to be copied does not fit, it is enlarged by repalloc, and the size is
317+
* updated to match. This avoids palloc/free cycles when many brin tuples
318+
* are being processed in loops.
315319
*/
316320
BrinTuple *
317-
brin_copy_tuple(BrinTuple *tuple, Size len)
321+
brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
318322
{
319-
BrinTuple *newtup;
323+
if (!destsz || *destsz == 0)
324+
dest = palloc(len);
325+
else if (len > *destsz)
326+
{
327+
dest = repalloc(dest, len);
328+
*destsz = len;
329+
}
320330

321-
newtup = palloc(len);
322-
memcpy(newtup, tuple, len);
331+
memcpy(dest, tuple, len);
323332

324-
return newtup;
333+
return dest;
325334
}
326335

327336
/*
@@ -348,54 +357,69 @@ BrinMemTuple *
348357
brin_new_memtuple(BrinDesc *brdesc)
349358
{
350359
BrinMemTuple *dtup;
351-
char *currdatum;
352360
long basesize;
353-
int i;
354361

355362
basesize = MAXALIGN(sizeof(BrinMemTuple) +
356363
sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
357364
dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
358-
currdatum = (char *) dtup + basesize;
359-
for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
360-
{
361-
dtup->bt_columns[i].bv_attno = i + 1;
362-
dtup->bt_columns[i].bv_allnulls = true;
363-
dtup->bt_columns[i].bv_hasnulls = false;
364-
dtup->bt_columns[i].bv_values = (Datum *) currdatum;
365-
currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
366-
}
365+
366+
dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
367+
dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
368+
dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
367369

368370
dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
369371
"brin dtuple",
370372
ALLOCSET_DEFAULT_SIZES);
373+
374+
brin_memtuple_initialize(dtup, brdesc);
375+
371376
return dtup;
372377
}
373378

374379
/*
375-
* Reset a BrinMemTuple to initial state
380+
* Reset a BrinMemTuple to initial state. We return the same tuple, for
381+
* notational convenience.
376382
*/
377-
void
383+
BrinMemTuple *
378384
brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
379385
{
380386
int i;
387+
char *currdatum;
381388

382389
MemoryContextReset(dtuple->bt_context);
390+
391+
currdatum = (char *) dtuple +
392+
MAXALIGN(sizeof(BrinMemTuple) +
393+
sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
383394
for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
384395
{
385396
dtuple->bt_columns[i].bv_allnulls = true;
386397
dtuple->bt_columns[i].bv_hasnulls = false;
398+
399+
dtuple->bt_columns[i].bv_attno = i + 1;
400+
dtuple->bt_columns[i].bv_allnulls = true;
401+
dtuple->bt_columns[i].bv_hasnulls = false;
402+
dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
403+
currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
387404
}
405+
406+
return dtuple;
388407
}
389408

390409
/*
391410
* Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
392411
* brin_form_tuple.
393412
*
413+
* As an optimization, the caller can pass a previously allocated 'dMemtuple'.
414+
* This avoids having to allocate it here, which can be useful when this
415+
* function is called many times in a loop. It is caller's responsibility
416+
* that the given BrinMemTuple matches what we need here.
417+
*
394418
* Note we don't need the "on disk tupdesc" here; we rely on our own routine to
395419
* deconstruct the tuple from the on-disk format.
396420
*/
397421
BrinMemTuple *
398-
brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
422+
brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
399423
{
400424
BrinMemTuple *dtup;
401425
Datum *values;
@@ -407,15 +431,16 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
407431
int valueno;
408432
MemoryContext oldcxt;
409433

410-
dtup = brin_new_memtuple(brdesc);
434+
dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
435+
brin_new_memtuple(brdesc);
411436

412437
if (BrinTupleIsPlaceholder(tuple))
413438
dtup->bt_placeholder = true;
414439
dtup->bt_blkno = tuple->bt_blkno;
415440

416-
values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
417-
allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
418-
hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
441+
values = dtup->bt_values;
442+
allnulls = dtup->bt_allnulls;
443+
hasnulls = dtup->bt_hasnulls;
419444

420445
tp = (char *) tuple + BrinTupleDataOffset(tuple);
421446

@@ -458,10 +483,6 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
458483

459484
MemoryContextSwitchTo(oldcxt);
460485

461-
pfree(values);
462-
pfree(allnulls);
463-
pfree(hasnulls);
464-
465486
return dtup;
466487
}
467488

src/include/access/brin_tuple.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ typedef struct BrinMemTuple
3838
bool bt_placeholder; /* this is a placeholder tuple */
3939
BlockNumber bt_blkno; /* heap blkno that the tuple is for */
4040
MemoryContext bt_context; /* memcxt holding the bt_columns values */
41+
/* output arrays for brin_deform_tuple: */
42+
Datum *bt_values; /* values array */
43+
bool *bt_allnulls; /* allnulls array */
44+
bool *bt_hasnulls; /* hasnulls array */
45+
/* not an output array, but must be last */
4146
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER];
4247
} BrinMemTuple;
4348

@@ -83,14 +88,15 @@ extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
8388
extern BrinTuple *brin_form_placeholder_tuple(BrinDesc *brdesc,
8489
BlockNumber blkno, Size *size);
8590
extern void brin_free_tuple(BrinTuple *tuple);
86-
extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len);
91+
extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len,
92+
BrinTuple *dest, Size *destsz);
8793
extern bool brin_tuples_equal(const BrinTuple *a, Size alen,
8894
const BrinTuple *b, Size blen);
8995

9096
extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
91-
extern void brin_memtuple_initialize(BrinMemTuple *dtuple,
97+
extern BrinMemTuple *brin_memtuple_initialize(BrinMemTuple *dtuple,
9298
BrinDesc *brdesc);
9399
extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
94-
BrinTuple *tuple);
100+
BrinTuple *tuple, BrinMemTuple *dMemtuple);
95101

96102
#endif /* BRIN_TUPLE_H */

0 commit comments

Comments
 (0)