Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5edc63b

Browse files
committed
Account for the effect of lossy pages when costing bitmap scans.
Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
1 parent 0c98d0d commit 5edc63b

File tree

3 files changed

+75
-22
lines changed

3 files changed

+75
-22
lines changed

src/backend/nodes/tidbitmap.c

+25-12
Original file line numberDiff line numberDiff line change
@@ -265,25 +265,14 @@ TIDBitmap *
265265
tbm_create(long maxbytes, dsa_area *dsa)
266266
{
267267
TIDBitmap *tbm;
268-
long nbuckets;
269268

270269
/* Create the TIDBitmap struct and zero all its fields */
271270
tbm = makeNode(TIDBitmap);
272271

273272
tbm->mcxt = CurrentMemoryContext;
274273
tbm->status = TBM_EMPTY;
275274

276-
/*
277-
* Estimate number of hashtable entries we can have within maxbytes. This
278-
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
279-
* for our purpose. Also count an extra Pointer per entry for the arrays
280-
* created during iteration readout.
281-
*/
282-
nbuckets = maxbytes /
283-
(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
284-
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
285-
nbuckets = Max(nbuckets, 16); /* sanity limit */
286-
tbm->maxentries = (int) nbuckets;
275+
tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
287276
tbm->lossify_start = 0;
288277
tbm->dsa = dsa;
289278
tbm->dsapagetable = InvalidDsaPointer;
@@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
15461535
tbm->dsapagetableold = InvalidDsaPointer;
15471536
}
15481537
}
1538+
1539+
/*
1540+
* tbm_calculate_entries
1541+
*
1542+
* Estimate number of hashtable entries we can have within maxbytes.
1543+
*/
1544+
long
1545+
tbm_calculate_entries(double maxbytes)
1546+
{
1547+
long nbuckets;
1548+
1549+
/*
1550+
* Estimate number of hashtable entries we can have within maxbytes. This
1551+
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
1552+
* for our purpose. Also count an extra Pointer per entry for the arrays
1553+
* created during iteration readout.
1554+
*/
1555+
nbuckets = maxbytes /
1556+
(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
1557+
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
1558+
nbuckets = Max(nbuckets, 16); /* sanity limit */
1559+
1560+
return nbuckets;
1561+
}

src/backend/optimizer/path/costsize.c

+49-10
Original file line numberDiff line numberDiff line change
@@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
51715171
double T;
51725172
double pages_fetched;
51735173
double tuples_fetched;
5174+
double heap_pages;
5175+
long maxentries;
51745176

51755177
/*
51765178
* Fetch total cost of obtaining the bitmap, as well as its total
@@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
51855187

51865188
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
51875189

5190+
/*
5191+
* For a single scan, the number of heap pages that need to be fetched is
5192+
* the same as the Mackert and Lohman formula for the case T <= b (ie, no
5193+
* re-reads needed).
5194+
*/
5195+
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
5196+
5197+
/*
5198+
* Calculate the number of pages fetched from the heap. Then based on
5199+
* current work_mem estimate get the estimated maxentries in the bitmap.
5200+
* (Note that we always do this calculation based on the number of pages
5201+
* that would be fetched in a single iteration, even if loop_count > 1.
5202+
* That's correct, because only that number of entries will be stored in
5203+
* the bitmap at one time.)
5204+
*/
5205+
heap_pages = Min(pages_fetched, baserel->pages);
5206+
maxentries = tbm_calculate_entries(work_mem * 1024L);
5207+
51885208
if (loop_count > 1)
51895209
{
51905210
/*
@@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
51995219
root);
52005220
pages_fetched /= loop_count;
52015221
}
5202-
else
5203-
{
5204-
/*
5205-
* For a single scan, the number of heap pages that need to be fetched
5206-
* is the same as the Mackert and Lohman formula for the case T <= b
5207-
* (ie, no re-reads needed).
5208-
*/
5209-
pages_fetched =
5210-
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
5211-
}
52125222

52135223
if (pages_fetched >= T)
52145224
pages_fetched = T;
52155225
else
52165226
pages_fetched = ceil(pages_fetched);
52175227

5228+
if (maxentries < heap_pages)
5229+
{
5230+
double exact_pages;
5231+
double lossy_pages;
5232+
5233+
/*
5234+
* Crude approximation of the number of lossy pages. Because of the
5235+
* way tbm_lossify() is coded, the number of lossy pages increases
5236+
* very sharply as soon as we run short of memory; this formula has
5237+
* that property and seems to perform adequately in testing, but it's
5238+
* possible we could do better somehow.
5239+
*/
5240+
lossy_pages = Max(0, heap_pages - maxentries / 2);
5241+
exact_pages = heap_pages - lossy_pages;
5242+
5243+
/*
5244+
* If there are lossy pages then recompute the number of tuples
5245+
* processed by the bitmap heap node. We assume here that the chance
5246+
* of a given tuple coming from an exact page is the same as the
5247+
* chance that a given page is exact. This might not be true, but
5248+
* it's not clear how we can do any better.
5249+
*/
5250+
if (lossy_pages > 0)
5251+
tuples_fetched =
5252+
clamp_row_est(indexSelectivity *
5253+
(exact_pages / heap_pages) * baserel->tuples +
5254+
(lossy_pages / heap_pages) * baserel->tuples);
5255+
}
5256+
52185257
if (cost)
52195258
*cost = indexTotalCost;
52205259
if (tuple)

src/include/nodes/tidbitmap.h

+1
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
7070
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
7171
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
7272
dsa_pointer dp);
73+
extern long tbm_calculate_entries(double maxbytes);
7374

7475
#endif /* TIDBITMAP_H */

0 commit comments

Comments
 (0)