54
54
* Portions Copyright (c) 1994, Regents of the University of California
55
55
*
56
56
* IDENTIFICATION
57
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.157 2006/06/05 20:56:33 tgl Exp $
57
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.158 2006/06/06 17:59:57 tgl Exp $
58
58
*
59
59
*-------------------------------------------------------------------------
60
60
*/
@@ -133,7 +133,7 @@ clamp_row_est(double nrows)
133
133
* better and to avoid possible divide-by-zero when interpolating costs.
134
134
* Make it an integer, too.
135
135
*/
136
- if (nrows < 1.0 )
136
+ if (nrows <= 1.0 )
137
137
nrows = 1.0 ;
138
138
else
139
139
nrows = rint (nrows );
@@ -181,8 +181,10 @@ cost_seqscan(Path *path, PlannerInfo *root,
181
181
*
182
182
* 'index' is the index to be used
183
183
* 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
184
- * 'is_injoin' is T if we are considering using the index scan as the inside
185
- * of a nestloop join (hence, some of the indexQuals are join clauses)
184
+ * 'outer_rel' is the outer relation when we are considering using the index
185
+ * scan as the inside of a nestloop join (hence, some of the indexQuals
186
+ * are join clauses, and we should expect repeated scans of the index);
187
+ * NULL for a plain index scan
186
188
*
187
189
* cost_index() takes an IndexPath not just a Path, because it sets a few
188
190
* additional fields of the IndexPath besides startup_cost and total_cost.
200
202
cost_index (IndexPath * path , PlannerInfo * root ,
201
203
IndexOptInfo * index ,
202
204
List * indexQuals ,
203
- bool is_injoin )
205
+ RelOptInfo * outer_rel )
204
206
{
205
207
RelOptInfo * baserel = index -> rel ;
206
208
Cost startup_cost = 0 ;
@@ -215,8 +217,6 @@ cost_index(IndexPath *path, PlannerInfo *root,
215
217
Cost cpu_per_tuple ;
216
218
double tuples_fetched ;
217
219
double pages_fetched ;
218
- double T ,
219
- b ;
220
220
221
221
/* Should only be applied to base relations */
222
222
Assert (IsA (baserel , RelOptInfo ) &&
@@ -233,10 +233,11 @@ cost_index(IndexPath *path, PlannerInfo *root,
233
233
* the fraction of main-table tuples we will have to retrieve) and its
234
234
* correlation to the main-table tuple order.
235
235
*/
236
- OidFunctionCall7 (index -> amcostestimate ,
236
+ OidFunctionCall8 (index -> amcostestimate ,
237
237
PointerGetDatum (root ),
238
238
PointerGetDatum (index ),
239
239
PointerGetDatum (indexQuals ),
240
+ PointerGetDatum (outer_rel ),
240
241
PointerGetDatum (& indexStartupCost ),
241
242
PointerGetDatum (& indexTotalCost ),
242
243
PointerGetDatum (& indexSelectivity ),
@@ -254,86 +255,75 @@ cost_index(IndexPath *path, PlannerInfo *root,
254
255
startup_cost += indexStartupCost ;
255
256
run_cost += indexTotalCost - indexStartupCost ;
256
257
258
+ /* estimate number of main-table tuples fetched */
259
+ tuples_fetched = clamp_row_est (indexSelectivity * baserel -> tuples );
260
+
257
261
/*----------
258
- * Estimate number of main-table tuples and pages fetched .
262
+ * Estimate number of main-table pages fetched, and compute I/O cost .
259
263
*
260
264
* When the index ordering is uncorrelated with the table ordering,
261
- * we use an approximation proposed by Mackert and Lohman, "Index Scans
262
- * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
263
- * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
264
- * The Mackert and Lohman approximation is that the number of pages
265
- * fetched is
266
- * PF =
267
- * min(2TNs/(2T+Ns), T) when T <= b
268
- * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
269
- * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
270
- * where
271
- * T = # pages in table
272
- * N = # tuples in table
273
- * s = selectivity = fraction of table to be scanned
274
- * b = # buffer pages available (we include kernel space here)
265
+ * we use an approximation proposed by Mackert and Lohman (see
266
+ * index_pages_fetched() for details) to compute the number of pages
267
+ * fetched, and then charge random_page_cost per page fetched.
275
268
*
276
269
* When the index ordering is exactly correlated with the table ordering
277
270
* (just after a CLUSTER, for example), the number of pages fetched should
278
- * be just sT. What's more, these will be sequential fetches, not the
279
- * random fetches that occur in the uncorrelated case. So, depending on
280
- * the extent of correlation, we should estimate the actual I/O cost
281
- * somewhere between s * T * seq_page_cost and PF * random_page_cost.
282
- * We currently interpolate linearly between these two endpoints based on
283
- * the correlation squared (XXX is that appropriate?).
284
- *
285
- * In any case the number of tuples fetched is Ns .
271
+ * be exactly selectivity * table_size. What's more, all but the first
272
+ * will be sequential fetches, not the random fetches that occur in the
273
+ * uncorrelated case. So if the number of pages is more than 1, we
274
+ * ought to charge
275
+ * random_page_cost + (pages_fetched - 1) * seq_page_cost
276
+ * For partially-correlated indexes, we ought to charge somewhere between
277
+ * these two estimates. We currently interpolate linearly between the
278
+ * estimates based on the correlation squared (XXX is that appropriate?) .
286
279
*----------
287
280
*/
281
+ if (outer_rel != NULL && outer_rel -> rows > 1 )
282
+ {
283
+ /*
284
+ * For repeated indexscans, scale up the number of tuples fetched
285
+ * in the Mackert and Lohman formula by the number of scans, so
286
+ * that we estimate the number of pages fetched by all the scans.
287
+ * Then pro-rate the costs for one scan. In this case we assume
288
+ * all the fetches are random accesses. XXX it'd be good to
289
+ * include correlation in this model, but it's not clear how to do
290
+ * that without double-counting cache effects.
291
+ */
292
+ double num_scans = outer_rel -> rows ;
288
293
289
- tuples_fetched = clamp_row_est ( indexSelectivity * baserel -> tuples );
290
-
291
- /* This part is the Mackert and Lohman formula */
294
+ pages_fetched = index_pages_fetched ( tuples_fetched * num_scans ,
295
+ baserel -> pages ,
296
+ index -> pages );
292
297
293
- T = (baserel -> pages > 1 ) ? (double ) baserel -> pages : 1.0 ;
294
- b = (effective_cache_size > 1 ) ? effective_cache_size : 1.0 ;
295
-
296
- if (T <= b )
297
- {
298
- pages_fetched =
299
- (2.0 * T * tuples_fetched ) / (2.0 * T + tuples_fetched );
300
- if (pages_fetched >= T )
301
- pages_fetched = T ;
302
- else
303
- pages_fetched = ceil (pages_fetched );
298
+ run_cost += (pages_fetched * random_page_cost ) / num_scans ;
304
299
}
305
300
else
306
301
{
307
- double lim ;
302
+ /*
303
+ * Normal case: apply the Mackert and Lohman formula, and then
304
+ * interpolate between that and the correlation-derived result.
305
+ */
306
+ pages_fetched = index_pages_fetched (tuples_fetched ,
307
+ baserel -> pages ,
308
+ index -> pages );
308
309
309
- lim = (2.0 * T * b ) / (2.0 * T - b );
310
- if (tuples_fetched <= lim )
311
- {
312
- pages_fetched =
313
- (2.0 * T * tuples_fetched ) / (2.0 * T + tuples_fetched );
314
- }
315
- else
316
- {
317
- pages_fetched =
318
- b + (tuples_fetched - lim ) * (T - b ) / T ;
319
- }
320
- pages_fetched = ceil (pages_fetched );
321
- }
310
+ /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
311
+ max_IO_cost = pages_fetched * random_page_cost ;
322
312
323
- /*
324
- * min_IO_cost corresponds to the perfectly correlated case (csquared=1),
325
- * max_IO_cost to the perfectly uncorrelated case (csquared=0).
326
- */
327
- min_IO_cost = ceil (indexSelectivity * T ) * seq_page_cost ;
328
- max_IO_cost = pages_fetched * random_page_cost ;
313
+ /* min_IO_cost is for the perfectly correlated case (csquared=1) */
314
+ pages_fetched = ceil (indexSelectivity * (double ) baserel -> pages );
315
+ min_IO_cost = random_page_cost ;
316
+ if (pages_fetched > 1 )
317
+ min_IO_cost += (pages_fetched - 1 ) * seq_page_cost ;
329
318
330
- /*
331
- * Now interpolate based on estimated index order correlation to get total
332
- * disk I/O cost for main table accesses.
333
- */
334
- csquared = indexCorrelation * indexCorrelation ;
319
+ /*
320
+ * Now interpolate based on estimated index order correlation to get
321
+ * total disk I/O cost for main table accesses.
322
+ */
323
+ csquared = indexCorrelation * indexCorrelation ;
335
324
336
- run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost );
325
+ run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost );
326
+ }
337
327
338
328
/*
339
329
* Estimate CPU costs per tuple.
@@ -348,7 +338,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
348
338
startup_cost += baserel -> baserestrictcost .startup ;
349
339
cpu_per_tuple = cpu_tuple_cost + baserel -> baserestrictcost .per_tuple ;
350
340
351
- if (! is_injoin )
341
+ if (outer_rel == NULL )
352
342
{
353
343
QualCost index_qual_cost ;
354
344
@@ -363,19 +353,102 @@ cost_index(IndexPath *path, PlannerInfo *root,
363
353
path -> path .total_cost = startup_cost + run_cost ;
364
354
}
365
355
356
+ /*
357
+ * index_pages_fetched
358
+ * Estimate the number of pages actually fetched after accounting for
359
+ * cache effects.
360
+ *
361
+ * We use an approximation proposed by Mackert and Lohman, "Index Scans
362
+ * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
363
+ * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
364
+ * The Mackert and Lohman approximation is that the number of pages
365
+ * fetched is
366
+ * PF =
367
+ * min(2TNs/(2T+Ns), T) when T <= b
368
+ * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
369
+ * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
370
+ * where
371
+ * T = # pages in table
372
+ * N = # tuples in table
373
+ * s = selectivity = fraction of table to be scanned
374
+ * b = # buffer pages available (we include kernel space here)
375
+ *
376
+ * We assume that effective_cache_size is the total number of buffer pages
377
+ * available for both table and index, and pro-rate that space between the
378
+ * table and index. (Ideally other_pages should include all the other
379
+ * tables and indexes used by the query too; but we don't have a good way
380
+ * to get that number here.)
381
+ *
382
+ * The product Ns is the number of tuples fetched; we pass in that
383
+ * product rather than calculating it here.
384
+ *
385
+ * Caller is expected to have ensured that tuples_fetched is greater than zero
386
+ * and rounded to integer (see clamp_row_est). The result will likewise be
387
+ * greater than zero and integral.
388
+ */
389
+ double
390
+ index_pages_fetched (double tuples_fetched , BlockNumber pages ,
391
+ BlockNumber other_pages )
392
+ {
393
+ double pages_fetched ;
394
+ double T ,
395
+ b ;
396
+
397
+ /* T is # pages in table, but don't allow it to be zero */
398
+ T = (pages > 1 ) ? (double ) pages : 1.0 ;
399
+
400
+ /* b is pro-rated share of effective_cache_size */
401
+ b = effective_cache_size * T / (T + (double ) other_pages );
402
+ /* force it positive and integral */
403
+ if (b <= 1.0 )
404
+ b = 1.0 ;
405
+ else
406
+ b = ceil (b );
407
+
408
+ /* This part is the Mackert and Lohman formula */
409
+ if (T <= b )
410
+ {
411
+ pages_fetched =
412
+ (2.0 * T * tuples_fetched ) / (2.0 * T + tuples_fetched );
413
+ if (pages_fetched >= T )
414
+ pages_fetched = T ;
415
+ else
416
+ pages_fetched = ceil (pages_fetched );
417
+ }
418
+ else
419
+ {
420
+ double lim ;
421
+
422
+ lim = (2.0 * T * b ) / (2.0 * T - b );
423
+ if (tuples_fetched <= lim )
424
+ {
425
+ pages_fetched =
426
+ (2.0 * T * tuples_fetched ) / (2.0 * T + tuples_fetched );
427
+ }
428
+ else
429
+ {
430
+ pages_fetched =
431
+ b + (tuples_fetched - lim ) * (T - b ) / T ;
432
+ }
433
+ pages_fetched = ceil (pages_fetched );
434
+ }
435
+ return pages_fetched ;
436
+ }
437
+
366
438
/*
367
439
* cost_bitmap_heap_scan
368
440
* Determines and returns the cost of scanning a relation using a bitmap
369
441
* index-then-heap plan.
370
442
*
371
443
* 'baserel' is the relation to be scanned
372
444
* 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
373
- * 'is_injoin' is T if we are considering using the scan as the inside
374
- * of a nestloop join (hence, some of the quals are join clauses)
445
+ *
446
+ * Note: we take no explicit notice here of whether this is a join inner path.
447
+ * If it is, the component IndexPaths should have been costed accordingly.
375
448
*/
376
449
void
377
450
cost_bitmap_heap_scan (Path * path , PlannerInfo * root , RelOptInfo * baserel ,
378
- Path * bitmapqual , bool is_injoin )
451
+ Path * bitmapqual )
379
452
{
380
453
Cost startup_cost = 0 ;
381
454
Cost run_cost = 0 ;
0 commit comments