@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.210 2009/07/11 04:09:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,6 +63,7 @@
 
 #include <math.h>
 
+#include "executor/executor.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -119,6 +120,8 @@ typedef struct
 static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 			   RestrictInfo *rinfo,
 			   PathKey *pathkey);
+static void cost_rescan(PlannerInfo *root, Path *path,
+			Cost *rescan_startup_cost, Cost *rescan_total_cost);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
 static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
 				 SpecialJoinInfo *sjinfo,
@@ -895,15 +898,26 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
 	rte = planner_rt_fetch(baserel->relid, root);
 	Assert(rte->rtekind == RTE_FUNCTION);
 
-	/* Estimate costs of executing the function expression */
+	/*
+	 * Estimate costs of executing the function expression.
+	 *
+	 * Currently, nodeFunctionscan.c always executes the function to
+	 * completion before returning any rows, and caches the results in a
+	 * tuplestore.  So the function eval cost is all startup cost, and
+	 * per-row costs are minimal.
+	 *
+	 * XXX in principle we ought to charge tuplestore spill costs if the
+	 * number of rows is large.  However, given how phony our rowcount
+	 * estimates for functions tend to be, there's not a lot of point
+	 * in that refinement right now.
+	 */
 	cost_qual_eval_node(&exprcost, rte->funcexpr, root);
 
-	startup_cost += exprcost.startup;
-	cpu_per_tuple = exprcost.per_tuple;
+	startup_cost += exprcost.startup + exprcost.per_tuple;
 
 	/* Add scanning CPU costs */
 	startup_cost += baserel->baserestrictcost.startup;
-	cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 	run_cost += cpu_per_tuple * baserel->tuples;
 
 	path->startup_cost = startup_cost;
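
The effect of this change is easiest to see with numbers: the whole function-eval cost now lands in startup, since the tuplestore is filled before the first row comes back. A minimal standalone sketch of the revised arithmetic (all sample values are assumptions, not taken from the patch):

#include <stdio.h>

typedef double Cost;

int
main(void)
{
	/* hypothetical sample values, not from the patch */
	Cost	exprcost_startup = 0.0;
	Cost	exprcost_per_tuple = 100.0;		/* an expensive function */
	Cost	restrict_startup = 0.0;
	Cost	restrict_per_tuple = 0.0025;
	Cost	cpu_tuple_cost = 0.01;
	double	tuples = 1000.0;

	/* new rule: whole function-eval cost is charged once, at startup */
	Cost	startup_cost = exprcost_startup + exprcost_per_tuple
						 + restrict_startup;
	Cost	cpu_per_tuple = cpu_tuple_cost + restrict_per_tuple;
	Cost	run_cost = cpu_per_tuple * tuples;

	printf("startup=%.2f total=%.2f\n",
		   startup_cost, startup_cost + run_cost);
	return 0;
}

This prints startup=100.00 total=112.50: the hypothetical 100-unit function cost is paid once, not once per output row.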
@@ -1176,41 +1190,44 @@ sort_exceeds_work_mem(Sort *sort)
  *
  * If the total volume of data to materialize exceeds work_mem, we will need
  * to write it to disk, so the cost is much higher in that case.
+ *
+ * Note that here we are estimating the costs for the first scan of the
+ * relation, so the materialization is all overhead --- any savings will
+ * occur only on rescan, which is estimated in cost_rescan.
  */
 void
 cost_material(Path *path,
-			  Cost input_cost, double tuples, int width)
+			  Cost input_startup_cost, Cost input_total_cost,
+			  double tuples, int width)
 {
-	Cost		startup_cost = input_cost;
-	Cost		run_cost = 0;
+	Cost		startup_cost = input_startup_cost;
+	Cost		run_cost = input_total_cost - input_startup_cost;
 	double		nbytes = relation_byte_size(tuples, width);
 	long		work_mem_bytes = work_mem * 1024L;
 
-	/* disk costs */
+	/*
+	 * Whether spilling or not, charge 2x cpu_tuple_cost per tuple to reflect
+	 * bookkeeping overhead.  (This rate must be more than cpu_tuple_cost;
+	 * if it is exactly the same then there will be a cost tie between
+	 * nestloop with A outer, materialized B inner and nestloop with B outer,
+	 * materialized A inner.  The extra cost ensures we'll prefer
+	 * materializing the smaller rel.)
+	 */
+	run_cost += 2 * cpu_tuple_cost * tuples;
+
+	/*
+	 * If we will spill to disk, charge at the rate of seq_page_cost per page.
+	 * This cost is assumed to be evenly spread through the plan run phase,
+	 * which isn't exactly accurate but our cost model doesn't allow for
+	 * nonuniform costs within the run phase.
+	 */
 	if (nbytes > work_mem_bytes)
 	{
 		double		npages = ceil(nbytes / BLCKSZ);
 
-		/* We'll write during startup and read during retrieval */
-		startup_cost += seq_page_cost * npages;
 		run_cost += seq_page_cost * npages;
 	}
 
-	/*
-	 * Charge a very small amount per inserted tuple, to reflect bookkeeping
-	 * costs.  We use cpu_tuple_cost/10 for this.  This is needed to break the
-	 * tie that would otherwise exist between nestloop with A outer,
-	 * materialized B inner and nestloop with B outer, materialized A inner.
-	 * The extra cost ensures we'll prefer materializing the smaller rel.
-	 */
-	startup_cost += cpu_tuple_cost * 0.1 * tuples;
-
-	/*
-	 * Also charge a small amount per extracted tuple.  We use cpu_tuple_cost
-	 * so that it doesn't appear worthwhile to materialize a bare seqscan.
-	 */
-	run_cost += cpu_tuple_cost * tuples;
-
 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
 }
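
A worked sketch of the new charging rule, using assumed defaults for the cost GUCs and a crude stand-in for relation_byte_size() (the 24-byte per-tuple overhead is an assumption for illustration):

#include <math.h>
#include <stdio.h>

typedef double Cost;

/* assumed defaults mirroring the cost GUCs; illustration only */
#define SEQ_PAGE_COST	1.0
#define CPU_TUPLE_COST	0.01
#define BLCKSZ			8192

int
main(void)
{
	double	tuples = 1000000.0;
	int		width = 100;
	long	work_mem_bytes = 1024 * 1024L;	/* work_mem = 1MB, assumed */

	/* crude stand-in for relation_byte_size(): width plus an assumed
	 * 24-byte per-tuple overhead */
	double	nbytes = tuples * (width + 24);

	/* 2x cpu_tuple_cost bookkeeping charge, spill or not */
	Cost	run_cost = 2 * CPU_TUPLE_COST * tuples;

	if (nbytes > work_mem_bytes)
		run_cost += SEQ_PAGE_COST * ceil(nbytes / BLCKSZ);	/* spill pages */

	printf("material overhead charged to run phase = %.0f\n", run_cost);
	return 0;
}

With these numbers the bookkeeping charge contributes 20000 and the spill adds 15137 pages' worth, all during the run phase; under the old code half of the page cost was charged at startup instead.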
@@ -1400,7 +1417,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	Path	   *inner_path = path->innerjoinpath;
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
+	Cost		inner_rescan_start_cost;
+	Cost		inner_rescan_total_cost;
 	Cost		inner_run_cost;
+	Cost		inner_rescan_run_cost;
 	Cost		cpu_per_tuple;
 	QualCost	restrict_qual_cost;
 	double		outer_path_rows = PATH_ROWS(outer_path);
@@ -1413,32 +1433,26 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	if (!enable_nestloop)
 		startup_cost += disable_cost;
 
+	/* estimate costs to rescan the inner relation */
+	cost_rescan(root, inner_path,
+				&inner_rescan_start_cost,
+				&inner_rescan_total_cost);
+
 	/* cost of source data */
 
 	/*
 	 * NOTE: clearly, we must pay both outer and inner paths' startup_cost
 	 * before we can start returning tuples, so the join's startup cost is
-	 * their sum.  What's not so clear is whether the inner path's
-	 * startup_cost must be paid again on each rescan of the inner path. This
-	 * is not true if the inner path is materialized or is a hashjoin, but
-	 * probably is true otherwise.
+	 * their sum.  We'll also pay the inner path's rescan startup cost
+	 * multiple times.
 	 */
 	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 	run_cost += outer_path->total_cost - outer_path->startup_cost;
-	if (IsA(inner_path, MaterialPath) ||
-		IsA(inner_path, HashPath))
-	{
-		/* charge only run cost for each iteration of inner path */
-	}
-	else
-	{
-		/*
-		 * charge startup cost for each iteration of inner path, except we
-		 * already charged the first startup_cost in our own startup
-		 */
-		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
-	}
+	if (outer_path_rows > 1)
+		run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
+
 	inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
+	inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
 
 	if (adjust_semi_join(root, path, sjinfo,
 						 &outer_match_frac,
@@ -1458,12 +1472,22 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		 * that fraction.  (If we used a larger fuzz factor, we'd have to
 		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
 		 * least 1, no such clamp is needed now.)
+		 *
+		 * A complicating factor is that rescans may be cheaper than first
+		 * scans.  If we never scan all the way to the end of the inner rel,
+		 * it might be (depending on the plan type) that we'd never pay the
+		 * whole inner first-scan run cost.  However it is difficult to
+		 * estimate whether that will happen, so be conservative and always
+		 * charge the whole first-scan cost once.
 		 */
+		run_cost += inner_run_cost;
+
 		outer_matched_rows = rint(outer_path_rows * outer_match_frac);
 		inner_scan_frac = 2.0 / (match_count + 1.0);
 
-		/* Add inner run cost for outer tuples having matches */
-		run_cost += outer_matched_rows * inner_run_cost * inner_scan_frac;
+		/* Add inner run cost for additional outer tuples having matches */
+		if (outer_matched_rows > 1)
+			run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
 
 		/* Compute number of tuples processed (not number emitted!) */
 		ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
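
For concreteness, a standalone sketch of this semi-join rule with hypothetical estimates (none of the numbers come from the patch): the full first-scan run cost is paid once, and each additional matched outer row pays only the scan fraction of the cheaper rescan cost:

#include <math.h>
#include <stdio.h>

typedef double Cost;

int
main(void)
{
	/* hypothetical estimates, not from the patch */
	double	outer_path_rows = 1000.0;
	double	outer_match_frac = 0.2;
	double	match_count = 3.0;
	Cost	inner_run_cost = 100.0;			/* first scan */
	Cost	inner_rescan_run_cost = 40.0;	/* assumed cheaper rescans */

	double	outer_matched_rows = rint(outer_path_rows * outer_match_frac);
	double	inner_scan_frac = 2.0 / (match_count + 1.0);

	/* pay the whole first scan once, then fractional rescans */
	Cost	run_cost = inner_run_cost;

	if (outer_matched_rows > 1)
		run_cost += (outer_matched_rows - 1)
			* inner_rescan_run_cost * inner_scan_frac;

	printf("matched=%.0f scan_frac=%.2f inner run charge=%.0f\n",
		   outer_matched_rows, inner_scan_frac, run_cost);
	return 0;
}

With 200 matched rows and a scan fraction of 0.5, the inner charge comes to 100 + 199 * 40 * 0.5 = 4080.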
@@ -1479,21 +1503,26 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		if (indexed_join_quals)
 		{
 			run_cost += (outer_path_rows - outer_matched_rows) *
-				inner_run_cost / inner_path_rows;
-			/* We won't be evaluating any quals at all for these rows */
+				inner_rescan_run_cost / inner_path_rows;
+			/*
+			 * We won't be evaluating any quals at all for these rows,
+			 * so don't add them to ntuples.
+			 */
 		}
 		else
 		{
 			run_cost += (outer_path_rows - outer_matched_rows) *
-				inner_run_cost;
+				inner_rescan_run_cost;
 			ntuples += (outer_path_rows - outer_matched_rows) *
 				inner_path_rows;
 		}
 	}
 	else
 	{
 		/* Normal case; we'll scan whole input rel for each outer row */
-		run_cost += outer_path_rows * inner_run_cost;
+		run_cost += inner_run_cost;
+		if (outer_path_rows > 1)
+			run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
 
 		/* Compute number of tuples processed (not number emitted!) */
 		ntuples = outer_path_rows * inner_path_rows;
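
The normal-case change is the heart of the patch: instead of charging the full inner run cost once per outer row, we charge it once and then charge the (possibly much cheaper) rescan cost for the remaining iterations. A toy comparison, assuming a materialized inner whose rescans cost half the first scan (figures are hypothetical):

#include <stdio.h>

typedef double Cost;

int
main(void)
{
	/* hypothetical inner-path costs, not from the patch */
	double	outer_path_rows = 50.0;
	Cost	inner_startup_cost = 10.0;
	Cost	inner_total_cost = 110.0;
	Cost	inner_run_cost = inner_total_cost - inner_startup_cost;

	/* suppose cost_rescan() reports a free restart and half-price
	 * rescans, e.g. a materialized inner that fits in work_mem */
	Cost	inner_rescan_start_cost = 0.0;
	Cost	inner_rescan_run_cost = 50.0;

	Cost	old_charge = outer_path_rows * inner_run_cost;
	Cost	new_charge = inner_run_cost
		+ (outer_path_rows - 1) * (inner_rescan_start_cost
								   + inner_rescan_run_cost);

	printf("old=%.0f new=%.0f\n", old_charge, new_charge);
	return 0;
}

Here the old model charges 5000 while the new one charges 2550, so a nestloop over a cacheable inner prices out at roughly half its old estimate in this scenario.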
@@ -2190,13 +2219,13 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 
 	/*
 	 * Also account for subplan's startup cost. If the subplan is
-	 * uncorrelated or undirect correlated, AND its topmost node is a Sort
-	 * or Material node, assume that we'll only need to pay its startup
-	 * cost once; otherwise assume we pay the startup cost every time.
+	 * uncorrelated or undirect correlated, AND its topmost node is one
+	 * that materializes its output, assume that we'll only need to pay
+	 * its startup cost once; otherwise assume we pay the startup cost
+	 * every time.
 	 */
 	if (subplan->parParam == NIL &&
-		(IsA(plan, Sort) ||
-		 IsA(plan, Material)))
+		ExecMaterializesOutput(nodeTag(plan)))
 		sp_cost.startup += plan->startup_cost;
 	else
 		sp_cost.per_tuple += plan->startup_cost;
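
ExecMaterializesOutput() is new executor infrastructure declared in executor/executor.h (hence the #include added at the top of this file); its definition is not shown in this diff. A plausible standalone sketch, assuming (consistent with the cost_rescan cases below) that the materializing node types are Material, FunctionScan, CteScan, WorkTableScan, and Sort; the stand-in enum and main() are illustration only:

#include <stdio.h>

/* stand-in for NodeTag (nodes/nodes.h); illustration only */
typedef enum
{
	T_SeqScan,
	T_Material,
	T_FunctionScan,
	T_CteScan,
	T_WorkTableScan,
	T_Sort
} NodeTagSketch;

/*
 * Hypothetical reconstruction of ExecMaterializesOutput(): report whether
 * a plan node type caches its complete output, so its startup cost need
 * not be paid again on rescan.
 */
static int
materializes_output(NodeTagSketch plantype)
{
	switch (plantype)
	{
		case T_Material:
		case T_FunctionScan:
		case T_CteScan:
		case T_WorkTableScan:
		case T_Sort:
			return 1;
		default:
			return 0;
	}
}

int
main(void)
{
	printf("Sort: %d, SeqScan: %d\n",
		   materializes_output(T_Sort), materializes_output(T_SeqScan));
	return 0;
}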
@@ -2207,6 +2236,81 @@ cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
 }
 
 
+/*
+ * cost_rescan
+ *		Given a finished Path, estimate the costs of rescanning it after
+ *		having done so the first time.  For some Path types a rescan is
+ *		cheaper than an original scan (if no parameters change), and this
+ *		function embodies knowledge about that.  The default is to return
+ *		the same costs stored in the Path.  (Note that the cost estimates
+ *		actually stored in Paths are always for first scans.)
+ *
+ * This function is not currently intended to model effects such as rescans
+ * being cheaper due to disk block caching; what we are concerned with is
+ * plan types wherein the executor caches results explicitly, or doesn't
+ * redo startup calculations, etc.
+ */
+static void
+cost_rescan(PlannerInfo *root, Path *path,
+			Cost *rescan_startup_cost,	/* output parameters */
+			Cost *rescan_total_cost)
+{
+	switch (path->pathtype)
+	{
+		case T_FunctionScan:
+			/*
+			 * Currently, nodeFunctionscan.c always executes the function
+			 * to completion before returning any rows, and caches the
+			 * results in a tuplestore.  So the function eval cost is
+			 * all startup cost and isn't paid over again on rescans.
+			 * However, all run costs will be paid over again.
+			 */
+			*rescan_startup_cost = 0;
+			*rescan_total_cost = path->total_cost - path->startup_cost;
+			break;
+		case T_HashJoin:
+			/*
+			 * Assume that all of the startup cost represents hash table
+			 * building, which we won't have to do over.
+			 */
+			*rescan_startup_cost = 0;
+			*rescan_total_cost = path->total_cost - path->startup_cost;
+			break;
+		case T_Material:
+		case T_CteScan:
+		case T_WorkTableScan:
+		case T_Sort:
+			{
+				/*
+				 * These plan types materialize their final result in a
+				 * tuplestore or tuplesort object.  So the rescan cost is only
+				 * cpu_tuple_cost per tuple, unless the result is large enough
+				 * to spill to disk.
+				 */
+				Cost		run_cost = cpu_tuple_cost * path->parent->rows;
+				double		nbytes = relation_byte_size(path->parent->rows,
+														path->parent->width);
+				long		work_mem_bytes = work_mem * 1024L;
+
+				if (nbytes > work_mem_bytes)
+				{
+					/* It will spill, so account for re-read cost */
+					double		npages = ceil(nbytes / BLCKSZ);
+
+					run_cost += seq_page_cost * npages;
+				}
+				*rescan_startup_cost = 0;
+				*rescan_total_cost = run_cost;
+			}
+			break;
+		default:
+			*rescan_startup_cost = path->startup_cost;
+			*rescan_total_cost = path->total_cost;
+			break;
+	}
+}
+
+
 /*
  * cost_qual_eval
  *		Estimate the CPU costs of evaluating a WHERE clause.
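
As a usage note, the Material/Sort branch above reduces to "cpu_tuple_cost per row, plus re-read pages only if the result spilled past work_mem". A runnable sketch with hypothetical numbers and assumed GUC defaults:

#include <math.h>
#include <stdio.h>

typedef double Cost;

/* assumed defaults for the cost GUCs; illustration only */
#define CPU_TUPLE_COST	0.01
#define SEQ_PAGE_COST	1.0
#define BLCKSZ			8192

int
main(void)
{
	/* hypothetical materialized rel: 10000 rows, ~1.2MB, work_mem = 1MB */
	double	rows = 10000.0;
	double	nbytes = 1.2 * 1024 * 1024;
	long	work_mem_bytes = 1024 * 1024L;

	/* the T_Material / T_Sort branch: cpu_tuple_cost per row ... */
	Cost	rescan_run_cost = CPU_TUPLE_COST * rows;

	/* ... plus a re-read charge only if the result spilled to disk */
	if (nbytes > work_mem_bytes)
		rescan_run_cost += SEQ_PAGE_COST * ceil(nbytes / BLCKSZ);

	printf("rescan startup=0 run=%.0f\n", rescan_run_cost);
	return 0;
}

Here 10000 rows cost 100 units to re-fetch, and because 1.2MB exceeds the assumed 1MB work_mem, about 154 pages of re-read are added; rescan startup is zero in every cached case.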