@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.212 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1166,23 +1166,6 @@ cost_sort(Path *path, PlannerInfo *root,
 	path->total_cost = startup_cost + run_cost;
 }
 
-/*
- * sort_exceeds_work_mem
- *	  Given a finished Sort plan node, detect whether it is expected to
- *	  spill to disk (ie, will need more than work_mem workspace)
- *
- * This assumes there will be no available LIMIT.
- */
-bool
-sort_exceeds_work_mem(Sort *sort)
-{
-	double		input_bytes = relation_byte_size(sort->plan.plan_rows,
-												 sort->plan.plan_width);
-	long		work_mem_bytes = work_mem * 1024L;
-
-	return (input_bytes > work_mem_bytes);
-}
-
 /*
  * cost_material
  *	  Determines and returns the cost of materializing a relation, including
@@ -1543,7 +1526,18 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  * Determines and returns the cost of joining two relations using the
  * merge join algorithm.
  *
- * 'path' is already filled in except for the cost fields
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path.  We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore.  When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the calculations here, which are not all that cheap.  Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths.  So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the cost fields and materialize_inner
  * 'sjinfo' is extra info about the join for selectivity estimation
  *
  * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
@@ -1561,7 +1555,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	List	   *innersortkeys = path->innersortkeys;
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
-	Cost		cpu_per_tuple;
+	Cost		cpu_per_tuple,
+				inner_run_cost,
+				bare_inner_cost,
+				mat_inner_cost;
 	QualCost	merge_qual_cost;
 	QualCost	qp_qual_cost;
 	double		outer_path_rows = PATH_ROWS(outer_path);
@@ -1606,10 +1603,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	/*
 	 * When there are equal merge keys in the outer relation, the mergejoin
 	 * must rescan any matching tuples in the inner relation.  This means
-	 * re-fetching inner tuples.  Our cost model for this is that a re-fetch
-	 * costs the same as an original fetch, which is probably an overestimate;
-	 * but on the other hand we ignore the bookkeeping costs of mark/restore.
-	 * Not clear if it's worth developing a more refined model.
+	 * re-fetching inner tuples; we have to estimate how often that happens.
 	 *
 	 * For regular inner and outer joins, the number of re-fetches can be
 	 * estimated approximately as size of merge join output minus size of
@@ -1641,7 +1635,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		if (rescannedtuples < 0)
 			rescannedtuples = 0;
 	}
-	/* We'll inflate inner run cost this much to account for rescanning */
+	/* We'll inflate various costs this much to account for rescanning */
 	rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
 
 	/*
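
(To make the rescan estimate above concrete: the following standalone sketch is not part of the patch; the row counts are invented, but the arithmetic mirrors the logic in this hunk and the preceding one.)

/* Hypothetical illustration of the rescanratio estimate (not patch code). */
static double
example_rescanratio(void)
{
	double		inner_path_rows = 10000;	/* estimated inner relation rows */
	double		joinrows = 15000;			/* approximate mergejoin output rows */
	double		rescannedtuples;

	/* For a plain join, re-fetches ~ join output size minus inner size */
	rescannedtuples = joinrows - inner_path_rows;		/* 5000 */
	if (rescannedtuples < 0)
		rescannedtuples = 0;

	/* so each inner tuple is fetched about 1.5 times in this example */
	return 1.0 + (rescannedtuples / inner_path_rows);	/* 1.5 */
}
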
@@ -1778,32 +1772,83 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 										  -1.0);
 		startup_cost += sort_path.startup_cost;
 		startup_cost += (sort_path.total_cost - sort_path.startup_cost)
-			* innerstartsel * rescanratio;
-		run_cost += (sort_path.total_cost - sort_path.startup_cost)
-			* (innerendsel - innerstartsel) * rescanratio;
-
-		/*
-		 * If the inner sort is expected to spill to disk, we want to add a
-		 * materialize node to shield it from the need to handle mark/restore.
-		 * This will allow it to perform the last merge pass on-the-fly, while
-		 * in most cases not requiring the materialize to spill to disk.
-		 * Charge an extra cpu_tuple_cost per tuple to account for the
-		 * materialize node.  (Keep this estimate in sync with similar ones in
-		 * create_mergejoin_path and create_mergejoin_plan.)
-		 */
-		if (relation_byte_size(inner_path_rows, inner_path->parent->width) >
-			(work_mem * 1024L))
-			run_cost += cpu_tuple_cost * inner_path_rows;
+			* innerstartsel;
+		inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
+			* (innerendsel - innerstartsel);
 	}
 	else
 	{
 		startup_cost += inner_path->startup_cost;
 		startup_cost += (inner_path->total_cost - inner_path->startup_cost)
-			* innerstartsel * rescanratio;
-		run_cost += (inner_path->total_cost - inner_path->startup_cost)
-			* (innerendsel - innerstartsel) * rescanratio;
+			* innerstartsel;
+		inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
+			* (innerendsel - innerstartsel);
 	}
 
+	/*
+	 * Decide whether we want to materialize the inner input to shield it from
+	 * mark/restore and performing re-fetches.  Our cost model for regular
+	 * re-fetches is that a re-fetch costs the same as an original fetch,
+	 * which is probably an overestimate; but on the other hand we ignore the
+	 * bookkeeping costs of mark/restore.  Not clear if it's worth developing
+	 * a more refined model.  So we just need to inflate the inner run cost
+	 * by rescanratio.
+	 */
+	bare_inner_cost = inner_run_cost * rescanratio;
+	/*
+	 * When we interpose a Material node the re-fetch cost is assumed to be
+	 * just cpu_tuple_cost per tuple, independently of the underlying plan's
+	 * cost; but we have to charge an extra cpu_tuple_cost per original fetch
+	 * as well.  Note that we're assuming the materialize node will never
+	 * spill to disk, since it only has to remember tuples back to the last
+	 * mark.  (If there are a huge number of duplicates, our other cost
+	 * factors will make the path so expensive that it probably won't get
+	 * chosen anyway.)  So we don't use cost_rescan here.
+	 *
+	 * Note: keep this estimate in sync with create_mergejoin_plan's labeling
+	 * of the generated Material node.
+	 */
+	mat_inner_cost = inner_run_cost +
+		cpu_tuple_cost * inner_path_rows * rescanratio;
+
+	/* Prefer materializing if it looks cheaper */
+	if (mat_inner_cost < bare_inner_cost)
+		path->materialize_inner = true;
+	/*
+	 * Even if materializing doesn't look cheaper, we *must* do it if the
+	 * inner path is to be used directly (without sorting) and it doesn't
+	 * support mark/restore.
+	 *
+	 * Since the inner side must be ordered, and only Sorts and IndexScans can
+	 * create order to begin with, and they both support mark/restore, you
+	 * might think there's no problem --- but you'd be wrong.  Nestloop and
+	 * merge joins can *preserve* the order of their inputs, so they can be
+	 * selected as the input of a mergejoin, and they don't support
+	 * mark/restore at present.
+	 */
+	else if (innersortkeys == NIL &&
+			 !ExecSupportsMarkRestore(inner_path->pathtype))
+		path->materialize_inner = true;
+	/*
+	 * Also, force materializing if the inner path is to be sorted and the
+	 * sort is expected to spill to disk.  This is because the final merge
+	 * pass can be done on-the-fly if it doesn't have to support mark/restore.
+	 * We don't try to adjust the cost estimates for this consideration,
+	 * though.
+	 */
+	else if (innersortkeys != NIL &&
+			 relation_byte_size(inner_path_rows, inner_path->parent->width) >
+			 (work_mem * 1024L))
+		path->materialize_inner = true;
+	else
+		path->materialize_inner = false;
+
+	/* Charge the right incremental cost for the chosen case */
+	if (path->materialize_inner)
+		run_cost += mat_inner_cost;
+	else
+		run_cost += bare_inner_cost;
+
 	/* CPU costs */
 
 	/*
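(For a numeric feel of the new cost-based choice: the sketch below is not part of the patch; the inputs are invented and cpu_tuple_cost is the planner's 0.01 default, but the comparison is the one the final hunk installs.)

/* Hypothetical walk-through of the materialize_inner comparison (not patch code). */
static int
example_materialize_choice(void)
{
	double		inner_run_cost = 2000.0;	/* inner cost between start and end selectivity */
	double		inner_path_rows = 10000.0;
	double		rescanratio = 1.5;			/* from the estimate shown earlier */
	double		cpu_tuple_cost = 0.01;		/* planner default */

	/* Re-fetching from the bare inner path inflates its whole run cost */
	double		bare_inner_cost = inner_run_cost * rescanratio;		/* 3000 */

	/* With a Material node, re-fetches cost only cpu_tuple_cost per tuple */
	double		mat_inner_cost = inner_run_cost +
		cpu_tuple_cost * inner_path_rows * rescanratio;				/* 2150 */

	/* 2150 < 3000, so this example path would set materialize_inner = true */
	return (mat_inner_cost < bare_inner_cost);
}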