  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.172 2007/01/05 22:19:31 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1498,10 +1498,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
     double hashjointuples;
     double outer_path_rows = PATH_ROWS(outer_path);
     double inner_path_rows = PATH_ROWS(inner_path);
-    double outerbytes = relation_byte_size(outer_path_rows,
-                                           outer_path->parent->width);
-    double innerbytes = relation_byte_size(inner_path_rows,
-                                           inner_path->parent->width);
     int num_hashclauses = list_length(hashclauses);
     int numbuckets;
     int numbatches;
@@ -1538,13 +1534,16 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 
     /*
      * Cost of computing hash function: must do it once per input tuple. We
-     * charge one cpu_operator_cost for each column's hash function.
+     * charge one cpu_operator_cost for each column's hash function. Also,
+     * tack on one cpu_tuple_cost per inner row, to model the costs of
+     * inserting the row into the hashtable.
      *
      * XXX when a hashclause is more complex than a single operator, we really
      * should charge the extra eval costs of the left or right side, as
      * appropriate, here. This seems more work than it's worth at the moment.
      */
-    startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
+    startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
+        * inner_path_rows;
     run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
 
     /* Get hash table size that executor would use for inner relation */
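To see what the new charging rule amounts to, here is a minimal standalone C sketch of the two formulas in the hunk above. The row counts are invented, and the GUC values shown (cpu_operator_cost = 0.0025, cpu_tuple_cost = 0.01) are the stock defaults, not anything specified by the patch:

    #include <stdio.h>

    int main(void)
    {
        /* Stock GUC defaults; illustrative row counts only. */
        double cpu_operator_cost = 0.0025;
        double cpu_tuple_cost = 0.01;
        int num_hashclauses = 2;
        double inner_path_rows = 10000.0;
        double outer_path_rows = 100000.0;

        /* Startup: hash every inner tuple once per clause, plus one
         * cpu_tuple_cost per inner row for inserting it into the hashtable. */
        double startup_cost = (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
                              * inner_path_rows;

        /* Run: hash every outer tuple once per clause; outer rows are only
         * probed, never inserted, so no cpu_tuple_cost here. */
        double run_cost = cpu_operator_cost * num_hashclauses * outer_path_rows;

        printf("startup = %.1f, run = %.1f\n", startup_cost, run_cost);
        return 0;
    }

With these numbers the startup charge is (0.0025 * 2 + 0.01) * 10000 = 150 units, of which the new insertion term contributes two thirds; that per-row insertion cost is exactly what the old formula omitted.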
@@ -1624,8 +1623,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
     /*
      * If inner relation is too big then we will need to "batch" the join,
      * which implies writing and reading most of the tuples to disk an extra
-     * time. Charge one cost unit per page of I/O (correct since it should be
-     * nice and sequential...). Writing the inner rel counts as startup cost,
+     * time. Charge seq_page_cost per page, since the I/O should be nice and
+     * sequential. Writing the inner rel counts as startup cost,
      * all the rest as run cost.
      */
     if (numbatches > 1)
@@ -1635,8 +1634,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         double innerpages = page_size(inner_path_rows,
                                       inner_path->parent->width);
 
-        startup_cost += innerpages;
-        run_cost += innerpages + 2 * outerpages;
+        startup_cost += seq_page_cost * innerpages;
+        run_cost += seq_page_cost * (innerpages + 2 * outerpages);
     }
 
     /* CPU costs */
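A similar sketch for the two batching hunks above. The page counts stand in for the page_size() results, and seq_page_cost = 1.0 is the stock default, so at default settings the numbers match the old one-cost-unit-per-page behavior; the change only shows up when the GUC is tuned:

    #include <stdio.h>

    int main(void)
    {
        double seq_page_cost = 1.0;   /* stock default */
        double innerpages = 500.0;    /* stand-in for page_size(inner_path_rows, width) */
        double outerpages = 5000.0;   /* stand-in for page_size(outer_path_rows, width) */
        int numbatches = 4;
        double startup_cost = 0.0;
        double run_cost = 0.0;

        if (numbatches > 1)
        {
            /* Writing the inner rel out is paid before any tuple is returned. */
            startup_cost += seq_page_cost * innerpages;
            /* Reading the inner rel back, plus writing and re-reading the
             * outer rel, happens as the join runs. */
            run_cost += seq_page_cost * (innerpages + 2 * outerpages);
        }
        printf("startup = %.1f, run = %.1f\n", startup_cost, run_cost);
        return 0;
    }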
@@ -1654,14 +1653,15 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
      * The number of tuple comparisons needed is the number of outer tuples
      * times the typical number of tuples in a hash bucket, which is the inner
      * relation size times its bucketsize fraction. At each one, we need to
-     * evaluate the hashjoin quals. (Note: charging the full qual eval cost
-     * at each tuple is pessimistic, since we don't evaluate the quals unless
-     * the hash values match exactly.)
+     * evaluate the hashjoin quals. But actually, charging the full qual eval
+     * cost at each tuple is pessimistic, since we don't evaluate the quals
+     * unless the hash values match exactly. For lack of a better idea, halve
+     * the cost estimate to allow for that.
      */
     startup_cost += hash_qual_cost.startup;
     run_cost += hash_qual_cost.per_tuple *
         outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-        joininfactor;
+        joininfactor * 0.5;
 
     /*
      * For each tuple that gets through the hashjoin proper, we charge
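The new 0.5 factor is easiest to see in isolation. In this sketch clamp_row_est() is inlined as a simple floor of 1.0, a simplification of the real planner function, and all input values are invented:

    #include <stdio.h>

    int main(void)
    {
        double per_tuple_qual_cost = 0.0025;  /* stand-in for hash_qual_cost.per_tuple */
        double outer_path_rows = 100000.0;
        double inner_path_rows = 10000.0;
        double innerbucketsize = 0.0005;      /* expected fraction of inner rel per bucket */
        double joininfactor = 1.0;

        /* Expected tuples examined per probe: inner rows in the bucket,
         * clamped to at least one (simplified clamp_row_est). */
        double bucket_rows = inner_path_rows * innerbucketsize;
        if (bucket_rows < 1.0)
            bucket_rows = 1.0;

        /* Quals run only on exact hash-value matches, so charging the full
         * qual cost per bucket entry is pessimistic; halve it. */
        double run_cost = per_tuple_qual_cost * outer_path_rows * bucket_rows
                          * joininfactor * 0.5;

        printf("hash qual run cost = %.1f\n", run_cost);
        return 0;
    }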
@@ -1673,22 +1673,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
     cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
     run_cost += cpu_per_tuple * hashjointuples * joininfactor;
 
-    /*
-     * Bias against putting larger relation on inside. We don't want an
-     * absolute prohibition, though, since larger relation might have better
-     * bucketsize --- and we can't trust the size estimates unreservedly,
-     * anyway. Instead, inflate the run cost by the square root of the size
-     * ratio. (Why square root? No real good reason, but it seems
-     * reasonable...)
-     *
-     * Note: before 7.4 we implemented this by inflating startup cost; but if
-     * there's a disable_cost component in the input paths' startup cost, that
-     * unfairly penalizes the hash. Probably it'd be better to keep track of
-     * disable penalty separately from cost.
-     */
-    if (innerbytes > outerbytes && outerbytes > 0)
-        run_cost *= sqrt(innerbytes / outerbytes);
-
     path->jpath.path.startup_cost = startup_cost;
     path->jpath.path.total_cost = startup_cost + run_cost;
 }
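For reference, a standalone rendering of the heuristic the last hunk deletes (the outerbytes/innerbytes variables removed in the first hunk existed only to feed it). The byte counts here are invented:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
        /* Stand-ins for relation_byte_size(rows, width) on each input. */
        double innerbytes = 8.0e8;
        double outerbytes = 2.0e8;
        double run_cost = 1000.0;

        /* Pre-1.173 bias: inflate run cost by sqrt of the size ratio when
         * the larger relation would be hashed. */
        if (innerbytes > outerbytes && outerbytes > 0)
            run_cost *= sqrt(innerbytes / outerbytes);

        printf("biased run cost = %.1f\n", run_cost);  /* 1000 * sqrt(4) = 2000 */
        return 0;
    }

With the hunk applied no such inflation occurs, and the choice of which relation to hash rests on the undistorted cost estimates alone.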