  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,7 +104,8 @@ bool enable_hashjoin = true;
 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
                                             int nbuckets);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
-static Selectivity approx_selectivity(Query *root, List *quals);
+static Selectivity approx_selectivity(Query *root, List *quals,
+                                      JoinType jointype);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
      */
     if (path->jointype == JOIN_IN)
     {
-        Selectivity qual_selec = approx_selectivity(root, restrictlist);
+        Selectivity qual_selec = approx_selectivity(root, restrictlist,
+                                                    path->jointype);
         double      qptuples;

         qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
      * Note: it's probably bogus to use the normal selectivity calculation
      * here when either the outer or inner path is a UniquePath.
      */
-    merge_selec = approx_selectivity(root, mergeclauses);
+    merge_selec = approx_selectivity(root, mergeclauses,
+                                     path->jpath.jointype);
     cost_qual_eval(&merge_qual_cost, mergeclauses);
     qpquals = set_ptrDifference(restrictlist, mergeclauses);
-    qp_selec = approx_selectivity(root, qpquals);
+    qp_selec = approx_selectivity(root, qpquals,
+                                  path->jpath.jointype);
     cost_qual_eval(&qp_qual_cost, qpquals);
     freeList(qpquals);

@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
      * Note: it's probably bogus to use the normal selectivity calculation
      * here when either the outer or inner path is a UniquePath.
      */
-    hash_selec = approx_selectivity(root, hashclauses);
+    hash_selec = approx_selectivity(root, hashclauses,
+                                    path->jpath.jointype);
     cost_qual_eval(&hash_qual_cost, hashclauses);
     qpquals = set_ptrDifference(restrictlist, hashclauses);
-    qp_selec = approx_selectivity(root, qpquals);
+    qp_selec = approx_selectivity(root, qpquals,
+                                  path->jpath.jointype);
     cost_qual_eval(&qp_qual_cost, qpquals);
     freeList(qpquals);

@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
      * Determine bucketsize fraction for inner relation.  We use the
      * smallest bucketsize estimated for any individual hashclause;
      * this is undoubtedly conservative.
+     *
+     * BUT: if inner relation has been unique-ified, we can assume it's
+     * good for hashing.  This is important both because it's the right
+     * answer, and because we avoid contaminating the cache with a value
+     * that's wrong for non-unique-ified paths.
      */
-    innerbucketsize = 1.0;
-    foreach(hcl, hashclauses)
+    if (IsA(inner_path, UniquePath))
+        innerbucketsize = 1.0 / virtualbuckets;
+    else
     {
-        RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
-        Selectivity thisbucketsize;
+        innerbucketsize = 1.0;
+        foreach(hcl, hashclauses)
+        {
+            RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+            Selectivity thisbucketsize;

-        Assert(IsA(restrictinfo, RestrictInfo));
+            Assert(IsA(restrictinfo, RestrictInfo));

-        /*
-         * First we have to figure out which side of the hashjoin clause is the
-         * inner side.
-         *
-         * Since we tend to visit the same clauses over and over when planning
-         * a large query, we cache the bucketsize estimate in the RestrictInfo
-         * node to avoid repeated lookups of statistics.
-         */
-        if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
-        {
-            /* righthand side is inner */
-            thisbucketsize = restrictinfo->right_bucketsize;
-            if (thisbucketsize < 0)
+            /*
+             * First we have to figure out which side of the hashjoin clause
+             * is the inner side.
+             *
+             * Since we tend to visit the same clauses over and over when
+             * planning a large query, we cache the bucketsize estimate in the
+             * RestrictInfo node to avoid repeated lookups of statistics.
+             */
+            if (is_subseti(restrictinfo->right_relids,
+                           inner_path->parent->relids))
             {
-                /* not cached yet */
-                thisbucketsize = estimate_hash_bucketsize(root,
+                /* righthand side is inner */
+                thisbucketsize = restrictinfo->right_bucketsize;
+                if (thisbucketsize < 0)
+                {
+                    /* not cached yet */
+                    thisbucketsize =
+                        estimate_hash_bucketsize(root,
                                 (Var *) get_rightop(restrictinfo->clause),
-                                          virtualbuckets);
-                restrictinfo->right_bucketsize = thisbucketsize;
+                                                 virtualbuckets);
+                    restrictinfo->right_bucketsize = thisbucketsize;
+                }
             }
-        }
-        else
-        {
-            Assert(is_subseti(restrictinfo->left_relids,
-                              inner_path->parent->relids));
-            /* lefthand side is inner */
-            thisbucketsize = restrictinfo->left_bucketsize;
-            if (thisbucketsize < 0)
+            else
             {
-                /* not cached yet */
-                thisbucketsize = estimate_hash_bucketsize(root,
+                Assert(is_subseti(restrictinfo->left_relids,
+                                  inner_path->parent->relids));
+                /* lefthand side is inner */
+                thisbucketsize = restrictinfo->left_bucketsize;
+                if (thisbucketsize < 0)
+                {
+                    /* not cached yet */
+                    thisbucketsize =
+                        estimate_hash_bucketsize(root,
                                 (Var *) get_leftop(restrictinfo->clause),
-                                          virtualbuckets);
-                restrictinfo->left_bucketsize = thisbucketsize;
+                                                 virtualbuckets);
+                    restrictinfo->left_bucketsize = thisbucketsize;
+                }
             }
-        }

-        if (innerbucketsize > thisbucketsize)
-            innerbucketsize = thisbucketsize;
+            if (innerbucketsize > thisbucketsize)
+                innerbucketsize = thisbucketsize;
+        }
     }

     /*
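The restructured block above follows a compute-once, cache-on-the-node pattern: a unique-ified inner path is treated as hashing perfectly, and otherwise the smallest per-clause bucketsize wins, with each per-clause estimate memoized on its RestrictInfo so repeated planning passes skip the statistics lookup. A minimal standalone sketch of the same pattern (illustrative only; HashClause, compute_bucketsize, and the driver in main are stand-ins, not part of the commit):

    #include <stdio.h>

    /* Illustrative stand-in for RestrictInfo's cached bucketsize fields. */
    typedef struct HashClause
    {
        int     id;
        double  cached_bucketsize;  /* < 0 means "not estimated yet" */
    } HashClause;

    /* Stand-in for estimate_hash_bucketsize(): pretend this is expensive. */
    static double
    compute_bucketsize(const HashClause *c, int virtualbuckets)
    {
        return (double) (c->id % virtualbuckets + 1) / virtualbuckets;
    }

    /*
     * Pick the bucketsize fraction for the inner relation: a unique-ified
     * inner side hashes perfectly (one value per bucket); otherwise take
     * the smallest estimate over all hash clauses, memoizing each
     * per-clause estimate on the clause itself.
     */
    static double
    inner_bucketsize(HashClause *clauses, int nclauses,
                     int virtualbuckets, int inner_is_unique)
    {
        double  result = 1.0;
        int     i;

        if (inner_is_unique)
            return 1.0 / virtualbuckets;

        for (i = 0; i < nclauses; i++)
        {
            double  thisbucketsize = clauses[i].cached_bucketsize;

            if (thisbucketsize < 0)
            {
                /* not cached yet: compute once and remember it */
                thisbucketsize = compute_bucketsize(&clauses[i], virtualbuckets);
                clauses[i].cached_bucketsize = thisbucketsize;
            }
            /* keep the smallest (most favorable) estimate */
            if (result > thisbucketsize)
                result = thisbucketsize;
        }
        return result;
    }

    int
    main(void)
    {
        HashClause  clauses[] = {{7, -1.0}, {3, -1.0}};

        printf("non-unique inner:   %g\n", inner_bucketsize(clauses, 2, 1024, 0));
        printf("unique-ified inner: %g\n", inner_bucketsize(clauses, 2, 1024, 1));
        return 0;
    }

Keeping the unique-ified case entirely outside the per-clause loop also matches the new comment: the cached per-clause value is never overwritten with a number that would be wrong for non-unique-ified uses of the same clause.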
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
  * seems OK to live with the approximation.
  */
 static Selectivity
-approx_selectivity(Query *root, List *quals)
+approx_selectivity(Query *root, List *quals, JoinType jointype)
 {
     Selectivity total = 1.0;
     List       *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
             restrictinfo->this_selec =
                 clause_selectivity(root,
                                    (Node *) restrictinfo->clause,
-                                   0);
+                                   0,
+                                   jointype);
             selec = restrictinfo->this_selec;
         }
         else
         {
             /* If it's a bare expression, must always do it the hard way */
-            selec = clause_selectivity(root, qual, 0);
+            selec = clause_selectivity(root, qual, 0, jointype);
         }
         total *= selec;
     }
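For intuition about the loop above: approx_selectivity simply multiplies the per-clause selectivities under an independence assumption, so with illustrative estimates of 0.1 and 0.25 for two quals the combined result is 0.1 * 0.25 = 0.025. The new jointype argument is only threaded through to clause_selectivity so that each per-clause estimate can account for the join's semantics (notably JOIN_IN).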
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
     temp = rel->tuples *
         restrictlist_selectivity(root,
                                  rel->baserestrictinfo,
-                                 lfirsti(rel->relids));
+                                 lfirsti(rel->relids),
+                                 JOIN_INNER);

     /*
      * Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
      */
     selec = restrictlist_selectivity(root,
                                      restrictlist,
-                                     0);
+                                     0,
+                                     jointype);

     /*
      * Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
      * For JOIN_IN and variants, the Cartesian product is figured with
      * respect to a unique-ified input, and then we can clamp to the size
      * of the other input.
-     * XXX it's not at all clear that the ordinary selectivity calculation
-     * is appropriate in this case.
      */
     switch (jointype)
     {
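To put rough numbers on the comment above (illustrative, not taken from the commit): with 10,000 outer rows, an inner input unique-ified to 500 rows, and a restriction selectivity of 0.01, the Cartesian-product figure is 10,000 * 500 * 0.01 = 50,000 rows; per the comment, the JOIN_IN cases of the switch (jointype) dispatch whose start is shown above then clamp the estimate to the size of the other input, here the 10,000-row outer side.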
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
     temp = rel->tuples *
         restrictlist_selectivity(root,
                                  rel->baserestrictinfo,
-                                 lfirsti(rel->relids));
+                                 lfirsti(rel->relids),
+                                 JOIN_INNER);

     /*
      * Force estimate to be at least one row, to make explain output look