Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ca4f70c

Browse files
committed
Improve eqjoinsel's ndistinct clamping to work for multiple levels of join.
This patch fixes an oversight in my commit 7f3eba3 of 2008-10-23. That patch accounted for baserel restriction clauses that reduced the number of rows coming out of a table (and hence the number of possibly-distinct values of a join variable), but not for join restriction clauses that might have been applied at a lower level of join. To account for the latter, look up the sizes of the min_lefthand and min_righthand inputs of the current join, and clamp with those in the same way as for the base relations. Noted while investigating a complaint from Ben Chobot, although this in itself doesn't seem to explain his report. Back-patch to 8.4; previous versions used different estimation methods for which this heuristic isn't relevant.
1 parent edf4eda commit ca4f70c

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,11 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
141141
FmgrInfo *opproc, bool isgt,
142142
Datum constval, Oid consttype);
143143
static double eqjoinsel_inner(Oid operator,
144-
VariableStatData *vardata1, VariableStatData *vardata2);
144+
VariableStatData *vardata1, VariableStatData *vardata2,
145+
RelOptInfo *rel1, RelOptInfo *rel2);
145146
static double eqjoinsel_semi(Oid operator,
146-
VariableStatData *vardata1, VariableStatData *vardata2);
147+
VariableStatData *vardata1, VariableStatData *vardata2,
148+
RelOptInfo *rel1, RelOptInfo *rel2);
147149
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
148150
Datum lobound, Datum hibound, Oid boundstypid,
149151
double *scaledlobound, double *scaledhibound);
@@ -172,6 +174,7 @@ static bool get_actual_variable_range(PlannerInfo *root,
172174
VariableStatData *vardata,
173175
Oid sortop,
174176
Datum *min, Datum *max);
177+
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
175178
static Selectivity prefix_selectivity(PlannerInfo *root,
176179
VariableStatData *vardata,
177180
Oid vartype, Oid opfamily, Const *prefixcon);
@@ -2007,24 +2010,47 @@ eqjoinsel(PG_FUNCTION_ARGS)
20072010
VariableStatData vardata1;
20082011
VariableStatData vardata2;
20092012
bool join_is_reversed;
2013+
RelOptInfo *rel1;
2014+
RelOptInfo *rel2;
20102015

20112016
get_join_variables(root, args, sjinfo,
20122017
&vardata1, &vardata2, &join_is_reversed);
20132018

2019+
/*
2020+
* Identify the join's direct input relations. We use the min lefthand
2021+
* and min righthand as the inputs, even though the join might actually
2022+
* get done with larger input relations. The min inputs are guaranteed to
2023+
* have been formed by now, though, and always using them ensures
2024+
* consistency of estimates.
2025+
*/
2026+
if (!join_is_reversed)
2027+
{
2028+
rel1 = find_join_input_rel(root, sjinfo->min_lefthand);
2029+
rel2 = find_join_input_rel(root, sjinfo->min_righthand);
2030+
}
2031+
else
2032+
{
2033+
rel1 = find_join_input_rel(root, sjinfo->min_righthand);
2034+
rel2 = find_join_input_rel(root, sjinfo->min_lefthand);
2035+
}
2036+
20142037
switch (sjinfo->jointype)
20152038
{
20162039
case JOIN_INNER:
20172040
case JOIN_LEFT:
20182041
case JOIN_FULL:
2019-
selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2042+
selec = eqjoinsel_inner(operator, &vardata1, &vardata2,
2043+
rel1, rel2);
20202044
break;
20212045
case JOIN_SEMI:
20222046
case JOIN_ANTI:
20232047
if (!join_is_reversed)
2024-
selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
2048+
selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2049+
rel1, rel2);
20252050
else
20262051
selec = eqjoinsel_semi(get_commutator(operator),
2027-
&vardata2, &vardata1);
2052+
&vardata2, &vardata1,
2053+
rel2, rel1);
20282054
break;
20292055
default:
20302056
/* other values not expected here */
@@ -2050,7 +2076,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
20502076
*/
20512077
static double
20522078
eqjoinsel_inner(Oid operator,
2053-
VariableStatData *vardata1, VariableStatData *vardata2)
2079+
VariableStatData *vardata1, VariableStatData *vardata2,
2080+
RelOptInfo *rel1, RelOptInfo *rel2)
20542081
{
20552082
double selec;
20562083
double nd1;
@@ -2251,15 +2278,19 @@ eqjoinsel_inner(Oid operator,
22512278
* be, providing a crude correction for the selectivity of restriction
22522279
* clauses on those relations. (We don't do that in the other path
22532280
* since there we are comparing the nd values to stats for the whole
2254-
* relations.)
2281+
* relations.) We can apply this clamp both with respect to the base
2282+
* relations from which the join variables come, and to the immediate
2283+
* input relations of the current join.
22552284
*/
22562285
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
22572286
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
22582287

22592288
if (vardata1->rel)
22602289
nd1 = Min(nd1, vardata1->rel->rows);
2290+
nd1 = Min(nd1, rel1->rows);
22612291
if (vardata2->rel)
22622292
nd2 = Min(nd2, vardata2->rel->rows);
2293+
nd2 = Min(nd2, rel2->rows);
22632294

22642295
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
22652296
if (nd1 > nd2)
@@ -2286,7 +2317,8 @@ eqjoinsel_inner(Oid operator,
22862317
*/
22872318
static double
22882319
eqjoinsel_semi(Oid operator,
2289-
VariableStatData *vardata1, VariableStatData *vardata2)
2320+
VariableStatData *vardata1, VariableStatData *vardata2,
2321+
RelOptInfo *rel1, RelOptInfo *rel2)
22902322
{
22912323
double selec;
22922324
double nd1;
@@ -2434,8 +2466,10 @@ eqjoinsel_semi(Oid operator,
24342466
{
24352467
if (vardata1->rel)
24362468
nd1 = Min(nd1, vardata1->rel->rows);
2469+
nd1 = Min(nd1, rel1->rows);
24372470
if (vardata2->rel)
24382471
nd2 = Min(nd2, vardata2->rel->rows);
2472+
nd2 = Min(nd2, rel2->rows);
24392473

24402474
if (nd1 <= nd2 || nd2 <= 0)
24412475
selec = 1.0 - nullfrac1;
@@ -4758,6 +4792,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
47584792
return have_data;
47594793
}
47604794

4795+
/*
4796+
* find_join_input_rel
4797+
* Look up the RelOptInfo of a join input (base rel or join rel) by relids.
4798+
*
4799+
* We assume that the input relation's RelOptInfo must have been constructed
4800+
* already; if no RelOptInfo is obtained (or relids is empty) we elog(ERROR).
4801+
*/
4802+
static RelOptInfo *
4803+
find_join_input_rel(PlannerInfo *root, Relids relids)
4804+
{
4805+
RelOptInfo *rel = NULL;
4806+
4807+
switch (bms_membership(relids))
4808+
{
4809+
case BMS_EMPTY_SET:
4810+
/* should not happen; rel stays NULL and is reported by the check below */
4811+
break;
4812+
case BMS_SINGLETON: /* exactly one member: look it up as a base relation */
4813+
rel = find_base_rel(root, bms_singleton_member(relids));
4814+
break;
4815+
case BMS_MULTIPLE: /* several members: look the set up as a join relation */
4816+
rel = find_join_rel(root, relids);
4817+
break;
4818+
}
4819+
4820+
if (rel == NULL) /* empty set, or a lookup that came back NULL */
4821+
elog(ERROR, "could not find RelOptInfo for given relids");
4822+
4823+
return rel;
4824+
}
4825+
47614826

47624827
/*-------------------------------------------------------------------------
47634828
*

0 commit comments

Comments
 (0)