Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2e46b76

Browse files
committed
Extend the join-selectivity API (oprjoin interface) so that the join type is passed to join selectivity estimators.
Make use of this in eqjoinsel to derive a non-bogus selectivity for IN clauses. Further tweak the cost estimation for IN. An initdb is forced because of the pg_proc.h changes.
1 parent 955a1f8 commit 2e46b76

File tree

16 files changed: +221 -136 lines changed

doc/src/sgml/indexcost.sgml

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $
33
-->
44

55
<chapter id="indexcost">
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
205205

206206
<programlisting>
207207
*indexSelectivity = clauselist_selectivity(root, indexQuals,
208-
lfirsti(rel->relids));
208+
lfirsti(rel->relids),
209+
JOIN_INNER);
209210
</programlisting>
210211
</para>
211212
</step>

src/backend/catalog/pg_operator.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
1212
*
1313
* NOTES
1414
* these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
485485
typeId[0] = INTERNALOID; /* Query */
486486
typeId[1] = OIDOID; /* operator OID */
487487
typeId[2] = INTERNALOID; /* args list */
488+
typeId[3] = INT2OID; /* jointype */
488489

489-
joinOid = LookupFuncName(joinName, 3, typeId);
490+
joinOid = LookupFuncName(joinName, 4, typeId);
490491
if (!OidIsValid(joinOid))
491-
func_error("OperatorDef", joinName, 3, typeId, NULL);
492+
func_error("OperatorDef", joinName, 4, typeId, NULL);
492493
}
493494
else
494495
joinOid = InvalidOid;

src/backend/optimizer/path/clausesel.c

+25-13
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
6565
Selectivity
6666
restrictlist_selectivity(Query *root,
6767
List *restrictinfo_list,
68-
int varRelid)
68+
int varRelid,
69+
JoinType jointype)
6970
{
7071
List *clauselist = get_actual_clauses(restrictinfo_list);
7172
Selectivity result;
7273

73-
result = clauselist_selectivity(root, clauselist, varRelid);
74+
result = clauselist_selectivity(root, clauselist, varRelid, jointype);
7475
freeList(clauselist);
7576
return result;
7677
}
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
8182
* expression clauses. The list can be empty, in which case 1.0
8283
* must be returned.
8384
*
84-
* See clause_selectivity() for the meaning of the varRelid parameter.
85+
* See clause_selectivity() for the meaning of the additional parameters.
8586
*
8687
* Our basic approach is to take the product of the selectivities of the
8788
* subclauses. However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
113114
Selectivity
114115
clauselist_selectivity(Query *root,
115116
List *clauses,
116-
int varRelid)
117+
int varRelid,
118+
JoinType jointype)
117119
{
118120
Selectivity s1 = 1.0;
119121
RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
184186
}
185187
}
186188
/* Not the right form, so treat it generically. */
187-
s2 = clause_selectivity(root, clause, varRelid);
189+
s2 = clause_selectivity(root, clause, varRelid, jointype);
188190
s1 = s1 * s2;
189191
}
190192

@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
362364
*
363365
* When varRelid is 0, all variables are treated as variables. This
364366
* is appropriate for ordinary join clauses and restriction clauses.
367+
*
368+
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
369+
* if the clause isn't a join clause or the context is uncertain.
365370
*/
366371
Selectivity
367372
clause_selectivity(Query *root,
368373
Node *clause,
369-
int varRelid)
374+
int varRelid,
375+
JoinType jointype)
370376
{
371377
Selectivity s1 = 1.0; /* default for any unhandled clause type */
372378

@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
424430
/* inverse of the selectivity of the underlying clause */
425431
s1 = 1.0 - clause_selectivity(root,
426432
(Node *) get_notclausearg((Expr *) clause),
427-
varRelid);
433+
varRelid,
434+
jointype);
428435
}
429436
else if (and_clause(clause))
430437
{
431438
/* share code with clauselist_selectivity() */
432439
s1 = clauselist_selectivity(root,
433440
((BoolExpr *) clause)->args,
434-
varRelid);
441+
varRelid,
442+
jointype);
435443
}
436444
else if (or_clause(clause))
437445
{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
447455
{
448456
Selectivity s2 = clause_selectivity(root,
449457
(Node *) lfirst(arg),
450-
varRelid);
458+
varRelid,
459+
jointype);
451460

452461
s1 = s1 + s2 - s1 * s2;
453462
}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
479488
{
480489
/* Estimate selectivity for a join clause. */
481490
s1 = join_selectivity(root, opno,
482-
((OpExpr *) clause)->args);
491+
((OpExpr *) clause)->args,
492+
jointype);
483493
}
484494
else
485495
{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
519529
s1 = booltestsel(root,
520530
((BooleanTest *) clause)->booltesttype,
521531
(Node *) ((BooleanTest *) clause)->arg,
522-
varRelid);
532+
varRelid,
533+
jointype);
523534
}
524535
else if (IsA(clause, RelabelType))
525536
{
526537
/* Not sure this case is needed, but it can't hurt */
527538
s1 = clause_selectivity(root,
528539
(Node *) ((RelabelType *) clause)->arg,
529-
varRelid);
540+
varRelid,
541+
jointype);
530542
}
531543

532544
#ifdef SELECTIVITY_DEBUG

src/backend/optimizer/path/costsize.c

+73-52
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
* Portions Copyright (c) 1994, Regents of the University of California
5050
*
5151
* IDENTIFICATION
52-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
52+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
5353
*
5454
*-------------------------------------------------------------------------
5555
*/
@@ -104,7 +104,8 @@ bool enable_hashjoin = true;
104104
static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
105105
int nbuckets);
106106
static bool cost_qual_eval_walker(Node *node, QualCost *total);
107-
static Selectivity approx_selectivity(Query *root, List *quals);
107+
static Selectivity approx_selectivity(Query *root, List *quals,
108+
JoinType jointype);
108109
static void set_rel_width(Query *root, RelOptInfo *rel);
109110
static double relation_byte_size(double tuples, int width);
110111
static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
697698
*/
698699
if (path->jointype == JOIN_IN)
699700
{
700-
Selectivity qual_selec = approx_selectivity(root, restrictlist);
701+
Selectivity qual_selec = approx_selectivity(root, restrictlist,
702+
path->jointype);
701703
double qptuples;
702704

703705
qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
816818
* Note: it's probably bogus to use the normal selectivity calculation
817819
* here when either the outer or inner path is a UniquePath.
818820
*/
819-
merge_selec = approx_selectivity(root, mergeclauses);
821+
merge_selec = approx_selectivity(root, mergeclauses,
822+
path->jpath.jointype);
820823
cost_qual_eval(&merge_qual_cost, mergeclauses);
821824
qpquals = set_ptrDifference(restrictlist, mergeclauses);
822-
qp_selec = approx_selectivity(root, qpquals);
825+
qp_selec = approx_selectivity(root, qpquals,
826+
path->jpath.jointype);
823827
cost_qual_eval(&qp_qual_cost, qpquals);
824828
freeList(qpquals);
825829

@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
10441048
* Note: it's probably bogus to use the normal selectivity calculation
10451049
* here when either the outer or inner path is a UniquePath.
10461050
*/
1047-
hash_selec = approx_selectivity(root, hashclauses);
1051+
hash_selec = approx_selectivity(root, hashclauses,
1052+
path->jpath.jointype);
10481053
cost_qual_eval(&hash_qual_cost, hashclauses);
10491054
qpquals = set_ptrDifference(restrictlist, hashclauses);
1050-
qp_selec = approx_selectivity(root, qpquals);
1055+
qp_selec = approx_selectivity(root, qpquals,
1056+
path->jpath.jointype);
10511057
cost_qual_eval(&qp_qual_cost, qpquals);
10521058
freeList(qpquals);
10531059

@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
10841090
* Determine bucketsize fraction for inner relation. We use the
10851091
* smallest bucketsize estimated for any individual hashclause;
10861092
* this is undoubtedly conservative.
1093+
*
1094+
* BUT: if inner relation has been unique-ified, we can assume it's
1095+
* good for hashing. This is important both because it's the right
1096+
* answer, and because we avoid contaminating the cache with a value
1097+
* that's wrong for non-unique-ified paths.
10871098
*/
1088-
innerbucketsize = 1.0;
1089-
foreach(hcl, hashclauses)
1099+
if (IsA(inner_path, UniquePath))
1100+
innerbucketsize = 1.0 / virtualbuckets;
1101+
else
10901102
{
1091-
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1092-
Selectivity thisbucketsize;
1103+
innerbucketsize = 1.0;
1104+
foreach(hcl, hashclauses)
1105+
{
1106+
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1107+
Selectivity thisbucketsize;
10931108

1094-
Assert(IsA(restrictinfo, RestrictInfo));
1109+
Assert(IsA(restrictinfo, RestrictInfo));
10951110

1096-
/*
1097-
* First we have to figure out which side of the hashjoin clause is the
1098-
* inner side.
1099-
*
1100-
* Since we tend to visit the same clauses over and over when planning
1101-
* a large query, we cache the bucketsize estimate in the RestrictInfo
1102-
* node to avoid repeated lookups of statistics.
1103-
*/
1104-
if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
1105-
{
1106-
/* righthand side is inner */
1107-
thisbucketsize = restrictinfo->right_bucketsize;
1108-
if (thisbucketsize < 0)
1111+
/*
1112+
* First we have to figure out which side of the hashjoin clause
1113+
* is the inner side.
1114+
*
1115+
* Since we tend to visit the same clauses over and over when
1116+
* planning a large query, we cache the bucketsize estimate in the
1117+
* RestrictInfo node to avoid repeated lookups of statistics.
1118+
*/
1119+
if (is_subseti(restrictinfo->right_relids,
1120+
inner_path->parent->relids))
11091121
{
1110-
/* not cached yet */
1111-
thisbucketsize = estimate_hash_bucketsize(root,
1122+
/* righthand side is inner */
1123+
thisbucketsize = restrictinfo->right_bucketsize;
1124+
if (thisbucketsize < 0)
1125+
{
1126+
/* not cached yet */
1127+
thisbucketsize =
1128+
estimate_hash_bucketsize(root,
11121129
(Var *) get_rightop(restrictinfo->clause),
1113-
virtualbuckets);
1114-
restrictinfo->right_bucketsize = thisbucketsize;
1130+
virtualbuckets);
1131+
restrictinfo->right_bucketsize = thisbucketsize;
1132+
}
11151133
}
1116-
}
1117-
else
1118-
{
1119-
Assert(is_subseti(restrictinfo->left_relids,
1120-
inner_path->parent->relids));
1121-
/* lefthand side is inner */
1122-
thisbucketsize = restrictinfo->left_bucketsize;
1123-
if (thisbucketsize < 0)
1134+
else
11241135
{
1125-
/* not cached yet */
1126-
thisbucketsize = estimate_hash_bucketsize(root,
1136+
Assert(is_subseti(restrictinfo->left_relids,
1137+
inner_path->parent->relids));
1138+
/* lefthand side is inner */
1139+
thisbucketsize = restrictinfo->left_bucketsize;
1140+
if (thisbucketsize < 0)
1141+
{
1142+
/* not cached yet */
1143+
thisbucketsize =
1144+
estimate_hash_bucketsize(root,
11271145
(Var *) get_leftop(restrictinfo->clause),
1128-
virtualbuckets);
1129-
restrictinfo->left_bucketsize = thisbucketsize;
1146+
virtualbuckets);
1147+
restrictinfo->left_bucketsize = thisbucketsize;
1148+
}
11301149
}
1131-
}
11321150

1133-
if (innerbucketsize > thisbucketsize)
1134-
innerbucketsize = thisbucketsize;
1151+
if (innerbucketsize > thisbucketsize)
1152+
innerbucketsize = thisbucketsize;
1153+
}
11351154
}
11361155

11371156
/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
15571576
* seems OK to live with the approximation.
15581577
*/
15591578
static Selectivity
1560-
approx_selectivity(Query *root, List *quals)
1579+
approx_selectivity(Query *root, List *quals, JoinType jointype)
15611580
{
15621581
Selectivity total = 1.0;
15631582
List *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
15821601
restrictinfo->this_selec =
15831602
clause_selectivity(root,
15841603
(Node *) restrictinfo->clause,
1585-
0);
1604+
0,
1605+
jointype);
15861606
selec = restrictinfo->this_selec;
15871607
}
15881608
else
15891609
{
15901610
/* If it's a bare expression, must always do it the hard way */
1591-
selec = clause_selectivity(root, qual, 0);
1611+
selec = clause_selectivity(root, qual, 0, jointype);
15921612
}
15931613
total *= selec;
15941614
}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
16201640
temp = rel->tuples *
16211641
restrictlist_selectivity(root,
16221642
rel->baserestrictinfo,
1623-
lfirsti(rel->relids));
1643+
lfirsti(rel->relids),
1644+
JOIN_INNER);
16241645

16251646
/*
16261647
* Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16821703
*/
16831704
selec = restrictlist_selectivity(root,
16841705
restrictlist,
1685-
0);
1706+
0,
1707+
jointype);
16861708

16871709
/*
16881710
* Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16941716
* For JOIN_IN and variants, the Cartesian product is figured with
16951717
* respect to a unique-ified input, and then we can clamp to the size
16961718
* of the other input.
1697-
* XXX it's not at all clear that the ordinary selectivity calculation
1698-
* is appropriate in this case.
16991719
*/
17001720
switch (jointype)
17011721
{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
17981818
temp = rel->tuples *
17991819
restrictlist_selectivity(root,
18001820
rel->baserestrictinfo,
1801-
lfirsti(rel->relids));
1821+
lfirsti(rel->relids),
1822+
JOIN_INNER);
18021823

18031824
/*
18041825
* Force estimate to be at least one row, to make explain output look

0 commit comments

Comments
 (0)