postgrespro
diff --git a/doc/src/sgml/indexcost.sgml
+3-2 b/doc/src/sgml/indexcost.sgml
+3-2
diff --git a/src/backend/catalog/pg_operator.c
+4-3 b/src/backend/catalog/pg_operator.c
+4-3
diff --git a/src/backend/optimizer/path/clausesel.c
+25-13 b/src/backend/optimizer/path/clausesel.c
+25-13
diff --git a/src/backend/optimizer/path/costsize.c
+73-52 b/src/backend/optimizer/path/costsize.c
+73-52
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $
 -->
 
  <chapter id="indexcost">
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
 
      <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           lfirsti(rel->relids));
+                                           lfirsti(rel->relids),
+                                           JOIN_INNER);
      </programlisting>
     </para>
    </step>
 
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
  *
  * NOTES
  *	  these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
 		typeId[0] = INTERNALOID;	/* Query */
 		typeId[1] = OIDOID;		/* operator OID */
 		typeId[2] = INTERNALOID;	/* args list */
+		typeId[3] = INT2OID;	/* jointype */
 
-		joinOid = LookupFuncName(joinName, 3, typeId);
+		joinOid = LookupFuncName(joinName, 4, typeId);
 		if (!OidIsValid(joinOid))
-			func_error("OperatorDef", joinName, 3, typeId, NULL);
+			func_error("OperatorDef", joinName, 4, typeId, NULL);
 	}
 	else
 		joinOid = InvalidOid;
 
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 Selectivity
 restrictlist_selectivity(Query *root,
 						 List *restrictinfo_list,
-						 int varRelid)
+						 int varRelid,
+						 JoinType jointype)
 {
 	List	   *clauselist = get_actual_clauses(restrictinfo_list);
 	Selectivity result;
 
-	result = clauselist_selectivity(root, clauselist, varRelid);
+	result = clauselist_selectivity(root, clauselist, varRelid, jointype);
 	freeList(clauselist);
 	return result;
 }
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
  *	  expression clauses.  The list can be empty, in which case 1.0
  *	  must be returned.
  *
- * See clause_selectivity() for the meaning of the varRelid parameter.
+ * See clause_selectivity() for the meaning of the additional parameters.
  *
  * Our basic approach is to take the product of the selectivities of the
  * subclauses.	However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
 Selectivity
 clauselist_selectivity(Query *root,
 					   List *clauses,
-					   int varRelid)
+					   int varRelid,
+					   JoinType jointype)
 {
 	Selectivity s1 = 1.0;
 	RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
 			}
 		}
 		/* Not the right form, so treat it generically. */
-		s2 = clause_selectivity(root, clause, varRelid);
+		s2 = clause_selectivity(root, clause, varRelid, jointype);
 		s1 = s1 * s2;
 	}
 
@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
  *
  * When varRelid is 0, all variables are treated as variables.	This
  * is appropriate for ordinary join clauses and restriction clauses.
+ *
+ * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
+ * if the clause isn't a join clause or the context is uncertain.
  */
 Selectivity
 clause_selectivity(Query *root,
 				   Node *clause,
-				   int varRelid)
+				   int varRelid,
+				   JoinType jointype)
 {
 	Selectivity s1 = 1.0;		/* default for any unhandled clause type */
 
@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
 		/* inverse of the selectivity of the underlying clause */
 		s1 = 1.0 - clause_selectivity(root,
 							  (Node *) get_notclausearg((Expr *) clause),
-									  varRelid);
+									  varRelid,
+									  jointype);
 	}
 	else if (and_clause(clause))
 	{
 		/* share code with clauselist_selectivity() */
 		s1 = clauselist_selectivity(root,
 									((BoolExpr *) clause)->args,
-									varRelid);
+									varRelid,
+									jointype);
 	}
 	else if (or_clause(clause))
 	{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
 		{
 			Selectivity s2 = clause_selectivity(root,
 												(Node *) lfirst(arg),
-												varRelid);
+												varRelid,
+												jointype);
 
 			s1 = s1 + s2 - s1 * s2;
 		}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
 		{
 			/* Estimate selectivity for a join clause. */
 			s1 = join_selectivity(root, opno,
-								  ((OpExpr *) clause)->args);
+								  ((OpExpr *) clause)->args,
+								  jointype);
 		}
 		else
 		{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
 		s1 = booltestsel(root,
 						 ((BooleanTest *) clause)->booltesttype,
 						 (Node *) ((BooleanTest *) clause)->arg,
-						 varRelid);
+						 varRelid,
+						 jointype);
 	}
 	else if (IsA(clause, RelabelType))
 	{
 		/* Not sure this case is needed, but it can't hurt */
 		s1 = clause_selectivity(root,
 								(Node *) ((RelabelType *) clause)->arg,
-								varRelid);
+								varRelid,
+								jointype);
 	}
 
 #ifdef SELECTIVITY_DEBUG
 
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,7 +104,8 @@ bool		enable_hashjoin = true;
 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
 											int nbuckets);
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
-static Selectivity approx_selectivity(Query *root, List *quals);
+static Selectivity approx_selectivity(Query *root, List *quals,
+									  JoinType jointype);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
 	 */
 	if (path->jointype == JOIN_IN)
 	{
-		Selectivity	qual_selec = approx_selectivity(root, restrictlist);
+		Selectivity	qual_selec = approx_selectivity(root, restrictlist,
+													path->jointype);
 		double	qptuples;
 
 		qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	merge_selec = approx_selectivity(root, mergeclauses);
+	merge_selec = approx_selectivity(root, mergeclauses,
+									 path->jpath.jointype);
 	cost_qual_eval(&merge_qual_cost, mergeclauses);
 	qpquals = set_ptrDifference(restrictlist, mergeclauses);
-	qp_selec = approx_selectivity(root, qpquals);
+	qp_selec = approx_selectivity(root, qpquals,
+								  path->jpath.jointype);
 	cost_qual_eval(&qp_qual_cost, qpquals);
 	freeList(qpquals);
 
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
 	 * Note: it's probably bogus to use the normal selectivity calculation
 	 * here when either the outer or inner path is a UniquePath.
 	 */
-	hash_selec = approx_selectivity(root, hashclauses);
+	hash_selec = approx_selectivity(root, hashclauses,
+									path->jpath.jointype);
 	cost_qual_eval(&hash_qual_cost, hashclauses);
 	qpquals = set_ptrDifference(restrictlist, hashclauses);
-	qp_selec = approx_selectivity(root, qpquals);
+	qp_selec = approx_selectivity(root, qpquals,
+								  path->jpath.jointype);
 	cost_qual_eval(&qp_qual_cost, qpquals);
 	freeList(qpquals);
 
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
 	 * Determine bucketsize fraction for inner relation.  We use the
 	 * smallest bucketsize estimated for any individual hashclause;
 	 * this is undoubtedly conservative.
+	 *
+	 * BUT: if inner relation has been unique-ified, we can assume it's
+	 * good for hashing.  This is important both because it's the right
+	 * answer, and because we avoid contaminating the cache with a value
+	 * that's wrong for non-unique-ified paths.
 	 */
-	innerbucketsize = 1.0;
-	foreach(hcl, hashclauses)
+	if (IsA(inner_path, UniquePath))
+		innerbucketsize = 1.0 / virtualbuckets;
+	else
 	{
-		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
-		Selectivity thisbucketsize;
+		innerbucketsize = 1.0;
+		foreach(hcl, hashclauses)
+		{
+			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+			Selectivity thisbucketsize;
 
-		Assert(IsA(restrictinfo, RestrictInfo));
+			Assert(IsA(restrictinfo, RestrictInfo));
 
-		/*
-		 * First we have to figure out which side of the hashjoin clause is the
-		 * inner side.
-		 *
-		 * Since we tend to visit the same clauses over and over when planning
-		 * a large query, we cache the bucketsize estimate in the RestrictInfo
-		 * node to avoid repeated lookups of statistics.
-		 */
-		if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
-		{
-			/* righthand side is inner */
-			thisbucketsize = restrictinfo->right_bucketsize;
-			if (thisbucketsize < 0)
+			/*
+			 * First we have to figure out which side of the hashjoin clause
+			 * is the inner side.
+			 *
+			 * Since we tend to visit the same clauses over and over when
+			 * planning a large query, we cache the bucketsize estimate in the
+			 * RestrictInfo node to avoid repeated lookups of statistics.
+			 */
+			if (is_subseti(restrictinfo->right_relids,
+						   inner_path->parent->relids))
 			{
-				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root,
+				/* righthand side is inner */
+				thisbucketsize = restrictinfo->right_bucketsize;
+				if (thisbucketsize < 0)
+				{
+					/* not cached yet */
+					thisbucketsize =
+						estimate_hash_bucketsize(root,
 									(Var *) get_rightop(restrictinfo->clause),
-														  virtualbuckets);
-				restrictinfo->right_bucketsize = thisbucketsize;
+												 virtualbuckets);
+					restrictinfo->right_bucketsize = thisbucketsize;
+				}
 			}
-		}
-		else
-		{
-			Assert(is_subseti(restrictinfo->left_relids,
-							  inner_path->parent->relids));
-			/* lefthand side is inner */
-			thisbucketsize = restrictinfo->left_bucketsize;
-			if (thisbucketsize < 0)
+			else
 			{
-				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root,
+				Assert(is_subseti(restrictinfo->left_relids,
+								  inner_path->parent->relids));
+				/* lefthand side is inner */
+				thisbucketsize = restrictinfo->left_bucketsize;
+				if (thisbucketsize < 0)
+				{
+					/* not cached yet */
+					thisbucketsize =
+						estimate_hash_bucketsize(root,
 									(Var *) get_leftop(restrictinfo->clause),
-														  virtualbuckets);
-				restrictinfo->left_bucketsize = thisbucketsize;
+												 virtualbuckets);
+					restrictinfo->left_bucketsize = thisbucketsize;
+				}
 			}
-		}
 
-		if (innerbucketsize > thisbucketsize)
-			innerbucketsize = thisbucketsize;
+			if (innerbucketsize > thisbucketsize)
+				innerbucketsize = thisbucketsize;
+		}
 	}
 
 	/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
  * seems OK to live with the approximation.
  */
 static Selectivity
-approx_selectivity(Query *root, List *quals)
+approx_selectivity(Query *root, List *quals, JoinType jointype)
 {
 	Selectivity total = 1.0;
 	List	   *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
 				restrictinfo->this_selec =
 					clause_selectivity(root,
 									   (Node *) restrictinfo->clause,
-									   0);
+									   0,
+									   jointype);
 			selec = restrictinfo->this_selec;
 		}
 		else
 		{
 			/* If it's a bare expression, must always do it the hard way */
-			selec = clause_selectivity(root, qual, 0);
+			selec = clause_selectivity(root, qual, 0, jointype);
 		}
 		total *= selec;
 	}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
 	temp = rel->tuples *
 		restrictlist_selectivity(root,
 								 rel->baserestrictinfo,
-								 lfirsti(rel->relids));
+								 lfirsti(rel->relids),
+								 JOIN_INNER);
 
 	/*
 	 * Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	 */
 	selec = restrictlist_selectivity(root,
 									 restrictlist,
-									 0);
+									 0,
+									 jointype);
 
 	/*
 	 * Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 	 * For JOIN_IN and variants, the Cartesian product is figured with
 	 * respect to a unique-ified input, and then we can clamp to the size
 	 * of the other input.
-	 * XXX it's not at all clear that the ordinary selectivity calculation
-	 * is appropriate in this case.
 	 */
 	switch (jointype)
 	{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
 	temp = rel->tuples *
 		restrictlist_selectivity(root,
 								 rel->baserestrictinfo,
-								 lfirsti(rel->relids));
+								 lfirsti(rel->relids),
+								 JOIN_INNER);
 
 	/*
 	 * Force estimate to be at least one row, to make explain output look
Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`*`
`9`	`9`	`*`
`10`	`10`	`* IDENTIFICATION`
`11`		`- * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $`
	`11`	`+ * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $`
`12`	`12`	`*`
`13`	`13`	`* NOTES`
`14`	`14`	`* these routines moved here from commands/define.c and somewhat cleaned up.`
`@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,`
`485`	`485`	`typeId[0] = INTERNALOID; /* Query */`
`486`	`486`	`typeId[1] = OIDOID; /* operator OID */`
`487`	`487`	`typeId[2] = INTERNALOID; /* args list */`
	`488`	`+ typeId[3] = INT2OID; /* jointype */`
`488`	`489`
`489`		`- joinOid = LookupFuncName(joinName, 3, typeId);`
	`490`	`+ joinOid = LookupFuncName(joinName, 4, typeId);`
`490`	`491`	`if (!OidIsValid(joinOid))`
`491`		`- func_error("OperatorDef", joinName, 3, typeId, NULL);`
	`492`	`+ func_error("OperatorDef", joinName, 4, typeId, NULL);`
`492`	`493`	`}`
`493`	`494`	`else`
`494`	`495`	`joinOid = InvalidOid;`