Improve make_subplanTargetList to avoid including Vars unnecessarily.

tglsfdc · tglsfdc · commit 1bc16a946008 · 2011-07-16T16:46:55.000-04:00
If a Var was used only in a GROUP BY expression, the previous
implementation would include the Var by itself (as well as the expression)
in the generated targetlist.  This wouldn't affect the efficiency of the
scan/join part of the plan at all, but it could result in passing
unnecessarily-wide rows through sorting and grouping steps.  It turns out
to take only a little more code, and not noticeably more time, to generate
a tlist without such redundancy, so let's do that.  Per a recent gripe from
HarmeekSingh Bedi.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
@@ -85,6 +85,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
 					   double dNumDistinctRows);
 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
 					   AttrNumber **groupColIdx, bool *need_tlist_eval);
+static int	get_grouping_column_index(Query *parse, TargetEntry *tle);
 static void locate_grouping_columns(PlannerInfo *root,
 						List *tlist,
 						List *sub_tlist,
@@ -2536,14 +2537,9 @@ choose_hashed_distinct(PlannerInfo *root,
  * For example, given a query like
  *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
  * we want to pass this targetlist to the subplan:
- *		a,b,c,d,a+b
+ *		a+b,c,d
  * where the a+b target will be used by the Sort/Group steps, and the
- * other targets will be used for computing the final results.	(In the
- * above example we could theoretically suppress the a and b targets and
- * pass down only c,d,a+b, but it's not really worth the trouble to
- * eliminate simple var references from the subplan.  We will avoid doing
- * the extra computation to recompute a+b at the outer level; see
- * fix_upper_expr() in setrefs.c.)
+ * other targets will be used for computing the final results.
  *
  * If we are grouping or aggregating, *and* there are no non-Var grouping
  * expressions, then the returned tlist is effectively dummy; we do not
@@ -2569,7 +2565,8 @@ make_subplanTargetList(PlannerInfo *root,
 {
 	Query	   *parse = root->parse;
 	List	   *sub_tlist;
-	List	   *extravars;
+	List	   *non_group_cols;
+	List	   *non_group_vars;
 	int			numCols;
 
 	*groupColIdx = NULL;
@@ -2586,71 +2583,132 @@ make_subplanTargetList(PlannerInfo *root,
 	}
 
 	/*
-	 * Otherwise, start with a "flattened" tlist (having just the Vars
-	 * mentioned in the targetlist and HAVING qual).  Note this includes Vars
-	 * used in resjunk items, so we are covering the needs of ORDER BY and
-	 * window specifications.  Vars used within Aggrefs will be pulled out
-	 * here, too.
+	 * Otherwise, we must build a tlist containing all grouping columns,
+	 * plus any other Vars mentioned in the targetlist and HAVING qual.
 	 */
-	sub_tlist = flatten_tlist(tlist,
-							  PVC_RECURSE_AGGREGATES,
-							  PVC_INCLUDE_PLACEHOLDERS);
-	extravars = pull_var_clause(parse->havingQual,
-								PVC_RECURSE_AGGREGATES,
-								PVC_INCLUDE_PLACEHOLDERS);
-	sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
-	list_free(extravars);
+	sub_tlist = NIL;
+	non_group_cols = NIL;
 	*need_tlist_eval = false;	/* only eval if not flat tlist */
 
-	/*
-	 * If grouping, create sub_tlist entries for all GROUP BY expressions
-	 * (GROUP BY items that are simple Vars should be in the list already),
-	 * and make an array showing where the group columns are in the sub_tlist.
-	 */
 	numCols = list_length(parse->groupClause);
 	if (numCols > 0)
 	{
-		int			keyno = 0;
+		/*
+		 * If grouping, create sub_tlist entries for all GROUP BY columns, and
+		 * make an array showing where the group columns are in the sub_tlist.
+		 *
+		 * Note: with this implementation, the array entries will always be
+		 * 1..N, but we don't want callers to assume that.
+		 */
 		AttrNumber *grpColIdx;
-		ListCell   *gl;
+		ListCell   *tl;
 
-		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+		grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
 		*groupColIdx = grpColIdx;
 
-		foreach(gl, parse->groupClause)
+		foreach(tl, tlist)
 		{
-			SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
-			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
-			TargetEntry *te;
+			TargetEntry *tle = (TargetEntry *) lfirst(tl);
+			int			colno;
 
-			/*
-			 * Find or make a matching sub_tlist entry.  If the groupexpr
-			 * isn't a Var, no point in searching.  (Note that the parser
-			 * won't make multiple groupClause entries for the same TLE.)
-			 */
-			if (groupexpr && IsA(groupexpr, Var))
-				te = tlist_member(groupexpr, sub_tlist);
-			else
-				te = NULL;
+			colno = get_grouping_column_index(parse, tle);
+			if (colno >= 0)
+			{
+				/*
+				 * It's a grouping column, so add it to the result tlist and
+				 * remember its resno in grpColIdx[].
+				 */
+				TargetEntry *newtle;
 
-			if (!te)
+				newtle = makeTargetEntry(tle->expr,
+										 list_length(sub_tlist) + 1,
+										 NULL,
+										 false);
+				sub_tlist = lappend(sub_tlist, newtle);
+
+				Assert(grpColIdx[colno] == 0);	/* no dups expected */
+				grpColIdx[colno] = newtle->resno;
+
+				if (!(newtle->expr && IsA(newtle->expr, Var)))
+					*need_tlist_eval = true;	/* tlist contains non Vars */
+			}
+			else
 			{
-				te = makeTargetEntry((Expr *) groupexpr,
-									 list_length(sub_tlist) + 1,
-									 NULL,
-									 false);
-				sub_tlist = lappend(sub_tlist, te);
-				*need_tlist_eval = true;		/* it's not flat anymore */
+				/*
+				 * Non-grouping column, so just remember the expression
+				 * for later call to pull_var_clause.  There's no need for
+				 * pull_var_clause to examine the TargetEntry node itself.
+				 */
+				non_group_cols = lappend(non_group_cols, tle->expr);
 			}
-
-			/* and save its resno */
-			grpColIdx[keyno++] = te->resno;
 		}
 	}
+	else
+	{
+		/*
+		 * With no grouping columns, just pass whole tlist to pull_var_clause.
+		 * Need (shallow) copy to avoid damaging input tlist below.
+		 */
+		non_group_cols = list_copy(tlist);
+	}
+
+	/*
+	 * If there's a HAVING clause, we'll need the Vars it uses, too.
+	 */
+	if (parse->havingQual)
+		non_group_cols = lappend(non_group_cols, parse->havingQual);
+
+	/*
+	 * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
+	 * add them to the result tlist if not already present.  (A Var used
+	 * directly as a GROUP BY item will be present already.)  Note this
+	 * includes Vars used in resjunk items, so we are covering the needs of
+	 * ORDER BY and window specifications.  Vars used within Aggrefs will be
+	 * pulled out here, too.
+	 */
+	non_group_vars = pull_var_clause((Node *) non_group_cols,
+									 PVC_RECURSE_AGGREGATES,
+									 PVC_INCLUDE_PLACEHOLDERS);
+	sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
+
+	/* clean up cruft */
+	list_free(non_group_vars);
+	list_free(non_group_cols);
 
 	return sub_tlist;
 }
 
+/*
+ * get_grouping_column_index
+ *		Get the GROUP BY column position, if any, of a targetlist entry.
+ *
+ * Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
+ * if it's not a grouping column.  Note: the result is unique because the
+ * parser won't make multiple groupClause entries for the same TLE.
+ */
+static int
+get_grouping_column_index(Query *parse, TargetEntry *tle)
+{
+	int			colno = 0;
+	Index		ressortgroupref = tle->ressortgroupref;
+	ListCell   *gl;
+
+	/* No need to search groupClause if TLE hasn't got a sortgroupref */
+	if (ressortgroupref == 0)
+		return -1;
+
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
+
+		if (grpcl->tleSortGroupRef == ressortgroupref)
+			return colno;
+		colno++;
+	}
+
+	return -1;
+}
+
 /*
  * locate_grouping_columns
  *		Locate grouping columns in the tlist chosen by create_plan.