Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane2008-10-04 21:56:55 +0000
committerTom Lane2008-10-04 21:56:55 +0000
commit44d5be0e5308e951c0c5dc522b4bcacf2bcbc476 (patch)
tree516f1c70436225751f631e7e686f7ea61b3db9df /src/backend/optimizer/path
parent607b2be7bb230ea4c558cb3101794f94de35ab85 (diff)
Implement SQL-standard WITH clauses, including WITH RECURSIVE.
There are some unimplemented aspects: recursive queries must use UNION ALL (should allow UNION too), and we don't have SEARCH or CYCLE clauses. These might or might not get done for 8.4, but even without them it's a pretty useful feature. There are also a couple of small loose ends and definitional quibbles, which I'll send a memo about to pgsql-hackers shortly. But let's land the patch now so we can get on with other development. Yoshiyuki Asaba, with lots of help from Tatsuo Ishii and Tom Lane
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c120
-rw-r--r--src/backend/optimizer/path/clausesel.c36
-rw-r--r--src/backend/optimizer/path/costsize.c118
-rw-r--r--src/backend/optimizer/path/joinpath.c13
4 files changed, 253 insertions, 34 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 3dc41c68073..a942249fd09 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.173 2008/08/25 22:42:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.174 2008/10/04 21:56:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,6 +57,10 @@ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes);
@@ -173,14 +177,22 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
}
else if (rel->rtekind == RTE_FUNCTION)
{
- /* RangeFunction --- generate a separate plan for it */
+ /* RangeFunction --- generate a suitable path for it */
set_function_pathlist(root, rel, rte);
}
else if (rel->rtekind == RTE_VALUES)
{
- /* Values list --- generate a separate plan for it */
+ /* Values list --- generate a suitable path for it */
set_values_pathlist(root, rel, rte);
}
+ else if (rel->rtekind == RTE_CTE)
+ {
+ /* CTE reference --- generate a suitable path for it */
+ if (rte->self_reference)
+ set_worktable_pathlist(root, rel, rte);
+ else
+ set_cte_pathlist(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -585,8 +597,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* Generate the plan for the subquery */
rel->subplan = subquery_planner(root->glob, subquery,
- root->query_level + 1,
- tuple_fraction,
+ root,
+ false, tuple_fraction,
&subroot);
rel->subrtable = subroot->parse->rtable;
@@ -641,6 +653,104 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
}
/*
+ * set_cte_pathlist
+ * Build the (single) access path for a non-self-reference CTE RTE
+ */
+static void
+set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Plan *cteplan;
+ PlannerInfo *cteroot;
+ Index levelsup;
+ int ndx;
+ ListCell *lc;
+ int plan_id;
+
+ /*
+ * Find the referenced CTE, and locate the plan previously made for it.
+ */
+ levelsup = rte->ctelevelsup;
+ cteroot = root;
+ while (levelsup-- > 0)
+ {
+ cteroot = cteroot->parent_root;
+ if (!cteroot) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ }
+ /*
+ * Note: cte_plan_ids can be shorter than cteList, if we are still working
+ * on planning the CTEs (ie, this is a side-reference from another CTE).
+ * So we mustn't use forboth here.
+ */
+ ndx = 0;
+ foreach(lc, cteroot->parse->cteList)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
+
+ if (strcmp(cte->ctename, rte->ctename) == 0)
+ break;
+ ndx++;
+ }
+ if (lc == NULL) /* shouldn't happen */
+ elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
+ if (ndx >= list_length(cteroot->cte_plan_ids))
+ elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
+ plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
+ Assert(plan_id > 0);
+ cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_cte_size_estimates(root, rel, cteplan);
+
+ /* Generate appropriate path */
+ add_path(rel, create_ctescan_path(root, rel));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
+ * set_worktable_pathlist
+ * Build the (single) access path for a self-reference CTE RTE
+ */
+static void
+set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Plan *cteplan;
+ PlannerInfo *cteroot;
+ Index levelsup;
+
+ /*
+ * We need to find the non-recursive term's plan, which is in the plan
+ * level that's processing the recursive UNION, which is one level
+ * *below* where the CTE comes from.
+ */
+ levelsup = rte->ctelevelsup;
+ if (levelsup == 0) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ levelsup--;
+ cteroot = root;
+ while (levelsup-- > 0)
+ {
+ cteroot = cteroot->parent_root;
+ if (!cteroot) /* shouldn't happen */
+ elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
+ }
+ cteplan = cteroot->non_recursive_plan;
+ if (!cteplan) /* shouldn't happen */
+ elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
+
+ /* Mark rel with estimated output rows, width, etc */
+ set_cte_size_estimates(root, rel, cteplan);
+
+ /* Generate appropriate path */
+ add_path(rel, create_worktablescan_path(root, rel));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
*
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 9ffdc32e775..e3e4e9f02c0 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.93 2008/08/22 00:16:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.94 2008/10/04 21:56:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -550,29 +550,17 @@ clause_selectivity(PlannerInfo *root,
if (var->varlevelsup == 0 &&
(varRelid == 0 || varRelid == (int) var->varno))
{
- RangeTblEntry *rte = planner_rt_fetch(var->varno, root);
-
- if (rte->rtekind == RTE_SUBQUERY)
- {
- /*
- * XXX not smart about subquery references... any way to do
- * better than default?
- */
- }
- else
- {
- /*
- * A Var at the top of a clause must be a bool Var. This is
- * equivalent to the clause reln.attribute = 't', so we
- * compute the selectivity as if that is what we have.
- */
- s1 = restriction_selectivity(root,
- BooleanEqualOperator,
- list_make2(var,
- makeBoolConst(true,
- false)),
- varRelid);
- }
+ /*
+ * A Var at the top of a clause must be a bool Var. This is
+ * equivalent to the clause reln.attribute = 't', so we
+ * compute the selectivity as if that is what we have.
+ */
+ s1 = restriction_selectivity(root,
+ BooleanEqualOperator,
+ list_make2(var,
+ makeBoolConst(true,
+ false)),
+ varRelid);
}
}
else if (IsA(clause, Const))
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index bbc77db4e25..7bfc66cac3a 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.197 2008/09/05 21:07:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.198 2008/10/04 21:56:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -934,6 +934,84 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
}
/*
+ * cost_ctescan
+ * Determines and returns the cost of scanning a CTE RTE.
+ *
+ * Note: this is used for both self-reference and regular CTEs; the
+ * possible cost differences are below the threshold of what we could
+ * estimate accurately anyway. Note that the costs of evaluating the
+ * referenced CTE query are added into the final plan as initplan costs,
+ * and should NOT be counted here.
+ */
+void
+cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations that are CTEs */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_CTE);
+
+ /* Charge one CPU tuple cost per row for tuplestore manipulation */
+ cpu_per_tuple = cpu_tuple_cost;
+
+ /* Add scanning CPU costs */
+ startup_cost += baserel->baserestrictcost.startup;
+ cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
+ * cost_recursive_union
+ * Determines and returns the cost of performing a recursive union,
+ * and also the estimated output size.
+ *
+ * We are given Plans for the nonrecursive and recursive terms.
+ *
+ * Note that the arguments and output are Plans, not Paths as in most of
+ * the rest of this module. That's because we don't bother setting up a
+ * Path representation for recursive union --- we have only one way to do it.
+ */
+void
+cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm)
+{
+ Cost startup_cost;
+ Cost total_cost;
+ double total_rows;
+
+ /* We probably have decent estimates for the non-recursive term */
+ startup_cost = nrterm->startup_cost;
+ total_cost = nrterm->total_cost;
+ total_rows = nrterm->plan_rows;
+
+ /*
+ * We arbitrarily assume that about 10 recursive iterations will be
+ * needed, and that we've managed to get a good fix on the cost and
+ * output size of each one of them. These are mighty shaky assumptions
+ * but it's hard to see how to do better.
+ */
+ total_cost += 10 * rterm->total_cost;
+ total_rows += 10 * rterm->plan_rows;
+
+ /*
+ * Also charge cpu_tuple_cost per row to account for the costs of
+ * manipulating the tuplestores. (We don't worry about possible
+ * spill-to-disk costs.)
+ */
+ total_cost += cpu_tuple_cost * total_rows;
+
+ runion->startup_cost = startup_cost;
+ runion->total_cost = total_cost;
+ runion->plan_rows = total_rows;
+ runion->plan_width = Max(nrterm->plan_width, rterm->plan_width);
+}
+
+/*
* cost_sort
* Determines and returns the cost of sorting a relation, including
* the cost of reading the input data.
@@ -2475,6 +2553,44 @@ set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel)
set_baserel_size_estimates(root, rel);
}
+/*
+ * set_cte_size_estimates
+ * Set the size estimates for a base relation that is a CTE reference.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and we need the completed plan for the CTE (if a regular CTE)
+ * or the non-recursive term (if a self-reference).
+ *
+ * We set the same fields as set_baserel_size_estimates.
+ */
+void
+set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, Plan *cteplan)
+{
+ RangeTblEntry *rte;
+
+ /* Should only be applied to base relations that are CTE references */
+ Assert(rel->relid > 0);
+ rte = planner_rt_fetch(rel->relid, root);
+ Assert(rte->rtekind == RTE_CTE);
+
+ if (rte->self_reference)
+ {
+ /*
+ * In a self-reference, arbitrarily assume the average worktable
+ * size is about 10 times the nonrecursive term's size.
+ */
+ rel->tuples = 10 * cteplan->plan_rows;
+ }
+ else
+ {
+ /* Otherwise just believe the CTE plan's output estimate */
+ rel->tuples = cteplan->plan_rows;
+ }
+
+ /* Now estimate number of output rows, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
/*
* set_rel_width
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 845f429a4b4..eaf0ffa4276 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.117 2008/08/14 18:47:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.118 2008/10/04 21:56:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -408,10 +408,15 @@ match_unsorted_outer(PlannerInfo *root,
* If the cheapest inner path is a join or seqscan, we should consider
* materializing it. (This is a heuristic: we could consider it
* always, but for inner indexscans it's probably a waste of time.)
+ * Also skip it if the inner path materializes its output anyway.
*/
- if (!(IsA(inner_cheapest_total, IndexPath) ||
- IsA(inner_cheapest_total, BitmapHeapPath) ||
- IsA(inner_cheapest_total, TidPath)))
+ if (!(inner_cheapest_total->pathtype == T_IndexScan ||
+ inner_cheapest_total->pathtype == T_BitmapHeapScan ||
+ inner_cheapest_total->pathtype == T_TidScan ||
+ inner_cheapest_total->pathtype == T_Material ||
+ inner_cheapest_total->pathtype == T_FunctionScan ||
+ inner_cheapest_total->pathtype == T_CteScan ||
+ inner_cheapest_total->pathtype == T_WorkTableScan))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);