
Commit c45bf57

Fix planner crash from pfree'ing a partial path that a GatherPath uses.
We mustn't run generate_gather_paths() during add_paths_to_joinrel(), because that function can be invoked multiple times for the same target joinrel. Not only is it wasteful to build GatherPaths repeatedly, but a later add_partial_path() could delete the partial path that a previously created GatherPath depends on. Instead establish the convention that we do generate_gather_paths() for a rel only just before set_cheapest().

The code was accidentally not broken for baserels, because as of today there never is more than one partial path for a baserel. But that assumption obviously has a pretty short half-life, so move the generate_gather_paths() calls for those cases as well.

Also add some generic comments explaining how and why this all works.

Per fuzz testing by Andreas Seltenreich.

Report: <871t5pgwdt.fsf@credativ.de>
1 parent 17d5db3 commit c45bf57

File tree: 5 files changed, +71 −34 lines

src/backend/optimizer/README (+13 −2)

@@ -167,6 +167,16 @@ all the ways to produce the same set of joined base rels will share the
 same RelOptInfo, so the paths produced from different join combinations
 that produce equivalent joinrels will compete in add_path().
 
+The dynamic-programming approach has an important property that's not
+immediately obvious: we will finish constructing all paths for a given
+relation before we construct any paths for relations containing that rel.
+This means that we can reliably identify the "cheapest path" for each rel
+before higher-level relations need to know that.  Also, we can safely
+discard a path when we find that another path for the same rel is better,
+without worrying that maybe there is already a reference to that path in
+some higher-level join path.  Without this, memory management for paths
+would be much more complicated.
+
 Once we have built the final join rel, we use either the cheapest path
 for it or the cheapest path with the desired ordering (if that's cheaper
 than applying a sort to the cheapest other path).

@@ -321,8 +331,9 @@ set up for recursive handling of subqueries
 For each joinrel of the prior level, do make_rels_by_clause_joins()
 if it has join clauses, or make_rels_by_clauseless_joins() if not.
 Also generate "bushy plan" joins between joinrels of lower levels.
-Back at standard_join_search(), apply set_cheapest() to extract the
-cheapest path for each newly constructed joinrel.
+Back at standard_join_search(), generate gather paths if needed for
+each newly constructed joinrel, then apply set_cheapest() to extract
+the cheapest path for it.
 Loop back if this wasn't the top join level.
 Back at grouping_planner:
 do grouping (GROUP BY) and aggregation

src/backend/optimizer/geqo/geqo_eval.c (+3)

@@ -266,6 +266,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 		/* Keep searching if join order is not valid */
 		if (joinrel)
 		{
+			/* Create GatherPaths for any useful partial paths for rel */
+			generate_gather_paths(root, joinrel);
+
 			/* Find and save the cheapest paths for this joinrel */
 			set_cheapest(joinrel);
 
src/backend/optimizer/path/allpaths.c (+30 −20)

@@ -73,7 +73,7 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 				 Index rti, RangeTblEntry *rte);
 static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
 				   RangeTblEntry *rte);
-static void create_parallel_paths(PlannerInfo *root, RelOptInfo *rel);
+static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
 static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
 						  RangeTblEntry *rte);
 static bool function_rte_parallel_ok(RangeTblEntry *rte);

@@ -447,6 +447,16 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		}
 	}
 
+	/*
+	 * If this is a baserel, consider gathering any partial paths we may have
+	 * created for it.  (If we tried to gather inheritance children, we could
+	 * end up with a very large number of gather nodes, each trying to grab
+	 * its own pool of workers, so don't do this for otherrels.  Instead,
+	 * we'll consider gathering partial paths for the parent appendrel.)
+	 */
+	if (rel->reloptkind == RELOPT_BASEREL)
+		generate_gather_paths(root, rel);
+
 	/*
 	 * Allow a plugin to editorialize on the set of Paths for this base
 	 * relation.  It could add new paths (such as CustomPaths) by calling

@@ -643,7 +653,7 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 
 	/* If appropriate, consider parallel sequential scan */
 	if (rel->consider_parallel && required_outer == NULL)
-		create_parallel_paths(root, rel);
+		create_plain_partial_paths(root, rel);
 
 	/* Consider index scans */
 	create_index_paths(root, rel);

@@ -653,11 +663,11 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 }
 
 /*
- * create_parallel_paths
- *	  Build parallel access paths for a plain relation
+ * create_plain_partial_paths
+ *	  Build partial access paths for parallel scan of a plain relation
  */
 static void
-create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
+create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
 {
 	int			parallel_degree = 1;
 

@@ -712,16 +722,6 @@ create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
 
 	/* Add an unordered partial path based on a parallel sequential scan. */
 	add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_degree));
-
-	/*
-	 * If this is a baserel, consider gathering any partial paths we may have
-	 * just created.  If we gathered an inheritance child, we could end up
-	 * with a very large number of gather nodes, each trying to grab its own
-	 * pool of workers, so don't do this in that case.  Instead, we'll
-	 * consider gathering partial paths for the appendrel.
-	 */
-	if (rel->reloptkind == RELOPT_BASEREL)
-		generate_gather_paths(root, rel);
 }
 
 /*

@@ -1262,9 +1262,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		appendpath = create_append_path(rel, partial_subpaths, NULL,
 										parallel_degree);
 		add_partial_path(rel, (Path *) appendpath);
-
-		/* Consider gathering it. */
-		generate_gather_paths(root, rel);
 	}
 
 	/*

@@ -1970,6 +1967,10 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * generate_gather_paths
  *		Generate parallel access paths for a relation by pushing a Gather on
  *		top of a partial path.
+ *
+ * This must not be called until after we're done creating all partial paths
+ * for the specified relation.  (Otherwise, add_partial_path might delete a
+ * path that some GatherPath has a reference to.)
  */
 void
 generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)

@@ -1983,7 +1984,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 
 	/*
 	 * The output of Gather is currently always unsorted, so there's only one
-	 * partial path of interest: the cheapest one.
+	 * partial path of interest: the cheapest one.  That will be the one at
+	 * the front of partial_pathlist because of the way add_partial_path
+	 * works.
 	 *
 	 * Eventually, we should have a Gather Merge operation that can merge
	 * multiple tuple streams together while preserving their ordering.  We

@@ -2148,12 +2151,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 		join_search_one_level(root, lev);
 
 		/*
-		 * Do cleanup work on each just-processed rel.
+		 * Run generate_gather_paths() for each just-processed joinrel.  We
+		 * could not do this earlier because both regular and partial paths
+		 * can get added to a particular joinrel at multiple times within
+		 * join_search_one_level.  After that, we're done creating paths
+		 * for the joinrel, so run set_cheapest().
 		 */
 		foreach(lc, root->join_rel_level[lev])
 		{
 			rel = (RelOptInfo *) lfirst(lc);
 
+			/* Create GatherPaths for any useful partial paths for rel */
+			generate_gather_paths(root, rel);
+
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(rel);

src/backend/optimizer/path/joinpath.c (+1 −6)

@@ -223,12 +223,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 							 jointype, &extra);
 
 	/*
-	 * 6. Consider gathering partial paths.
-	 */
-	generate_gather_paths(root, joinrel);
-
-	/*
-	 * 7. Finally, give extensions a chance to manipulate the path list.
+	 * 6. Finally, give extensions a chance to manipulate the path list.
 	 */
 	if (set_join_pathlist_hook)
 		set_join_pathlist_hook(root, joinrel, outerrel, innerrel,

src/backend/optimizer/util/pathnode.c (+24 −6)

@@ -394,8 +394,14 @@ set_cheapest(RelOptInfo *parent_rel)
 *	  but just recycling discarded Path nodes is a very useful savings in
 *	  a large join tree.  We can recycle the List nodes of pathlist, too.
 *
- *	  BUT: we do not pfree IndexPath objects, since they may be referenced as
- *	  children of BitmapHeapPaths as well as being paths in their own right.
+ *	  As noted in optimizer/README, deleting a previously-accepted Path is
+ *	  safe because we know that Paths of this rel cannot yet be referenced
+ *	  from any other rel, such as a higher-level join.  However, in some cases
+ *	  it is possible that a Path is referenced by another Path for its own
+ *	  rel; we must not delete such a Path, even if it is dominated by the new
+ *	  Path.  Currently this occurs only for IndexPath objects, which may be
+ *	  referenced as children of BitmapHeapPaths as well as being paths in
+ *	  their own right.  Hence, we don't pfree IndexPaths when rejecting them.
 *
 * 'parent_rel' is the relation entry to which the path corresponds.
 * 'new_path' is a potential path for parent_rel.

@@ -711,6 +717,10 @@ add_path_precheck(RelOptInfo *parent_rel,
 *	  parallel such that each worker will generate a subset of the path's
 *	  overall result.
 *
+ *	  As in add_path, the partial_pathlist is kept sorted with the cheapest
+ *	  total path in front.  This is depended on by multiple places, which
+ *	  just take the front entry as the cheapest path without searching.
+ *
 *	  We don't generate parameterized partial paths for several reasons.  Most
 *	  importantly, they're not safe to execute, because there's nothing to
 *	  make sure that a parallel scan within the parameterized portion of the

@@ -721,15 +731,23 @@ add_path_precheck(RelOptInfo *parent_rel,
 *	  side of the plan will be small anyway.  There could be rare cases where
 *	  this wins big - e.g. if join order constraints put a 1-row relation on
 *	  the outer side of the topmost join with a parameterized plan on the inner
- *	  side - but we'll have to be content not to handle such cases until somebody
- *	  builds an executor infrastructure that can cope with them.
+ *	  side - but we'll have to be content not to handle such cases until
+ *	  somebody builds an executor infrastructure that can cope with them.
 *
 *	  Because we don't consider parameterized paths here, we also don't
 *	  need to consider the row counts as a measure of quality: every path will
 *	  produce the same number of rows.  Neither do we need to consider startup
 *	  costs: parallelism is only used for plans that will be run to completion.
 *	  Therefore, this routine is much simpler than add_path: it needs to
 *	  consider only pathkeys and total cost.
+ *
+ *	  As with add_path, we pfree paths that are found to be dominated by
+ *	  another partial path; this requires that there be no other references to
+ *	  such paths yet.  Hence, GatherPaths must not be created for a rel until
+ *	  we're done creating all partial paths for it.  We do not currently build
+ *	  partial indexscan paths, so there is no need for an exception for
+ *	  IndexPaths here; for safety, we instead Assert that a path to be freed
+ *	  isn't an IndexPath.
 */
void
add_partial_path(RelOptInfo *parent_rel, Path *new_path)

@@ -808,7 +826,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
 		{
 			parent_rel->partial_pathlist =
 				list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
-			/* add_path has a special case for IndexPath; we don't need it */
+			/* we should not see IndexPaths here, so always safe to delete */
 			Assert(!IsA(old_path, IndexPath));
 			pfree(old_path);
 			/* p1_prev does not advance */

@@ -842,7 +860,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
 	}
 	else
 	{
-		/* add_path has a special case for IndexPath; we don't need it */
+		/* we should not see IndexPaths here, so always safe to delete */
 		Assert(!IsA(new_path, IndexPath));
 		/* Reject and recycle the new path */
 		pfree(new_path);

0 commit comments