
Commit d3fdec6

Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.

Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot
more work than required, so this is a problem. To fix, add a per-relation
flag consider_param_startup that works like the existing consider_startup
flag, but applies to parameterized paths, and set it for relations that
are the inside of a SEMI or ANTI join.

To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.

Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.

Per a complaint from Peter Holzer and analysis by Tomas Vondra.
1 parent 00ca051 commit d3fdec6

File tree

7 files changed: +175 -86 lines changed

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
@@ -1742,6 +1742,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
 	WRITE_FLOAT_FIELD(rows, "%.0f");
 	WRITE_INT_FIELD(width);
 	WRITE_BOOL_FIELD(consider_startup);
+	WRITE_BOOL_FIELD(consider_param_startup);
 	WRITE_NODE_FIELD(reltargetlist);
 	WRITE_NODE_FIELD(pathlist);
 	WRITE_NODE_FIELD(ppilist);

src/backend/optimizer/README

Lines changed: 8 additions & 1 deletion
@@ -798,7 +798,7 @@ a nestloop that provides parameters to the lower join's inputs). While we
 do not ignore merge joins entirely, joinpath.c does not fully explore the
 space of potential merge joins with parameterized inputs. Also, add_path
 treats parameterized paths as having no pathkeys, so that they compete
-only on total cost and rowcount; they don't get preference for producing a
+only on cost and rowcount; they don't get preference for producing a
 special sort order. This creates additional bias against merge joins,
 since we might discard a path that could have been useful for performing
 a merge without an explicit sort step. Since a parameterized path must
@@ -807,6 +807,13 @@ uninteresting, these choices do not affect any requirement for the final
 output order of a query --- they only make it harder to use a merge join
 at a lower level. The savings in planning work justifies that.
 
+Similarly, parameterized paths do not normally get preference in add_path
+for having cheap startup cost; that's seldom of much value when on the
+inside of a nestloop, so it seems not worth keeping extra paths solely for
+that. An exception occurs for parameterized paths for the RHS relation of
+a SEMI or ANTI join: in those cases, we can stop the inner scan after the
+first match, so it's primarily startup not total cost that we care about.
+
 
 LATERAL subqueries
 ------------------

src/backend/optimizer/path/allpaths.c

Lines changed: 47 additions & 0 deletions
@@ -47,6 +47,7 @@ int geqo_threshold;
 join_search_hook_type join_search_hook = NULL;
 
 
+static void set_base_rel_consider_startup(PlannerInfo *root);
 static void set_base_rel_sizes(PlannerInfo *root);
 static void set_base_rel_pathlists(PlannerInfo *root);
 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
@@ -131,6 +132,9 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 		root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
 	}
 
+	/* Mark base rels as to whether we care about fast-start plans */
+	set_base_rel_consider_startup(root);
+
 	/*
 	 * Generate access paths for the base rels.
 	 */
@@ -150,6 +154,49 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	return rel;
 }
 
+/*
+ * set_base_rel_consider_startup
+ *	  Set the consider_[param_]startup flags for each base-relation entry.
+ *
+ * For the moment, we only deal with consider_param_startup here; because the
+ * logic for consider_startup is pretty trivial and is the same for every base
+ * relation, we just let build_simple_rel() initialize that flag correctly to
+ * start with.  If that logic ever gets more complicated it would probably
+ * be better to move it here.
+ */
+static void
+set_base_rel_consider_startup(PlannerInfo *root)
+{
+	/*
+	 * Since parameterized paths can only be used on the inside of a nestloop
+	 * join plan, there is usually little value in considering fast-start
+	 * plans for them.  However, for relations that are on the RHS of a SEMI
+	 * or ANTI join, a fast-start plan can be useful because we're only going
+	 * to care about fetching one tuple anyway.
+	 *
+	 * To minimize growth of planning time, we currently restrict this to
+	 * cases where the RHS is a single base relation, not a join; there is no
+	 * provision for consider_param_startup to get set at all on joinrels.
+	 * Also we don't worry about appendrels.  costsize.c's costing rules for
+	 * nestloop semi/antijoins don't consider such cases either.
+	 */
+	ListCell   *lc;
+
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+		if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
+			bms_membership(sjinfo->syn_righthand) == BMS_SINGLETON)
+		{
+			int			varno = bms_singleton_member(sjinfo->syn_righthand);
+			RelOptInfo *rel = find_base_rel(root, varno);
+
+			rel->consider_param_startup = true;
+		}
+	}
+}
+
 /*
  * set_base_rel_sizes
  *	  Set the size estimates (rows and widths) for each base-relation entry.

src/backend/optimizer/path/costsize.c

Lines changed: 77 additions & 47 deletions
@@ -1654,7 +1654,8 @@ cost_group(Path *path, PlannerInfo *root,
  * estimate and getting a tight lower bound.  We choose to not examine the
  * join quals here, since that's by far the most expensive part of the
  * calculations.  The end result is that CPU-cost considerations must be
- * left for the second phase.
+ * left for the second phase; and for SEMI/ANTI joins, we must also postpone
+ * incorporation of the inner path's run cost.
  *
  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
  * other data to be used by final_cost_nestloop
@@ -1702,44 +1703,16 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 
 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
 	{
-		double		outer_matched_rows;
-		Selectivity inner_scan_frac;
-
 		/*
 		 * SEMI or ANTI join: executor will stop after first match.
 		 *
-		 * For an outer-rel row that has at least one match, we can expect the
-		 * inner scan to stop after a fraction 1/(match_count+1) of the inner
-		 * rows, if the matches are evenly distributed.  Since they probably
-		 * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
-		 * that fraction.  (If we used a larger fuzz factor, we'd have to
-		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
-		 * least 1, no such clamp is needed now.)
-		 *
-		 * A complicating factor is that rescans may be cheaper than first
-		 * scans.  If we never scan all the way to the end of the inner rel,
-		 * it might be (depending on the plan type) that we'd never pay the
-		 * whole inner first-scan run cost.  However it is difficult to
-		 * estimate whether that will happen, so be conservative and always
-		 * charge the whole first-scan cost once.
-		 */
-		run_cost += inner_run_cost;
-
-		outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
-		inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
-
-		/* Add inner run cost for additional outer tuples having matches */
-		if (outer_matched_rows > 1)
-			run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
-
-		/*
-		 * The cost of processing unmatched rows varies depending on the
-		 * details of the joinclauses, so we leave that part for later.
+		 * Getting decent estimates requires inspection of the join quals,
+		 * which we choose to postpone to final_cost_nestloop.
 		 */
 
 		/* Save private data for final_cost_nestloop */
-		workspace->outer_matched_rows = outer_matched_rows;
-		workspace->inner_scan_frac = inner_scan_frac;
+		workspace->inner_run_cost = inner_run_cost;
+		workspace->inner_rescan_run_cost = inner_rescan_run_cost;
 	}
 	else
 	{
@@ -1756,7 +1729,6 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 	workspace->total_cost = startup_cost + run_cost;
 	/* Save private data for final_cost_nestloop */
 	workspace->run_cost = run_cost;
-	workspace->inner_rescan_run_cost = inner_rescan_run_cost;
 }
 
 /*
@@ -1780,7 +1752,6 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	double		inner_path_rows = inner_path->rows;
 	Cost		startup_cost = workspace->startup_cost;
 	Cost		run_cost = workspace->run_cost;
-	Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
 	Cost		cpu_per_tuple;
 	QualCost	restrict_qual_cost;
 	double		ntuples;
@@ -1799,42 +1770,101 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	if (!enable_nestloop)
 		startup_cost += disable_cost;
 
-	/* cost of source data */
+	/* cost of inner-relation source data (we already dealt with outer rel) */
 
 	if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
 	{
-		double		outer_matched_rows = workspace->outer_matched_rows;
-		Selectivity inner_scan_frac = workspace->inner_scan_frac;
-
 		/*
 		 * SEMI or ANTI join: executor will stop after first match.
 		 */
+		Cost		inner_run_cost = workspace->inner_run_cost;
+		Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+		double		outer_matched_rows;
+		Selectivity inner_scan_frac;
 
-		/* Compute number of tuples processed (not number emitted!) */
+		/*
+		 * For an outer-rel row that has at least one match, we can expect the
+		 * inner scan to stop after a fraction 1/(match_count+1) of the inner
+		 * rows, if the matches are evenly distributed.  Since they probably
+		 * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+		 * that fraction.  (If we used a larger fuzz factor, we'd have to
+		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
+		 * least 1, no such clamp is needed now.)
+		 */
+		outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+		inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
+
+		/*
+		 * Compute number of tuples processed (not number emitted!).  First,
+		 * account for successfully-matched outer rows.
+		 */
 		ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
 
 		/*
-		 * For unmatched outer-rel rows, there are two cases.  If the inner
-		 * path is an indexscan using all the joinquals as indexquals, then an
-		 * unmatched row results in an indexscan returning no rows, which is
-		 * probably quite cheap.  We estimate this case as the same cost to
-		 * return the first tuple of a nonempty scan.  Otherwise, the executor
-		 * will have to scan the whole inner rel; not so cheap.
+		 * Now we need to estimate the actual costs of scanning the inner
+		 * relation, which may be quite a bit less than N times inner_run_cost
+		 * due to early scan stops.  We consider two cases.  If the inner path
+		 * is an indexscan using all the joinquals as indexquals, then an
+		 * unmatched outer row results in an indexscan returning no rows,
+		 * which is probably quite cheap.  Otherwise, the executor will have
+		 * to scan the whole inner rel for an unmatched row; not so cheap.
 		 */
 		if (has_indexed_join_quals(path))
 		{
+			/*
+			 * Successfully-matched outer rows will only require scanning
+			 * inner_scan_frac of the inner relation.  In this case, we don't
+			 * need to charge the full inner_run_cost even when that's more
+			 * than inner_rescan_run_cost, because we can assume that none of
+			 * the inner scans ever scan the whole inner relation.  So it's
+			 * okay to assume that all the inner scan executions can be
+			 * fractions of the full cost, even if materialization is reducing
+			 * the rescan cost.  At this writing, it's impossible to get here
+			 * for a materialized inner scan, so inner_run_cost and
+			 * inner_rescan_run_cost will be the same anyway; but just in
+			 * case, use inner_run_cost for the first matched tuple and
+			 * inner_rescan_run_cost for additional ones.
+			 */
+			run_cost += inner_run_cost * inner_scan_frac;
+			if (outer_matched_rows > 1)
+				run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+			/*
+			 * Add the cost of inner-scan executions for unmatched outer rows.
+			 * We estimate this as the same cost as returning the first tuple
+			 * of a nonempty scan.  We consider that these are all rescans,
+			 * since we used inner_run_cost once already.
+			 */
 			run_cost += (outer_path_rows - outer_matched_rows) *
 				inner_rescan_run_cost / inner_path_rows;
 
 			/*
-			 * We won't be evaluating any quals at all for these rows, so
+			 * We won't be evaluating any quals at all for unmatched rows, so
 			 * don't add them to ntuples.
 			 */
 		}
 		else
 		{
+			/*
+			 * Here, a complicating factor is that rescans may be cheaper than
+			 * first scans.  If we never scan all the way to the end of the
+			 * inner rel, it might be (depending on the plan type) that we'd
+			 * never pay the whole inner first-scan run cost.  However it is
+			 * difficult to estimate whether that will happen (and it could
+			 * not happen if there are any unmatched outer rows!), so be
+			 * conservative and always charge the whole first-scan cost once.
+			 */
+			run_cost += inner_run_cost;
+
+			/* Add inner run cost for additional outer tuples having matches */
+			if (outer_matched_rows > 1)
+				run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+			/* Add inner run cost for unmatched outer tuples */
 			run_cost += (outer_path_rows - outer_matched_rows) *
 				inner_rescan_run_cost;
+
+			/* And count the unmatched join tuples as being processed */
 			ntuples += (outer_path_rows - outer_matched_rows) *
 				inner_path_rows;
 		}
