@@ -161,6 +161,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
+static double get_parallel_divisor(Path *path);
 
 
 /*
@@ -238,32 +239,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
 	/* Adjust costing for parallelism, if used. */
 	if (path->parallel_workers > 0)
 	{
-		double		parallel_divisor = path->parallel_workers;
-		double		leader_contribution;
-
-		/*
-		 * Early experience with parallel query suggests that when there is
-		 * only one worker, the leader often makes a very substantial
-		 * contribution to executing the parallel portion of the plan, but as
-		 * more workers are added, it does less and less, because it's busy
-		 * reading tuples from the workers and doing whatever non-parallel
-		 * post-processing is needed.  By the time we reach 4 workers, the
-		 * leader no longer makes a meaningful contribution.  Thus, for now,
-		 * estimate that the leader spends 30% of its time servicing each
-		 * worker, and the remainder executing the parallel plan.
-		 */
-		leader_contribution = 1.0 - (0.3 * path->parallel_workers);
-		if (leader_contribution > 0)
-			parallel_divisor += leader_contribution;
-
-		/*
-		 * In the case of a parallel plan, the row count needs to represent
-		 * the number of tuples processed per worker.  Otherwise, higher-level
-		 * plan nodes that appear below the gather will be costed incorrectly,
-		 * because they'll anticipate receiving more rows than any given copy
-		 * will actually get.
-		 */
-		path->rows = clamp_row_est(path->rows / parallel_divisor);
+		double		parallel_divisor = get_parallel_divisor(path);
 
 		/* The CPU cost is divided among all the workers. */
 		cpu_run_cost /= parallel_divisor;
@@ -274,6 +250,12 @@ cost_seqscan(Path *path, PlannerInfo *root,
 		 * prefetching.  For now, we assume that the disk run cost can't be
 		 * amortized at all.
 		 */
+
+		/*
+		 * In the case of a parallel plan, the row count needs to represent
+		 * the number of tuples processed per worker.
+		 */
+		path->rows = clamp_row_est(path->rows / parallel_divisor);
 	}
 
 	path->startup_cost = startup_cost;
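(Illustrative note, not part of the commit: with the 30% leader heuristic, a sequential scan estimated at 100000 rows and planned with two workers gets a divisor of 2 + (1.0 - 0.6) = 2.4, so each participant is costed for clamp_row_est(100000 / 2.4), about 41667 rows, and the CPU run cost is likewise divided by 2.4.)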
@@ -2013,6 +1995,10 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	else
 		path->path.rows = path->path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->path.parallel_workers > 0)
+		path->path.rows /= get_parallel_divisor(&path->path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2431,6 +2417,10 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2810,6 +2800,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -4798,3 +4792,31 @@ page_size(double tuples, int width)
 {
 	return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
+
+/*
+ * Estimate the fraction of the work that each worker will do given the
+ * number of workers budgeted for the path.
+ */
+static double
+get_parallel_divisor(Path *path)
+{
+	double		parallel_divisor = path->parallel_workers;
+	double		leader_contribution;
+
+	/*
+	 * Early experience with parallel query suggests that when there is only
+	 * one worker, the leader often makes a very substantial contribution to
+	 * executing the parallel portion of the plan, but as more workers are
+	 * added, it does less and less, because it's busy reading tuples from the
+	 * workers and doing whatever non-parallel post-processing is needed.  By
+	 * the time we reach 4 workers, the leader no longer makes a meaningful
+	 * contribution.  Thus, for now, estimate that the leader spends 30% of
+	 * its time servicing each worker, and the remainder executing the
+	 * parallel plan.
+	 */
+	leader_contribution = 1.0 - (0.3 * path->parallel_workers);
+	if (leader_contribution > 0)
+		parallel_divisor += leader_contribution;
+
+	return parallel_divisor;
+}
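
As a quick illustration (not part of the commit), the heuristic centralized in get_parallel_divisor() works out as follows: the divisor starts at the planned worker count, and as long as 1.0 - 0.3 * workers is still positive (i.e. fewer than four workers), that leftover fraction of the leader's time is added on top; the planner then divides the path's row estimate by the result to get a per-worker row count. A minimal standalone sketch, assuming a hypothetical row estimate of 100000:

```c
#include <stdio.h>

/* Standalone mirror of the heuristic; not the PostgreSQL function itself. */
static double
parallel_divisor(int parallel_workers)
{
	double		divisor = parallel_workers;
	double		leader_contribution = 1.0 - (0.3 * parallel_workers);

	/* The leader only counts while it has cycles left after servicing workers. */
	if (leader_contribution > 0)
		divisor += leader_contribution;
	return divisor;
}

int
main(void)
{
	double		rows = 100000.0;	/* hypothetical row estimate for the path */

	for (int workers = 1; workers <= 4; workers++)
		printf("workers=%d  divisor=%.1f  rows per worker=%.0f\n",
			   workers, parallel_divisor(workers),
			   rows / parallel_divisor(workers));
	/* divisors: 1.7, 2.4, 3.1, 4.0; with 4 or more workers the leader adds nothing */
	return 0;
}
```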