Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Don't allow LIMIT/OFFSET clause within sub-selects to be pushed to workers.
authorAmit Kapila <akapila@postgresql.org>
Fri, 14 Sep 2018 04:35:45 +0000 (10:05 +0530)
committerAmit Kapila <akapila@postgresql.org>
Fri, 14 Sep 2018 04:35:45 +0000 (10:05 +0530)
Allowing sub-select containing LIMIT/OFFSET in workers can lead to
inconsistent results at the top-level as there is no guarantee that the
row order will be fully deterministic.  The fix is to prohibit pushing
LIMIT/OFFSET within sub-selects to workers.

Reported-by: Andrew Fletcher
Bug: 15324
Author: Amit Kapila
Reviewed-by: Dilip Kumar
Backpatch-through: 9.6
Discussion: https://postgr.es/m/153417684333.10284.11356259990921828616@wrigleys.postgresql.org

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/planner.c
src/include/optimizer/planner.h
src/test/regress/expected/select_parallel.out
src/test/regress/sql/select_parallel.sql

index fff2f4c8badc614b77d857a81a7e19b9022866f0..d9a2f9bf695ee7361f4979113d4662d0ecfa450c 100644 (file)
@@ -612,7 +612,20 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
             * the SubqueryScanPath as not parallel-safe.  (Note that
             * set_subquery_pathlist() might push some of these quals down
             * into the subquery itself, but that doesn't change anything.)
+            *
+            * We can't push sub-select containing LIMIT/OFFSET to workers as
+            * there is no guarantee that the row order will be fully
+            * deterministic, and applying LIMIT/OFFSET will lead to
+            * inconsistent results at the top-level.  (In some cases, where
+            * the result is ordered, we could relax this restriction.  But it
+            * doesn't currently seem worth expending extra effort to do so.)
             */
+           {
+               Query      *subquery = castNode(Query, rte->subquery);
+
+               if (limit_needed(subquery))
+                   return;
+           }
            break;
 
        case RTE_JOIN:
index 1946f9ef968412821ab74d28a508fca78d9eeaf4..a8a025cb8533ddea7f7d238130dfa1a177acf8fc 100644 (file)
@@ -121,7 +121,6 @@ static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
                 double tuple_fraction,
                 int64 *offset_est, int64 *count_est);
-static bool limit_needed(Query *parse);
 static void remove_useless_groupby_columns(PlannerInfo *root);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
@@ -2697,7 +2696,7 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
  * in preprocess_limit it's good enough to consider estimated values.
  */
-static bool
+bool
 limit_needed(Query *parse)
 {
    Node       *node;
index 2a4cf71e10216e8b6894373fb2b1e4069003e922..acb709a55528582b3365590c8ce1c9e49242909d 100644 (file)
@@ -46,6 +46,8 @@ extern bool is_dummy_plan(Plan *plan);
 extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
                    LockClauseStrength strength);
 
+extern bool limit_needed(Query *parse);
+
 extern void mark_partial_aggref(Aggref *agg, AggSplit aggsplit);
 
 extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
index 9fd86256d2f0da61bb46aabbcc44b2a8b2431c93..2f2aac225482d9e1bd95b5792b3dbce4bf7933cd 100644 (file)
@@ -599,6 +599,25 @@ explain (costs off, verbose)
                                  Output: b.unique1
 (18 rows)
 
+-- LIMIT/OFFSET within sub-selects can't be pushed to workers.
+explain (costs off)
+  select * from tenk1 a where two in
+    (select two from tenk1 b where stringu1 like '%AAAA' limit 3);
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.two = b.two)
+   ->  Gather
+         Workers Planned: 4
+         ->  Parallel Seq Scan on tenk1 a
+   ->  Hash
+         ->  Limit
+               ->  Gather
+                     Workers Planned: 4
+                     ->  Parallel Seq Scan on tenk1 b
+                           Filter: (stringu1 ~~ '%AAAA'::text)
+(11 rows)
+
 -- to increase the parallel query test coverage
 EXPLAIN (analyze, timing off, summary off, costs off) SELECT * FROM tenk1;
                          QUERY PLAN                          
index dd9365d7c6e19f989e9d8fa99003a9ebe2ce24f7..b1c1d7762907d304159229eeb2f15e3cb4d39c89 100644 (file)
@@ -210,6 +210,11 @@ explain (costs off, verbose)
     (select unique1, row_number() over() from tenk1 b);
 
 
+-- LIMIT/OFFSET within sub-selects can't be pushed to workers.
+explain (costs off)
+  select * from tenk1 a where two in
+    (select two from tenk1 b where stringu1 like '%AAAA' limit 3);
+
 -- to increase the parallel query test coverage
 EXPLAIN (analyze, timing off, summary off, costs off) SELECT * FROM tenk1;