Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Revert commit 66c0185a3 and follow-on patches.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
This reverts 66c0185a3 (Allow planner to use Merge Append to
efficiently implement UNION) as well as the follow-on commits
d5d2205c83b1a7eb287487044d6.  In addition to those, 07746a8ef
had to be removed then re-applied in a different place, because
66c0185a3 moved the relevant code.

The reason for this last-minute thrashing is that depesz found a
case in which the patched code creates a completely wrong plan
that silently gives incorrect query results.  It's unclear what
the cause is or how many cases are affected, but with beta1 wrap
staring us in the face, there's no time for closer investigation.
After we figure that out, we can decide whether to un-revert this
for beta2 or hold it for v18.

Discussion: https://postgr.es/m/Zktzf926vslR35Fv@depesz.com
(also some private discussion among pgsql-release)

18 files changed:
contrib/postgres_fdw/expected/postgres_fdw.out
contrib/postgres_fdw/sql/postgres_fdw.sql
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/subselect.c
src/backend/optimizer/prep/prepunion.c
src/backend/parser/analyze.c
src/include/nodes/pathnodes.h
src/include/optimizer/paths.h
src/include/optimizer/planner.h
src/include/optimizer/prep.h
src/test/regress/expected/collate.icu.utf8.out
src/test/regress/expected/incremental_sort.out
src/test/regress/expected/union.out
src/test/regress/sql/collate.icu.utf8.sql
src/test/regress/sql/union.sql

index 078b8a966f852520cfb6c20006e4b3c6f2ffa444..9ae36d3059d16b10dd146e57f7753a414e3728fb 100644 (file)
@@ -11511,10 +11511,6 @@ DROP INDEX base_tbl1_idx;
 DROP INDEX base_tbl2_idx;
 DROP INDEX async_p3_idx;
 -- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
 EXPLAIN (VERBOSE, COSTS OFF)
 INSERT INTO result_tbl
 (SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -11596,9 +11592,6 @@ SELECT * FROM result_tbl ORDER BY a;
 (12 rows)
 
 DELETE FROM result_tbl;
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
 -- Disable async execution if we use gating Result nodes for pseudoconstant
 -- quals
 EXPLAIN (VERBOSE, COSTS OFF)
index 09ba234e43d3ecb16dd5d3e7b62212db54461039..1f31ac14df0bfd3a51943b61e76f8046b05bd3f0 100644 (file)
@@ -3885,11 +3885,6 @@ DROP INDEX base_tbl2_idx;
 DROP INDEX async_p3_idx;
 
 -- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
-
 EXPLAIN (VERBOSE, COSTS OFF)
 INSERT INTO result_tbl
 (SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -3916,10 +3911,6 @@ UNION ALL
 SELECT * FROM result_tbl ORDER BY a;
 DELETE FROM result_tbl;
 
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-
 -- Disable async execution if we use gating Result nodes for pseudoconstant
 -- quals
 EXPLAIN (VERBOSE, COSTS OFF)
index 4895cee994429c0f70cdc3399fba88c550696df9..10aeebc2c1d6e620cf371075ba6060b8c682312c 100644 (file)
@@ -2633,8 +2633,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
    Assert(root->plan_params == NIL);
 
    /* Generate a subroot and Paths for the subquery */
-   rel->subroot = subquery_planner(root->glob, subquery, root, false,
-                                   tuple_fraction, NULL);
+   rel->subroot = subquery_planner(root->glob, subquery,
+                                   root,
+                                   false, tuple_fraction);
 
    /* Isolate the params needed by this specific subplan */
    rel->subplan_params = root->plan_params;
index 21ce1ae2e1367f61417533d9283db9cdd41df746..13400af3ef53938dc6cd974046beb88c132bf263 100644 (file)
@@ -2882,67 +2882,6 @@ add_child_join_rel_equivalences(PlannerInfo *root,
    MemoryContextSwitchTo(oldcontext);
 }
 
-/*
- * add_setop_child_rel_equivalences
- *     Add equivalence members for each non-resjunk target in 'child_tlist'
- *     to the EquivalenceClass in the corresponding setop_pathkey's pk_eclass.
- *
- * 'root' is the PlannerInfo belonging to the top-level set operation.
- * 'child_rel' is the RelOptInfo of the child relation we're adding
- * EquivalenceMembers for.
- * 'child_tlist' is the target list for the setop child relation.  The target
- * list expressions are what we add as EquivalenceMembers.
- * 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
- * non-resjunk target in 'child_tlist'.
- */
-void
-add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
-                                List *child_tlist, List *setop_pathkeys)
-{
-   ListCell   *lc;
-   ListCell   *lc2 = list_head(setop_pathkeys);
-
-   foreach(lc, child_tlist)
-   {
-       TargetEntry *tle = lfirst_node(TargetEntry, lc);
-       EquivalenceMember *parent_em;
-       PathKey    *pk;
-
-       if (tle->resjunk)
-           continue;
-
-       if (lc2 == NULL)
-           elog(ERROR, "too few pathkeys for set operation");
-
-       pk = lfirst_node(PathKey, lc2);
-       parent_em = linitial(pk->pk_eclass->ec_members);
-
-       /*
-        * We can safely pass the parent member as the first member in the
-        * ec_members list as this is added first in generate_union_paths,
-        * likewise, the JoinDomain can be that of the initial member of the
-        * Pathkey's EquivalenceClass.
-        */
-       add_eq_member(pk->pk_eclass,
-                     tle->expr,
-                     child_rel->relids,
-                     parent_em->em_jdomain,
-                     parent_em,
-                     exprType((Node *) tle->expr));
-
-       lc2 = lnext(setop_pathkeys, lc2);
-   }
-
-   /*
-    * transformSetOperationStmt() ensures that the targetlist never contains
-    * any resjunk columns, so all eclasses that exist in 'root' must have
-    * received a new member in the loop above.  Add them to the child_rel's
-    * eclass_indexes.
-    */
-   child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
-                                             list_length(root->eq_classes) - 1);
-}
-
 
 /*
  * generate_implied_equalities_for_column
index 8b258cbef92d2ab2d551e89e73c4f04eacaf4196..157bc6a36d475e586760e81890b1e7632204bcd9 100644 (file)
@@ -2191,22 +2191,6 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
    return n;
 }
 
-/*
- * pathkeys_useful_for_setop
- *     Count the number of leading common pathkeys root's 'setop_pathkeys' in
- *     'pathkeys'.
- */
-static int
-pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
-{
-   int         n_common_pathkeys;
-
-   (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
-                                      &n_common_pathkeys);
-
-   return n_common_pathkeys;
-}
-
 /*
  * truncate_useless_pathkeys
  *     Shorten the given pathkey list to just the useful pathkeys.
@@ -2224,9 +2208,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
    if (nuseful2 > nuseful)
        nuseful = nuseful2;
    nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
-   if (nuseful2 > nuseful)
-       nuseful = nuseful2;
-   nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
    if (nuseful2 > nuseful)
        nuseful = nuseful2;
 
index 032818423f6f48011f5ebafcf2a717be28e20139..ddd23878409c60db33b0767193c3d38d08c088f6 100644 (file)
@@ -54,7 +54,6 @@
 #include "optimizer/tlist.h"
 #include "parser/analyze.h"
 #include "parser/parse_agg.h"
-#include "parser/parse_clause.h"
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "partitioning/partdesc.h"
@@ -120,15 +119,12 @@ typedef struct
 {
    List       *activeWindows;  /* active windows, if any */
    grouping_sets_data *gset_data;  /* grouping sets data, if any */
-   SetOperationStmt *setop;    /* parent set operation or NULL if not a
-                                * subquery belonging to a set operation */
 } standard_qp_extra;
 
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static void grouping_planner(PlannerInfo *root, double tuple_fraction,
-                            SetOperationStmt *setops);
+static void grouping_planner(PlannerInfo *root, double tuple_fraction);
 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
                                      int *tleref_to_colnum_map);
@@ -253,8 +249,6 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
                                 List *targetList,
                                 List *groupClause);
 static int common_prefix_cmp(const void *a, const void *b);
-static List *generate_setop_child_grouplist(SetOperationStmt *op,
-                                           List *targetlist);
 
 
 /*****************************************************************************
@@ -412,7 +406,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
    }
 
    /* primary planning entry point (may recurse for subqueries) */
-   root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
+   root = subquery_planner(glob, parse, NULL,
+                           false, tuple_fraction);
 
    /* Select best Path and turn it into a Plan */
    final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
@@ -603,10 +598,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
  * hasRecursion is true if this is a recursive WITH query.
  * tuple_fraction is the fraction of tuples we expect will be retrieved.
  * tuple_fraction is interpreted as explained for grouping_planner, below.
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated.  NULL when not planning a set operation
- * child.
  *
  * Basically, this routine does the stuff that should only be done once
  * per Query object.  It then calls grouping_planner.  At one time,
@@ -625,9 +616,9 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
  *--------------------
  */
 PlannerInfo *
-subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
-                bool hasRecursion, double tuple_fraction,
-                SetOperationStmt *setops)
+subquery_planner(PlannerGlobal *glob, Query *parse,
+                PlannerInfo *parent_root,
+                bool hasRecursion, double tuple_fraction)
 {
    PlannerInfo *root;
    List       *newWithCheckOptions;
@@ -1086,7 +1077,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
    /*
     * Do the main planning.
     */
-   grouping_planner(root, tuple_fraction, setops);
+   grouping_planner(root, tuple_fraction);
 
    /*
     * Capture the set of outer-level param IDs we have access to, for use in
@@ -1284,11 +1275,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
  *   0 < tuple_fraction < 1: expect the given fraction of tuples available
  *     from the plan to be retrieved
  *   tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
- *     expected to be retrieved (ie, a LIMIT specification).
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated.  NULL when not planning a set operation
- * child.
+ *     expected to be retrieved (ie, a LIMIT specification)
  *
  * Returns nothing; the useful output is in the Paths we attach to the
  * (UPPERREL_FINAL, NULL) upperrel in *root.  In addition,
@@ -1299,8 +1286,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
  *--------------------
  */
 static void
-grouping_planner(PlannerInfo *root, double tuple_fraction,
-                SetOperationStmt *setops)
+grouping_planner(PlannerInfo *root, double tuple_fraction)
 {
    Query      *parse = root->parse;
    int64       offset_est = 0;
@@ -1335,6 +1321,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 
    if (parse->setOperations)
    {
+       /*
+        * If there's a top-level ORDER BY, assume we have to fetch all the
+        * tuples.  This might be too simplistic given all the hackery below
+        * to possibly avoid the sort; but the odds of accurate estimates here
+        * are pretty low anyway.  XXX try to get rid of this in favor of
+        * letting plan_set_operations generate both fast-start and
+        * cheapest-total paths.
+        */
+       if (parse->sortClause)
+           root->tuple_fraction = 0.0;
+
        /*
         * Construct Paths for set operations.  The results will not need any
         * work except perhaps a top-level sort and/or LIMIT.  Note that any
@@ -1504,12 +1501,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
        qp_extra.activeWindows = activeWindows;
        qp_extra.gset_data = gset_data;
 
-       /*
-        * If we're a subquery for a set operation, store the SetOperationStmt
-        * in qp_extra.
-        */
-       qp_extra.setop = setops;
-
        /*
         * Generate the best unsorted and presorted paths for the scan/join
         * portion of this Query, ie the processing represented by the
@@ -3462,27 +3453,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                                      parse->sortClause,
                                      tlist);
 
-   /* setting setop_pathkeys might be useful to the union planner */
-   if (qp_extra->setop != NULL &&
-       set_operation_ordered_results_useful(qp_extra->setop))
-   {
-       List       *groupClauses;
-       bool        sortable;
-
-       groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
-
-       root->setop_pathkeys =
-           make_pathkeys_for_sortclauses_extended(root,
-                                                  &groupClauses,
-                                                  tlist,
-                                                  false,
-                                                  &sortable);
-       if (!sortable)
-           root->setop_pathkeys = NIL;
-   }
-   else
-       root->setop_pathkeys = NIL;
-
    /*
     * Figure out whether we want a sorted result from query_planner.
     *
@@ -3492,9 +3462,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
     * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
     * we try to produce output that's sufficiently well sorted for the
     * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
-    * by the ORDER BY clause.  Otherwise, if we're a subquery being planned
-    * for a set operation which can benefit from presorted results and have a
-    * sortable targetlist, we want to sort by the target list.
+    * by the ORDER BY clause.
     *
     * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
     * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3512,8 +3480,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
        root->query_pathkeys = root->distinct_pathkeys;
    else if (root->sort_pathkeys)
        root->query_pathkeys = root->sort_pathkeys;
-   else if (root->setop_pathkeys != NIL)
-       root->query_pathkeys = root->setop_pathkeys;
    else
        root->query_pathkeys = NIL;
 }
@@ -7957,43 +7923,3 @@ group_by_has_partkey(RelOptInfo *input_rel,
 
    return true;
 }
-
-/*
- * generate_setop_child_grouplist
- *     Build a SortGroupClause list defining the sort/grouping properties
- *     of the child of a set operation.
- *
- * This is similar to generate_setop_grouplist() but differs as the setop
- * child query's targetlist entries may already have a tleSortGroupRef
- * assigned for other purposes, such as GROUP BYs.  Here we keep the
- * SortGroupClause list in the same order as 'op' groupClauses and just adjust
- * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
- */
-static List *
-generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
-{
-   List       *grouplist = copyObject(op->groupClauses);
-   ListCell   *lg;
-   ListCell   *lt;
-
-   lg = list_head(grouplist);
-   foreach(lt, targetlist)
-   {
-       TargetEntry *tle = (TargetEntry *) lfirst(lt);
-       SortGroupClause *sgc;
-
-       /* resjunk columns could have sortgrouprefs.  Leave these alone */
-       if (tle->resjunk)
-           continue;
-
-       /* we expect every non-resjunk target to have a SortGroupClause */
-       Assert(lg != NULL);
-       sgc = (SortGroupClause *) lfirst(lg);
-       lg = lnext(grouplist, lg);
-
-       /* assign a tleSortGroupRef, or reuse the existing one */
-       sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
-   }
-   Assert(lg == NULL);
-   return grouplist;
-}
index e35ebea8b433f4a36d43df153265f605cc5db9d8..d5fa281b102e27be06ba8a6b17db27d57a994211 100644 (file)
@@ -218,8 +218,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
    Assert(root->plan_params == NIL);
 
    /* Generate Paths for the subquery */
-   subroot = subquery_planner(root->glob, subquery, root, false,
-                              tuple_fraction, NULL);
+   subroot = subquery_planner(root->glob, subquery,
+                              root,
+                              false, tuple_fraction);
 
    /* Isolate the params needed by this specific subplan */
    plan_params = root->plan_params;
@@ -265,8 +266,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
        if (subquery)
        {
            /* Generate Paths for the ANY subquery; we'll need all rows */
-           subroot = subquery_planner(root->glob, subquery, root, false, 0.0,
-                                      NULL);
+           subroot = subquery_planner(root->glob, subquery,
+                                      root,
+                                      false, 0.0);
 
            /* Isolate the params needed by this specific subplan */
            plan_params = root->plan_params;
@@ -965,8 +967,9 @@ SS_process_ctes(PlannerInfo *root)
         * Generate Paths for the CTE query.  Always plan for full retrieval
         * --- we don't have enough info to predict otherwise.
         */
-       subroot = subquery_planner(root->glob, subquery, root,
-                                  cte->cterecursive, 0.0, NULL);
+       subroot = subquery_planner(root->glob, subquery,
+                                  root,
+                                  cte->cterecursive, 0.0);
 
        /*
         * Since the current query level doesn't yet contain any RTEs, it
index 30068c27a1329207becdcc2dad004f0ceb9b9f93..296f866677a6446f8a7191b01268320e0b8647ee 100644 (file)
@@ -43,15 +43,11 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
                                          bool junkOK,
                                          int flag, List *refnames_tlist,
                                          List **pTargetList,
-                                         bool *istrivial_tlist);
+                                         double *pNumGroups);
 static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
                                           PlannerInfo *root,
                                           List *refnames_tlist,
                                           List **pTargetList);
-static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
-                                   bool trivial_tlist, List *child_tlist,
-                                   List *interesting_pathkeys,
-                                   double *pNumGroups);
 static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                        List *refnames_tlist,
                                        List **pTargetList);
@@ -61,8 +57,9 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
 static List *plan_union_children(PlannerInfo *root,
                                 SetOperationStmt *top_union,
                                 List *refnames_tlist,
-                                List **tlist_list,
-                                List **istrivial_tlist);
+                                List **tlist_list);
+static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+                              PlannerInfo *root);
 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                                Path *input_path,
@@ -117,10 +114,10 @@ plan_set_operations(PlannerInfo *root)
    Assert(parse->distinctClause == NIL);
 
    /*
-    * In the outer query level, equivalence classes are limited to classes
-    * which define that the top-level target entry is equivalent to the
-    * corresponding child target entry.  There won't be any equivalence class
-    * merging.  Mark that merging is complete to allow us to make pathkeys.
+    * In the outer query level, we won't have any true equivalences to deal
+    * with; but we do want to be able to make pathkeys, which will require
+    * single-member EquivalenceClasses.  Indicate that EC merging is complete
+    * so that pathkeys.c won't complain.
     */
    Assert(root->eq_classes == NIL);
    root->ec_merging_done = true;
@@ -155,8 +152,6 @@ plan_set_operations(PlannerInfo *root)
    }
    else
    {
-       bool        trivial_tlist;
-
        /*
         * Recurse on setOperations tree to generate paths for set ops. The
         * final output paths should have just the column types shown as the
@@ -168,7 +163,7 @@ plan_set_operations(PlannerInfo *root)
                                           true, -1,
                                           leftmostQuery->targetList,
                                           &top_tlist,
-                                          &trivial_tlist);
+                                          NULL);
    }
 
    /* Must return the built tlist into root->processed_tlist. */
@@ -177,31 +172,6 @@ plan_set_operations(PlannerInfo *root)
    return setop_rel;
 }
 
-/*
- * set_operation_ordered_results_useful
- *     Return true if the given SetOperationStmt can be executed by utilizing
- *     paths that provide sorted input according to the setop's targetlist.
- *     Returns false when sorted paths are not any more useful then unsorted
- *     ones.
- */
-bool
-set_operation_ordered_results_useful(SetOperationStmt *setop)
-{
-   /*
-    * Paths sorted by the targetlist are useful for UNION as we can opt to
-    * MergeAppend the sorted paths then Unique them.  Ordered paths are no
-    * more useful than unordered ones for UNION ALL.
-    */
-   if (!setop->all && setop->op == SETOP_UNION)
-       return true;
-
-   /*
-    * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
-    * correctly sorted input paths.
-    */
-   return false;
-}
-
 /*
  * recurse_set_operations
  *   Recursively handle one step in a tree of set operations
@@ -214,8 +184,8 @@ set_operation_ordered_results_useful(SetOperationStmt *setop)
  *
  * Returns a RelOptInfo for the subtree, as well as these output parameters:
  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
- * *istrivial_tlist: true if, and only if, datatypes between parent and child
- * match.
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ *     in the result, and store it there
  *
  * The pTargetList output parameter is mostly redundant with the pathtarget
  * of the returned RelOptInfo, but for the moment we need it because much of
@@ -232,11 +202,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                       bool junkOK,
                       int flag, List *refnames_tlist,
                       List **pTargetList,
-                      bool *istrivial_tlist)
+                      double *pNumGroups)
 {
-   RelOptInfo *rel;
-
-   *istrivial_tlist = true;    /* for now */
+   RelOptInfo *rel = NULL;     /* keep compiler quiet */
 
    /* Guard against stack overflow due to overly complex setop nests */
    check_stack_depth();
@@ -245,9 +213,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
    {
        RangeTblRef *rtr = (RangeTblRef *) setOp;
        RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
-       SetOperationStmt *setops;
        Query      *subquery = rte->subquery;
        PlannerInfo *subroot;
+       RelOptInfo *final_rel;
+       Path       *subpath;
+       Path       *path;
        List       *tlist;
        bool        trivial_tlist;
 
@@ -259,16 +229,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
        /* plan_params should not be in use in current query level */
        Assert(root->plan_params == NIL);
 
-       /*
-        * Pass the set operation details to the subquery_planner to have it
-        * consider generating Paths correctly ordered for the set operation.
-        */
-       setops = castNode(SetOperationStmt, root->parse->setOperations);
-
        /* Generate a subroot and Paths for the subquery */
-       subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
-                                                 false, root->tuple_fraction,
-                                                 setops);
+       subroot = rel->subroot = subquery_planner(root->glob, subquery,
+                                                 root,
+                                                 false,
+                                                 root->tuple_fraction);
 
        /*
         * It should not be possible for the primitive query to contain any
@@ -289,7 +254,90 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 
        /* Return the fully-fledged tlist to caller, too */
        *pTargetList = tlist;
-       *istrivial_tlist = trivial_tlist;
+
+       /*
+        * Mark rel with estimated output rows, width, etc.  Note that we have
+        * to do this before generating outer-query paths, else
+        * cost_subqueryscan is not happy.
+        */
+       set_subquery_size_estimates(root, rel);
+
+       /*
+        * Since we may want to add a partial path to this relation, we must
+        * set its consider_parallel flag correctly.
+        */
+       final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
+       rel->consider_parallel = final_rel->consider_parallel;
+
+       /*
+        * For the moment, we consider only a single Path for the subquery.
+        * This should change soon (make it look more like
+        * set_subquery_pathlist).
+        */
+       subpath = get_cheapest_fractional_path(final_rel,
+                                              root->tuple_fraction);
+
+       /*
+        * Stick a SubqueryScanPath atop that.
+        *
+        * We don't bother to determine the subquery's output ordering since
+        * it won't be reflected in the set-op result anyhow; so just label
+        * the SubqueryScanPath with nil pathkeys.  (XXX that should change
+        * soon too, likely.)
+        */
+       path = (Path *) create_subqueryscan_path(root, rel, subpath,
+                                                trivial_tlist,
+                                                NIL, NULL);
+
+       add_path(rel, path);
+
+       /*
+        * If we have a partial path for the child relation, we can use that
+        * to build a partial path for this relation.  But there's no point in
+        * considering any path but the cheapest.
+        */
+       if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+           final_rel->partial_pathlist != NIL)
+       {
+           Path       *partial_subpath;
+           Path       *partial_path;
+
+           partial_subpath = linitial(final_rel->partial_pathlist);
+           partial_path = (Path *)
+               create_subqueryscan_path(root, rel, partial_subpath,
+                                        trivial_tlist,
+                                        NIL, NULL);
+           add_partial_path(rel, partial_path);
+       }
+
+       /*
+        * Estimate number of groups if caller wants it.  If the subquery used
+        * grouping or aggregation, its output is probably mostly unique
+        * anyway; otherwise do statistical estimation.
+        *
+        * XXX you don't really want to know about this: we do the estimation
+        * using the subquery's original targetlist expressions, not the
+        * subroot->processed_tlist which might seem more appropriate.  The
+        * reason is that if the subquery is itself a setop, it may return a
+        * processed_tlist containing "varno 0" Vars generated by
+        * generate_append_tlist, and those would confuse estimate_num_groups
+        * mightily.  We ought to get rid of the "varno 0" hack, but that
+        * requires a redesign of the parsetree representation of setops, so
+        * that there can be an RTE corresponding to each setop's output.
+        */
+       if (pNumGroups)
+       {
+           if (subquery->groupClause || subquery->groupingSets ||
+               subquery->distinctClause ||
+               subroot->hasHavingQual || subquery->hasAggs)
+               *pNumGroups = subpath->rows;
+           else
+               *pNumGroups = estimate_num_groups(subroot,
+                                                 get_tlist_exprs(subquery->targetList, false),
+                                                 subpath->rows,
+                                                 NULL,
+                                                 NULL);
+       }
    }
    else if (IsA(setOp, SetOperationStmt))
    {
@@ -304,6 +352,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
            rel = generate_nonunion_paths(op, root,
                                          refnames_tlist,
                                          pTargetList);
+       if (pNumGroups)
+           *pNumGroups = rel->rows;
 
        /*
         * If necessary, add a Result node to project the caller-requested
@@ -333,7 +383,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                                                *pTargetList,
                                                refnames_tlist,
                                                &trivial_tlist);
-           *istrivial_tlist = trivial_tlist;
            target = create_pathtarget(root, *pTargetList);
 
            /* Apply projection to each path */
@@ -364,16 +413,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                lfirst(lc) = path;
            }
        }
-       postprocess_setop_rel(root, rel);
    }
    else
    {
        elog(ERROR, "unrecognized node type: %d",
             (int) nodeTag(setOp));
        *pTargetList = NIL;
-       rel = NULL;             /* keep compiler quiet */
    }
 
+   postprocess_setop_rel(root, rel);
+
    return rel;
 }
 
@@ -392,9 +441,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
    Path       *lpath;
    Path       *rpath;
    List       *lpath_tlist;
-   bool        lpath_trivial_tlist;
    List       *rpath_tlist;
-   bool        rpath_trivial_tlist;
    List       *tlist;
    List       *groupList;
    double      dNumGroups;
@@ -414,10 +461,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                                  false, -1,
                                  refnames_tlist,
                                  &lpath_tlist,
-                                 &lpath_trivial_tlist);
-   if (lrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
-                               NIL, NULL);
+                                 NULL);
    lpath = lrel->cheapest_total_path;
    /* The right path will want to look at the left one ... */
    root->non_recursive_path = lpath;
@@ -426,10 +470,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                                  false, -1,
                                  refnames_tlist,
                                  &rpath_tlist,
-                                 &rpath_trivial_tlist);
-   if (rrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
-                               NIL, NULL);
+                                 NULL);
    rpath = rrel->cheapest_total_path;
    root->non_recursive_path = NULL;
 
@@ -491,204 +532,6 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
    return result_rel;
 }
 
-/*
- * build_setop_child_paths
- *     Build paths for the set op child relation denoted by 'rel'.
- *
- * interesting_pathkeys: if not NIL, also include paths that suit these
- * pathkeys, sorting any unsorted paths as required.
- * *pNumGroups: if not NULL, we estimate the number of distinct groups
- *     in the result, and store it there
- */
-static void
-build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
-                       bool trivial_tlist, List *child_tlist,
-                       List *interesting_pathkeys, double *pNumGroups)
-{
-   RelOptInfo *final_rel;
-   List       *setop_pathkeys = rel->subroot->setop_pathkeys;
-   ListCell   *lc;
-
-   /* it can't be a set op child rel if it's not a subquery */
-   Assert(rel->rtekind == RTE_SUBQUERY);
-
-   /* when sorting is needed, add child rel equivalences */
-   if (interesting_pathkeys != NIL)
-       add_setop_child_rel_equivalences(root,
-                                        rel,
-                                        child_tlist,
-                                        interesting_pathkeys);
-
-   /*
-    * Mark rel with estimated output rows, width, etc.  Note that we have to
-    * do this before generating outer-query paths, else cost_subqueryscan is
-    * not happy.
-    */
-   set_subquery_size_estimates(root, rel);
-
-   /*
-    * Since we may want to add a partial path to this relation, we must set
-    * its consider_parallel flag correctly.
-    */
-   final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
-   rel->consider_parallel = final_rel->consider_parallel;
-
-   /* Generate subquery scan paths for any interesting path in final_rel */
-   foreach(lc, final_rel->pathlist)
-   {
-       Path       *subpath = (Path *) lfirst(lc);
-       List       *pathkeys;
-       Path       *cheapest_input_path = final_rel->cheapest_total_path;
-       bool        is_sorted;
-       int         presorted_keys;
-
-       /*
-        * Include the cheapest path as-is so that the set operation can be
-        * cheaply implemented using a method which does not require the input
-        * to be sorted.
-        */
-       if (subpath == cheapest_input_path)
-       {
-           /* Convert subpath's pathkeys to outer representation */
-           pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
-                                                make_tlist_from_pathtarget(subpath->pathtarget));
-
-           /* Generate outer path using this subpath */
-           add_path(rel, (Path *) create_subqueryscan_path(root,
-                                                           rel,
-                                                           subpath,
-                                                           trivial_tlist,
-                                                           pathkeys,
-                                                           NULL));
-       }
-
-       /* skip dealing with sorted paths if the setop doesn't need them */
-       if (interesting_pathkeys == NIL)
-           continue;
-
-       /*
-        * Create paths to suit final sort order required for setop_pathkeys.
-        * Here we'll sort the cheapest input path (if not sorted already) and
-        * incremental sort any paths which are partially sorted.
-        */
-       is_sorted = pathkeys_count_contained_in(setop_pathkeys,
-                                               subpath->pathkeys,
-                                               &presorted_keys);
-
-       if (!is_sorted)
-       {
-           double      limittuples = rel->subroot->limit_tuples;
-
-           /*
-            * Try at least sorting the cheapest path and also try
-            * incrementally sorting any path which is partially sorted
-            * already (no need to deal with paths which have presorted keys
-            * when incremental sort is disabled unless it's the cheapest
-            * input path).
-            */
-           if (subpath != cheapest_input_path &&
-               (presorted_keys == 0 || !enable_incremental_sort))
-               continue;
-
-           /*
-            * We've no need to consider both a sort and incremental sort.
-            * We'll just do a sort if there are no presorted keys and an
-            * incremental sort when there are presorted keys.
-            */
-           if (presorted_keys == 0 || !enable_incremental_sort)
-               subpath = (Path *) create_sort_path(rel->subroot,
-                                                   final_rel,
-                                                   subpath,
-                                                   setop_pathkeys,
-                                                   limittuples);
-           else
-               subpath = (Path *) create_incremental_sort_path(rel->subroot,
-                                                               final_rel,
-                                                               subpath,
-                                                               setop_pathkeys,
-                                                               presorted_keys,
-                                                               limittuples);
-       }
-
-       /*
-        * subpath is now sorted, so add it to the pathlist.  We already added
-        * the cheapest_input_path above, so don't add it again unless we just
-        * sorted it.
-        */
-       if (subpath != cheapest_input_path)
-       {
-           /* Convert subpath's pathkeys to outer representation */
-           pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
-                                                make_tlist_from_pathtarget(subpath->pathtarget));
-
-           /* Generate outer path using this subpath */
-           add_path(rel, (Path *) create_subqueryscan_path(root,
-                                                           rel,
-                                                           subpath,
-                                                           trivial_tlist,
-                                                           pathkeys,
-                                                           NULL));
-       }
-   }
-
-   /* if consider_parallel is false, there should be no partial paths */
-   Assert(final_rel->consider_parallel ||
-          final_rel->partial_pathlist == NIL);
-
-   /*
-    * If we have a partial path for the child relation, we can use that to
-    * build a partial path for this relation.  But there's no point in
-    * considering any path but the cheapest.
-    */
-   if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
-       final_rel->partial_pathlist != NIL)
-   {
-       Path       *partial_subpath;
-       Path       *partial_path;
-
-       partial_subpath = linitial(final_rel->partial_pathlist);
-       partial_path = (Path *)
-           create_subqueryscan_path(root, rel, partial_subpath,
-                                    trivial_tlist,
-                                    NIL, NULL);
-       add_partial_path(rel, partial_path);
-   }
-
-   postprocess_setop_rel(root, rel);
-
-   /*
-    * Estimate number of groups if caller wants it.  If the subquery used
-    * grouping or aggregation, its output is probably mostly unique anyway;
-    * otherwise do statistical estimation.
-    *
-    * XXX you don't really want to know about this: we do the estimation
-    * using the subquery's original targetlist expressions, not the
-    * subroot->processed_tlist which might seem more appropriate.  The reason
-    * is that if the subquery is itself a setop, it may return a
-    * processed_tlist containing "varno 0" Vars generated by
-    * generate_append_tlist, and those would confuse estimate_num_groups
-    * mightily.  We ought to get rid of the "varno 0" hack, but that requires
-    * a redesign of the parsetree representation of setops, so that there can
-    * be an RTE corresponding to each setop's output.
-    */
-   if (pNumGroups)
-   {
-       PlannerInfo *subroot = rel->subroot;
-       Query      *subquery = subroot->parse;
-
-       if (subquery->groupClause || subquery->groupingSets ||
-           subquery->distinctClause || subroot->hasHavingQual ||
-           subquery->hasAggs)
-           *pNumGroups = rel->cheapest_total_path->rows;
-       else
-           *pNumGroups = estimate_num_groups(subroot,
-                                             get_tlist_exprs(subquery->targetList, false),
-                                             rel->cheapest_total_path->rows,
-                                             NULL,
-                                             NULL);
-   }
-}
-
 /*
  * Generate paths for a UNION or UNION ALL node
  */
@@ -699,38 +542,41 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 {
    Relids      relids = NULL;
    RelOptInfo *result_rel;
+   double      save_fraction = root->tuple_fraction;
    ListCell   *lc;
-   ListCell   *lc2;
-   ListCell   *lc3;
-   List       *cheapest_pathlist = NIL;
-   List       *ordered_pathlist = NIL;
+   List       *pathlist = NIL;
    List       *partial_pathlist = NIL;
    bool        partial_paths_valid = true;
    bool        consider_parallel = true;
    List       *rellist;
    List       *tlist_list;
-   List       *trivial_tlist_list;
    List       *tlist;
-   List       *groupList = NIL;
-   Path       *apath;
-   Path       *gpath = NULL;
-   bool        try_sorted;
-   List       *union_pathkeys = NIL;
+   Path       *path;
+
+   /*
+    * If plain UNION, tell children to fetch all tuples.
+    *
+    * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
+    * each arm of the UNION ALL.  One could make a case for reducing the
+    * tuple fraction for later arms (discounting by the expected size of the
+    * earlier arms' results) but it seems not worth the trouble. The normal
+    * case where tuple_fraction isn't already zero is a LIMIT at top level,
+    * and passing it down as-is is usually enough to get the desired result
+    * of preferring fast-start plans.
+    */
+   if (!op->all)
+       root->tuple_fraction = 0.0;
 
    /*
     * If any of my children are identical UNION nodes (same op, all-flag, and
     * colTypes) then they can be merged into this node so that we generate
-    * only one Append/MergeAppend and unique-ification for the lot.  Recurse
-    * to find such nodes.
+    * only one Append and unique-ification for the lot.  Recurse to find such
+    * nodes and compute their children's paths.
     */
-   rellist = plan_union_children(root,
-                                 op,
-                                 refnames_tlist,
-                                 &tlist_list,
-                                 &trivial_tlist_list);
+   rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
 
    /*
-    * Generate tlist for Append/MergeAppend plan node.
+    * Generate tlist for Append plan node.
     *
     * The tlist for an Append plan isn't important as far as the Append is
     * concerned, but we must make it look real anyway for the benefit of the
@@ -738,68 +584,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     */
    tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
                                  tlist_list, refnames_tlist);
-   *pTargetList = tlist;
-
-   /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
-   try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
-
-   if (try_sorted)
-   {
-       /* Identify the grouping semantics */
-       groupList = generate_setop_grouplist(op, tlist);
-
-       /* Determine the pathkeys for sorting by the whole target list */
-       union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
-
-       root->query_pathkeys = union_pathkeys;
-   }
-
-   /*
-    * Now that we've got the append target list, we can build the union child
-    * paths.
-    */
-   forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
-   {
-       RelOptInfo *rel = lfirst(lc);
-       bool        trivial_tlist = lfirst_int(lc2);
-       List       *child_tlist = lfirst_node(List, lc3);
 
-       /* only build paths for the union children */
-       if (rel->rtekind == RTE_SUBQUERY)
-           build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
-                                   union_pathkeys, NULL);
-   }
+   *pTargetList = tlist;
 
    /* Build path lists and relid set. */
    foreach(lc, rellist)
    {
        RelOptInfo *rel = lfirst(lc);
-       Path       *ordered_path;
 
-       cheapest_pathlist = lappend(cheapest_pathlist,
-                                   rel->cheapest_total_path);
-
-       if (try_sorted)
-       {
-           ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
-                                                         union_pathkeys,
-                                                         NULL,
-                                                         TOTAL_COST,
-                                                         false);
-
-           if (ordered_path != NULL)
-               ordered_pathlist = lappend(ordered_pathlist, ordered_path);
-           else
-           {
-               /*
-                * If we can't find a sorted path, just give up trying to
-                * generate a list of correctly sorted child paths.  This can
-                * happen when type coercion was added to the targetlist due
-                * to mismatching types from the union children.
-                */
-               try_sorted = false;
-           }
-       }
+       pathlist = lappend(pathlist, rel->cheapest_total_path);
 
        if (consider_parallel)
        {
@@ -822,21 +615,28 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
    result_rel->reltarget = create_pathtarget(root, tlist);
    result_rel->consider_parallel = consider_parallel;
-   result_rel->consider_startup = (root->tuple_fraction > 0);
 
    /*
-    * Append the child results together using the cheapest paths from each
-    * union child.
+    * Append the child results together.
+    */
+   path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+                                      NIL, NULL, 0, false, -1);
+
+   /*
+    * For UNION ALL, we just need the Append path.  For UNION, need to add
+    * node(s) to remove duplicates.
     */
-   apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
-                                       NIL, NIL, NULL, 0, false, -1);
+   if (!op->all)
+       path = make_union_unique(op, path, tlist, root);
+
+   add_path(result_rel, path);
 
    /*
     * Estimate number of groups.  For now we just assume the output is unique
     * --- this is certainly true for the UNION case, and we want worst-case
     * estimates anyway.
     */
-   result_rel->rows = apath->rows;
+   result_rel->rows = path->rows;
 
    /*
     * Now consider doing the same thing using the partial paths plus Append
@@ -844,7 +644,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     */
    if (partial_paths_valid)
    {
-       Path       *papath;
+       Path       *ppath;
        int         parallel_workers = 0;
 
        /* Find the highest number of workers requested for any subpath. */
@@ -873,137 +673,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
        }
        Assert(parallel_workers > 0);
 
-       papath = (Path *)
+       ppath = (Path *)
            create_append_path(root, result_rel, NIL, partial_pathlist,
-                              NIL, NULL, parallel_workers,
-                              enable_parallel_append, -1);
-       gpath = (Path *)
-           create_gather_path(root, result_rel, papath,
+                              NIL, NULL,
+                              parallel_workers, enable_parallel_append,
+                              -1);
+       ppath = (Path *)
+           create_gather_path(root, result_rel, ppath,
                               result_rel->reltarget, NULL, NULL);
+       if (!op->all)
+           ppath = make_union_unique(op, ppath, tlist, root);
+       add_path(result_rel, ppath);
    }
 
-   if (!op->all)
-   {
-       double      dNumGroups;
-       bool        can_sort = grouping_is_sortable(groupList);
-       bool        can_hash = grouping_is_hashable(groupList);
-
-       /*
-        * XXX for the moment, take the number of distinct groups as equal to
-        * the total input size, i.e., the worst case.  This is too
-        * conservative, but it's not clear how to get a decent estimate of
-        * the true size.  One should note as well the propensity of novices
-        * to write UNION rather than UNION ALL even when they don't expect
-        * any duplicates...
-        */
-       dNumGroups = apath->rows;
-
-       if (can_hash)
-       {
-           Path       *path;
-
-           /*
-            * Try a hash aggregate plan on 'apath'.  This is the cheapest
-            * available path containing each append child.
-            */
-           path = (Path *) create_agg_path(root,
-                                           result_rel,
-                                           apath,
-                                           create_pathtarget(root, tlist),
-                                           AGG_HASHED,
-                                           AGGSPLIT_SIMPLE,
-                                           groupList,
-                                           NIL,
-                                           NULL,
-                                           dNumGroups);
-           add_path(result_rel, path);
-
-           /* Try hash aggregate on the Gather path, if valid */
-           if (gpath != NULL)
-           {
-               /* Hashed aggregate plan --- no sort needed */
-               path = (Path *) create_agg_path(root,
-                                               result_rel,
-                                               gpath,
-                                               create_pathtarget(root, tlist),
-                                               AGG_HASHED,
-                                               AGGSPLIT_SIMPLE,
-                                               groupList,
-                                               NIL,
-                                               NULL,
-                                               dNumGroups);
-               add_path(result_rel, path);
-           }
-       }
-
-       if (can_sort)
-       {
-           Path       *path = apath;
-
-           /* Try Sort -> Unique on the Append path */
-           if (groupList != NIL)
-               path = (Path *) create_sort_path(root, result_rel, path,
-                                                make_pathkeys_for_sortclauses(root, groupList, tlist),
-                                                -1.0);
-
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(path->pathkeys),
-                                                    dNumGroups);
-
-           add_path(result_rel, path);
-
-           /* Try Sort -> Unique on the Gather path, if set */
-           if (gpath != NULL)
-           {
-               path = gpath;
-
-               path = (Path *) create_sort_path(root, result_rel, path,
-                                                make_pathkeys_for_sortclauses(root, groupList, tlist),
-                                                -1.0);
-
-               path = (Path *) create_upper_unique_path(root,
-                                                        result_rel,
-                                                        path,
-                                                        list_length(path->pathkeys),
-                                                        dNumGroups);
-               add_path(result_rel, path);
-           }
-       }
-
-       /*
-        * Try making a MergeAppend path if we managed to find a path with the
-        * correct pathkeys in each union child query.
-        */
-       if (try_sorted && groupList != NIL)
-       {
-           Path       *path;
-
-           path = (Path *) create_merge_append_path(root,
-                                                    result_rel,
-                                                    ordered_pathlist,
-                                                    union_pathkeys,
-                                                    NULL);
-
-           /* and make the MergeAppend unique */
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(tlist),
-                                                    dNumGroups);
-
-           add_path(result_rel, path);
-       }
-   }
-   else
-   {
-       /* UNION ALL */
-       add_path(result_rel, apath);
-
-       if (gpath != NULL)
-           add_path(result_rel, gpath);
-   }
+   /* Undo effects of possibly forcing tuple_fraction to 0 */
+   root->tuple_fraction = save_fraction;
 
    return result_rel;
 }
@@ -1029,8 +713,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
               *tlist,
               *groupList,
               *pathlist;
-   bool        lpath_trivial_tlist,
-               rpath_trivial_tlist;
    double      dLeftGroups,
                dRightGroups,
                dNumGroups,
@@ -1050,26 +732,14 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                                  false, 0,
                                  refnames_tlist,
                                  &lpath_tlist,
-                                 &lpath_trivial_tlist);
-   if (lrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
-                               NIL, &dLeftGroups);
-   else
-       dLeftGroups = lrel->rows;
-
+                                 &dLeftGroups);
    lpath = lrel->cheapest_total_path;
    rrel = recurse_set_operations(op->rarg, root,
                                  op->colTypes, op->colCollations,
                                  false, 1,
                                  refnames_tlist,
                                  &rpath_tlist,
-                                 &rpath_trivial_tlist);
-   if (rrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
-                               NIL, &dRightGroups);
-   else
-       dRightGroups = rrel->rows;
-
+                                 &dRightGroups);
    rpath = rrel->cheapest_total_path;
 
    /* Undo effects of forcing tuple_fraction to 0 */
@@ -1206,16 +876,13 @@ static List *
 plan_union_children(PlannerInfo *root,
                    SetOperationStmt *top_union,
                    List *refnames_tlist,
-                   List **tlist_list,
-                   List **istrivial_tlist)
+                   List **tlist_list)
 {
    List       *pending_rels = list_make1(top_union);
    List       *result = NIL;
    List       *child_tlist;
-   bool        trivial_tlist;
 
    *tlist_list = NIL;
-   *istrivial_tlist = NIL;
 
    while (pending_rels != NIL)
    {
@@ -1254,14 +921,75 @@ plan_union_children(PlannerInfo *root,
                                                        false, -1,
                                                        refnames_tlist,
                                                        &child_tlist,
-                                                       &trivial_tlist));
+                                                       NULL));
        *tlist_list = lappend(*tlist_list, child_tlist);
-       *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
    }
 
    return result;
 }
 
+/*
+ * Add nodes to the given path tree to unique-ify the result of a UNION.
+ */
+static Path *
+make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+                 PlannerInfo *root)
+{
+   RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
+   List       *groupList;
+   double      dNumGroups;
+
+   /* Identify the grouping semantics */
+   groupList = generate_setop_grouplist(op, tlist);
+
+   /*
+    * XXX for the moment, take the number of distinct groups as equal to the
+    * total input size, ie, the worst case.  This is too conservative, but
+    * it's not clear how to get a decent estimate of the true size.  One
+    * should note as well the propensity of novices to write UNION rather
+    * than UNION ALL even when they don't expect any duplicates...
+    */
+   dNumGroups = path->rows;
+
+   /* Decide whether to hash or sort */
+   if (choose_hashed_setop(root, groupList, path,
+                           dNumGroups, dNumGroups,
+                           "UNION"))
+   {
+       /* Hashed aggregate plan --- no sort needed */
+       path = (Path *) create_agg_path(root,
+                                       result_rel,
+                                       path,
+                                       create_pathtarget(root, tlist),
+                                       AGG_HASHED,
+                                       AGGSPLIT_SIMPLE,
+                                       groupList,
+                                       NIL,
+                                       NULL,
+                                       dNumGroups);
+   }
+   else
+   {
+       /* Sort and Unique */
+       if (groupList)
+           path = (Path *)
+               create_sort_path(root,
+                                result_rel,
+                                path,
+                                make_pathkeys_for_sortclauses(root,
+                                                              groupList,
+                                                              tlist),
+                                -1.0);
+       path = (Path *) create_upper_unique_path(root,
+                                                result_rel,
+                                                path,
+                                                list_length(path->pathkeys),
+                                                dNumGroups);
+   }
+
+   return path;
+}
+
 /*
  * postprocess_setop_rel - perform steps required after adding paths
  */
index 28fed9d87f6bcd1e0b0fa2bf16e250a8958f0311..40ea19e6f107f9cd62a8a39ae28438919c9e430a 100644 (file)
@@ -1890,8 +1890,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
     * For now, we don't support resjunk sort clauses on the output of a
     * setOperation tree --- you can only use the SQL92-spec options of
     * selecting an output column by name or number.  Enforce by checking that
-    * transformSortClause doesn't add any items to tlist.  Note, if changing
-    * this, add_setop_child_rel_equivalences() will need to be updated.
+    * transformSortClause doesn't add any items to tlist.
     */
    tllen = list_length(qry->targetList);
 
index 14ef296ab72f34518d782a5f176c976daafeeca2..6ec81637c1d552aa2d3483802f138b92fafd9807 100644 (file)
@@ -400,8 +400,6 @@ struct PlannerInfo
    List       *distinct_pathkeys;
    /* sortClause pathkeys, if any */
    List       *sort_pathkeys;
-   /* set operator pathkeys, if any */
-   List       *setop_pathkeys;
 
    /* Canonicalised partition schemes used in the query. */
    List       *part_schemes pg_node_attr(read_write_ignore);
index 914d9bdef5878fe9527e9c15c6f37ea5d21c5df4..5f500a1c69fb02ce4af08cdac7502cfe3ae2481a 100644 (file)
@@ -173,10 +173,6 @@ extern void add_child_join_rel_equivalences(PlannerInfo *root,
                                            AppendRelInfo **appinfos,
                                            RelOptInfo *parent_joinrel,
                                            RelOptInfo *child_joinrel);
-extern void add_setop_child_rel_equivalences(PlannerInfo *root,
-                                            RelOptInfo *child_rel,
-                                            List *child_tlist,
-                                            List *setop_pathkeys);
 extern List *generate_implied_equalities_for_column(PlannerInfo *root,
                                                    RelOptInfo *rel,
                                                    ec_matches_callback_type callback,
index 5aeff21b967f074991044f68d818a00f86555436..e1d79ffdf3ca5e94a3db7638d3bea9380b4b2d17 100644 (file)
@@ -44,8 +44,7 @@ extern PlannedStmt *standard_planner(Query *parse, const char *query_string,
 
 extern PlannerInfo *subquery_planner(PlannerGlobal *glob, Query *parse,
                                     PlannerInfo *parent_root,
-                                    bool hasRecursion, double tuple_fraction,
-                                    SetOperationStmt *setops);
+                                    bool hasRecursion, double tuple_fraction);
 
 extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
                                       LockClauseStrength strength);
index a52dec285d569970c49e702f8c0d41aa2505d279..8e00716dc82069af91164632ddee35283ae0e754 100644 (file)
@@ -53,6 +53,6 @@ extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
  * prototypes for prepunion.c
  */
 extern RelOptInfo *plan_set_operations(PlannerInfo *root);
-extern bool set_operation_ordered_results_useful(SetOperationStmt *setop);
+
 
 #endif                         /* PREP_H */
index 7d59fb44316574cd27313ae16fce3873c2398ec6..2de8924b52436ff142707ec5fe32a6354bfabe05 100644 (file)
@@ -1396,7 +1396,6 @@ SELECT x FROM test3cs WHERE x ~ 'a';
  abc
 (1 row)
 
-SET enable_hashagg TO off;
 SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
   x  
 -----
@@ -1449,7 +1448,6 @@ SELECT DISTINCT x FROM test3cs ORDER BY x;
  ghi
 (4 rows)
 
-RESET enable_hashagg;
 SELECT count(DISTINCT x) FROM test3cs;
  count 
 -------
index 5fd54a10b1aaf88ee7d5ddf472adb2bbc6cdfc46..7fdb685313dbeee1a5f1554cc413b57af630085c 100644 (file)
@@ -1472,19 +1472,14 @@ explain (costs off) select * from t union select * from t order by 1,3;
    Sort Key: t.a, t.c
    Presorted Key: t.a
    ->  Unique
-         ->  Merge Append
+         ->  Sort
                Sort Key: t.a, t.b, t.c
-               ->  Gather Merge
+               ->  Gather
                      Workers Planned: 2
-                     ->  Sort
-                           Sort Key: t.a, t.b, t.c
+                     ->  Parallel Append
                            ->  Parallel Seq Scan on t
-               ->  Gather Merge
-                     Workers Planned: 2
-                     ->  Sort
-                           Sort Key: t_1.a, t_1.b, t_1.c
                            ->  Parallel Seq Scan on t t_1
-(16 rows)
+(11 rows)
 
 -- Full sort, not just incremental sort can be pushed below a gather merge path
 -- by generate_useful_gather_paths.
index 26b718e9033f75bab9c476a93f57371a6d4f4488..882017afc9a72c74c3c923aff965d343d8d04133 100644 (file)
@@ -412,17 +412,16 @@ set enable_hashagg to off;
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                              QUERY PLAN                              
+----------------------------------------------------------------------
  Aggregate
    ->  Unique
-         ->  Merge Append
+         ->  Sort
                Sort Key: tenk1.unique1
-               ->  Index Only Scan using tenk1_unique1 on tenk1
-               ->  Sort
-                     Sort Key: tenk1_1.fivethous
+               ->  Append
+                     ->  Index Only Scan using tenk1_unique1 on tenk1
                      ->  Seq Scan on tenk1 tenk1_1
-(8 rows)
+(7 rows)
 
 select count(*) from
   ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
@@ -951,9 +950,16 @@ select except select;
 -- check hashed implementation
 set enable_hashagg = true;
 set enable_sort = false;
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ HashAggregate
+   ->  Append
+         ->  Function Scan on generate_series
+         ->  Function Scan on generate_series generate_series_1
+(4 rows)
+
 explain (costs off)
 select from generate_series(1,5) intersect select from generate_series(1,3);
                               QUERY PLAN                              
@@ -966,6 +972,10 @@ select from generate_series(1,5) intersect select from generate_series(1,3);
                ->  Function Scan on generate_series generate_series_1
 (6 rows)
 
+select from generate_series(1,5) union select from generate_series(1,3);
+--
+(1 row)
+
 select from generate_series(1,5) union all select from generate_series(1,3);
 --
 (8 rows)
@@ -1035,20 +1045,6 @@ select from generate_series(1,5) except all select from generate_series(1,3);
 --
 (2 rows)
 
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
 reset enable_hashagg;
 reset enable_sort;
 --
@@ -1085,7 +1081,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
 set enable_seqscan = off;
 set enable_indexscan = on;
 set enable_bitmapscan = off;
-set enable_sort = off;
 explain (costs off)
  SELECT * FROM
  (SELECT a || b AS ab FROM t1
@@ -1167,7 +1162,6 @@ explain (costs off)
 reset enable_seqscan;
 reset enable_indexscan;
 reset enable_bitmapscan;
-reset enable_sort;
 -- This simpler variant of the above test has been observed to fail differently
 create table events (event_id int primary key);
 create table other_events (event_id int primary key);
index 80f28a97d785b27a3dc345704848a2f23bccfadc..03837de846bdaefb71509ccac6eb76bdd234af0e 100644 (file)
@@ -555,7 +555,6 @@ SELECT x FROM test3cs WHERE x LIKE 'a%';
 SELECT x FROM test3cs WHERE x ILIKE 'a%';
 SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
 SELECT x FROM test3cs WHERE x ~ 'a';
-SET enable_hashagg TO off;
 SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
 SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
 SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
@@ -563,7 +562,6 @@ SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
 SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
 SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
 SELECT DISTINCT x FROM test3cs ORDER BY x;
-RESET enable_hashagg;
 SELECT count(DISTINCT x) FROM test3cs;
 SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
 SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
index 8afc580c6320139bc56d818c616179edea3d08bc..d160db5458855e2b87303f8a9ce071e25aaf55f9 100644 (file)
@@ -302,12 +302,12 @@ select except select;
 set enable_hashagg = true;
 set enable_sort = false;
 
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
 explain (costs off)
 select from generate_series(1,5) intersect select from generate_series(1,3);
 
+select from generate_series(1,5) union select from generate_series(1,3);
 select from generate_series(1,5) union all select from generate_series(1,3);
 select from generate_series(1,5) intersect select from generate_series(1,3);
 select from generate_series(1,5) intersect all select from generate_series(1,3);
@@ -330,17 +330,6 @@ select from generate_series(1,5) intersect all select from generate_series(1,3);
 select from generate_series(1,5) except select from generate_series(1,3);
 select from generate_series(1,5) except all select from generate_series(1,3);
 
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
-
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
 reset enable_hashagg;
 reset enable_sort;
 
@@ -372,7 +361,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
 set enable_seqscan = off;
 set enable_indexscan = on;
 set enable_bitmapscan = off;
-set enable_sort = off;
 
 explain (costs off)
  SELECT * FROM
@@ -419,7 +407,6 @@ explain (costs off)
 reset enable_seqscan;
 reset enable_indexscan;
 reset enable_bitmapscan;
-reset enable_sort;
 
 -- This simpler variant of the above test has been observed to fail differently