Revert commit 66c0185a3 and follow-on patches.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out

index 078b8a966f852520cfb6c20006e4b3c6f2ffa444..9ae36d3059d16b10dd146e57f7753a414e3728fb 100644 (file)
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -11511,10 +11511,6 @@ DROP INDEX base_tbl1_idx;
  DROP INDEX base_tbl2_idx;
  DROP INDEX async_p3_idx;
  -- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
  EXPLAIN (VERBOSE, COSTS OFF)
  INSERT INTO result_tbl
  (SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -11596,9 +11592,6 @@ SELECT * FROM result_tbl ORDER BY a;
  (12 rows)
  
  DELETE FROM result_tbl;
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
  -- Disable async execution if we use gating Result nodes for pseudoconstant
  -- quals
  EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql

index 09ba234e43d3ecb16dd5d3e7b62212db54461039..1f31ac14df0bfd3a51943b61e76f8046b05bd3f0 100644 (file)
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3885,11 +3885,6 @@ DROP INDEX base_tbl2_idx;
  DROP INDEX async_p3_idx;
  
  -- UNION queries
-SET enable_sort TO off;
-SET enable_incremental_sort TO off;
--- Adjust fdw_startup_cost so that we get an unordered path in the Append.
-ALTER SERVER loopback2 OPTIONS (ADD fdw_startup_cost '0.00');
-
  EXPLAIN (VERBOSE, COSTS OFF)
  INSERT INTO result_tbl
  (SELECT a, b, 'AAA' || c FROM async_p1 ORDER BY a LIMIT 10)
@@ -3916,10 +3911,6 @@ UNION ALL
  SELECT * FROM result_tbl ORDER BY a;
  DELETE FROM result_tbl;
  
-RESET enable_incremental_sort;
-RESET enable_sort;
-ALTER SERVER loopback2 OPTIONS (DROP fdw_startup_cost);
-
  -- Disable async execution if we use gating Result nodes for pseudoconstant
  -- quals
  EXPLAIN (VERBOSE, COSTS OFF)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 4895cee994429c0f70cdc3399fba88c550696df9..10aeebc2c1d6e620cf371075ba6060b8c682312c 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2633,8 +2633,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
     Assert(root->plan_params == NIL);
  
     /* Generate a subroot and Paths for the subquery */
-   rel->subroot = subquery_planner(root->glob, subquery, root, false,
-                                   tuple_fraction, NULL);
+   rel->subroot = subquery_planner(root->glob, subquery,
+                                   root,
+                                   false, tuple_fraction);
  
     /* Isolate the params needed by this specific subplan */
     rel->subplan_params = root->plan_params;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c

index 21ce1ae2e1367f61417533d9283db9cdd41df746..13400af3ef53938dc6cd974046beb88c132bf263 100644 (file)
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2882,67 +2882,6 @@ add_child_join_rel_equivalences(PlannerInfo *root,
     MemoryContextSwitchTo(oldcontext);
  }
  
-/*
- * add_setop_child_rel_equivalences
- *     Add equivalence members for each non-resjunk target in 'child_tlist'
- *     to the EquivalenceClass in the corresponding setop_pathkey's pk_eclass.
- *
- * 'root' is the PlannerInfo belonging to the top-level set operation.
- * 'child_rel' is the RelOptInfo of the child relation we're adding
- * EquivalenceMembers for.
- * 'child_tlist' is the target list for the setop child relation.  The target
- * list expressions are what we add as EquivalenceMembers.
- * 'setop_pathkeys' is a list of PathKeys which must contain an entry for each
- * non-resjunk target in 'child_tlist'.
- */
-void
-add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel,
-                                List *child_tlist, List *setop_pathkeys)
-{
-   ListCell   *lc;
-   ListCell   *lc2 = list_head(setop_pathkeys);
-
-   foreach(lc, child_tlist)
-   {
-       TargetEntry *tle = lfirst_node(TargetEntry, lc);
-       EquivalenceMember *parent_em;
-       PathKey    *pk;
-
-       if (tle->resjunk)
-           continue;
-
-       if (lc2 == NULL)
-           elog(ERROR, "too few pathkeys for set operation");
-
-       pk = lfirst_node(PathKey, lc2);
-       parent_em = linitial(pk->pk_eclass->ec_members);
-
-       /*
-        * We can safely pass the parent member as the first member in the
-        * ec_members list as this is added first in generate_union_paths,
-        * likewise, the JoinDomain can be that of the initial member of the
-        * Pathkey's EquivalenceClass.
-        */
-       add_eq_member(pk->pk_eclass,
-                     tle->expr,
-                     child_rel->relids,
-                     parent_em->em_jdomain,
-                     parent_em,
-                     exprType((Node *) tle->expr));
-
-       lc2 = lnext(setop_pathkeys, lc2);
-   }
-
-   /*
-    * transformSetOperationStmt() ensures that the targetlist never contains
-    * any resjunk columns, so all eclasses that exist in 'root' must have
-    * received a new member in the loop above.  Add them to the child_rel's
-    * eclass_indexes.
-    */
-   child_rel->eclass_indexes = bms_add_range(child_rel->eclass_indexes, 0,
-                                             list_length(root->eq_classes) - 1);
-}
-
  
  /*
   * generate_implied_equalities_for_column
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 8b258cbef92d2ab2d551e89e73c4f04eacaf4196..157bc6a36d475e586760e81890b1e7632204bcd9 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2191,22 +2191,6 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
     return n;
  }
  
-/*
- * pathkeys_useful_for_setop
- *     Count the number of leading common pathkeys root's 'setop_pathkeys' in
- *     'pathkeys'.
- */
-static int
-pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
-{
-   int         n_common_pathkeys;
-
-   (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
-                                      &n_common_pathkeys);
-
-   return n_common_pathkeys;
-}
-
  /*
   * truncate_useless_pathkeys
   *     Shorten the given pathkey list to just the useful pathkeys.
@@ -2224,9 +2208,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
     if (nuseful2 > nuseful)
         nuseful = nuseful2;
     nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
-   if (nuseful2 > nuseful)
-       nuseful = nuseful2;
-   nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
     if (nuseful2 > nuseful)
         nuseful = nuseful2;
  
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 032818423f6f48011f5ebafcf2a717be28e20139..ddd23878409c60db33b0767193c3d38d08c088f6 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -54,7 +54,6 @@
  #include "optimizer/tlist.h"
  #include "parser/analyze.h"
  #include "parser/parse_agg.h"
-#include "parser/parse_clause.h"
  #include "parser/parse_relation.h"
  #include "parser/parsetree.h"
  #include "partitioning/partdesc.h"
@@ -120,15 +119,12 @@ typedef struct
  {
     List       *activeWindows;  /* active windows, if any */
     grouping_sets_data *gset_data;  /* grouping sets data, if any */
-   SetOperationStmt *setop;    /* parent set operation or NULL if not a
-                                * subquery belonging to a set operation */
  } standard_qp_extra;
  
  /* Local functions */
  static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
  static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static void grouping_planner(PlannerInfo *root, double tuple_fraction,
-                            SetOperationStmt *setops);
+static void grouping_planner(PlannerInfo *root, double tuple_fraction);
  static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
  static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
                                       int *tleref_to_colnum_map);
@@ -253,8 +249,6 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
                                  List *targetList,
                                  List *groupClause);
  static int common_prefix_cmp(const void *a, const void *b);
-static List *generate_setop_child_grouplist(SetOperationStmt *op,
-                                           List *targetlist);
  
  
  /*****************************************************************************
@@ -412,7 +406,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
     }
  
     /* primary planning entry point (may recurse for subqueries) */
-   root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
+   root = subquery_planner(glob, parse, NULL,
+                           false, tuple_fraction);
  
     /* Select best Path and turn it into a Plan */
     final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
@@ -603,10 +598,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
   * hasRecursion is true if this is a recursive WITH query.
   * tuple_fraction is the fraction of tuples we expect will be retrieved.
   * tuple_fraction is interpreted as explained for grouping_planner, below.
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated.  NULL when not planning a set operation
- * child.
   *
   * Basically, this routine does the stuff that should only be done once
   * per Query object.  It then calls grouping_planner.  At one time,
@@ -625,9 +616,9 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
   *--------------------
   */
  PlannerInfo *
-subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
-                bool hasRecursion, double tuple_fraction,
-                SetOperationStmt *setops)
+subquery_planner(PlannerGlobal *glob, Query *parse,
+                PlannerInfo *parent_root,
+                bool hasRecursion, double tuple_fraction)
  {
     PlannerInfo *root;
     List       *newWithCheckOptions;
@@ -1086,7 +1077,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
     /*
      * Do the main planning.
      */
-   grouping_planner(root, tuple_fraction, setops);
+   grouping_planner(root, tuple_fraction);
  
     /*
      * Capture the set of outer-level param IDs we have access to, for use in
@@ -1284,11 +1275,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
   *   0 < tuple_fraction < 1: expect the given fraction of tuples available
   *     from the plan to be retrieved
   *   tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
- *     expected to be retrieved (ie, a LIMIT specification).
- * setops is used for set operation subqueries to provide the subquery with
- * the context in which it's being used so that Paths correctly sorted for the
- * set operation can be generated.  NULL when not planning a set operation
- * child.
+ *     expected to be retrieved (ie, a LIMIT specification)
   *
   * Returns nothing; the useful output is in the Paths we attach to the
   * (UPPERREL_FINAL, NULL) upperrel in *root.  In addition,
@@ -1299,8 +1286,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
   *--------------------
   */
  static void
-grouping_planner(PlannerInfo *root, double tuple_fraction,
-                SetOperationStmt *setops)
+grouping_planner(PlannerInfo *root, double tuple_fraction)
  {
     Query      *parse = root->parse;
     int64       offset_est = 0;
@@ -1335,6 +1321,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
  
     if (parse->setOperations)
     {
+       /*
+        * If there's a top-level ORDER BY, assume we have to fetch all the
+        * tuples.  This might be too simplistic given all the hackery below
+        * to possibly avoid the sort; but the odds of accurate estimates here
+        * are pretty low anyway.  XXX try to get rid of this in favor of
+        * letting plan_set_operations generate both fast-start and
+        * cheapest-total paths.
+        */
+       if (parse->sortClause)
+           root->tuple_fraction = 0.0;
+
         /*
          * Construct Paths for set operations.  The results will not need any
          * work except perhaps a top-level sort and/or LIMIT.  Note that any
@@ -1504,12 +1501,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
         qp_extra.activeWindows = activeWindows;
         qp_extra.gset_data = gset_data;
  
-       /*
-        * If we're a subquery for a set operation, store the SetOperationStmt
-        * in qp_extra.
-        */
-       qp_extra.setop = setops;
-
         /*
          * Generate the best unsorted and presorted paths for the scan/join
          * portion of this Query, ie the processing represented by the
@@ -3462,27 +3453,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                                       parse->sortClause,
                                       tlist);
  
-   /* setting setop_pathkeys might be useful to the union planner */
-   if (qp_extra->setop != NULL &&
-       set_operation_ordered_results_useful(qp_extra->setop))
-   {
-       List       *groupClauses;
-       bool        sortable;
-
-       groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
-
-       root->setop_pathkeys =
-           make_pathkeys_for_sortclauses_extended(root,
-                                                  &groupClauses,
-                                                  tlist,
-                                                  false,
-                                                  &sortable);
-       if (!sortable)
-           root->setop_pathkeys = NIL;
-   }
-   else
-       root->setop_pathkeys = NIL;
-
     /*
      * Figure out whether we want a sorted result from query_planner.
      *
@@ -3492,9 +3462,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
      * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
      * we try to produce output that's sufficiently well sorted for the
      * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
-    * by the ORDER BY clause.  Otherwise, if we're a subquery being planned
-    * for a set operation which can benefit from presorted results and have a
-    * sortable targetlist, we want to sort by the target list.
+    * by the ORDER BY clause.
      *
      * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
      * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3512,8 +3480,6 @@ standard_qp_callback(PlannerInfo *root, void *extra)
         root->query_pathkeys = root->distinct_pathkeys;
     else if (root->sort_pathkeys)
         root->query_pathkeys = root->sort_pathkeys;
-   else if (root->setop_pathkeys != NIL)
-       root->query_pathkeys = root->setop_pathkeys;
     else
         root->query_pathkeys = NIL;
  }
@@ -7957,43 +7923,3 @@ group_by_has_partkey(RelOptInfo *input_rel,
  
     return true;
  }
-
-/*
- * generate_setop_child_grouplist
- *     Build a SortGroupClause list defining the sort/grouping properties
- *     of the child of a set operation.
- *
- * This is similar to generate_setop_grouplist() but differs as the setop
- * child query's targetlist entries may already have a tleSortGroupRef
- * assigned for other purposes, such as GROUP BYs.  Here we keep the
- * SortGroupClause list in the same order as 'op' groupClauses and just adjust
- * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
- */
-static List *
-generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
-{
-   List       *grouplist = copyObject(op->groupClauses);
-   ListCell   *lg;
-   ListCell   *lt;
-
-   lg = list_head(grouplist);
-   foreach(lt, targetlist)
-   {
-       TargetEntry *tle = (TargetEntry *) lfirst(lt);
-       SortGroupClause *sgc;
-
-       /* resjunk columns could have sortgrouprefs.  Leave these alone */
-       if (tle->resjunk)
-           continue;
-
-       /* we expect every non-resjunk target to have a SortGroupClause */
-       Assert(lg != NULL);
-       sgc = (SortGroupClause *) lfirst(lg);
-       lg = lnext(grouplist, lg);
-
-       /* assign a tleSortGroupRef, or reuse the existing one */
-       sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
-   }
-   Assert(lg == NULL);
-   return grouplist;
-}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index e35ebea8b433f4a36d43df153265f605cc5db9d8..d5fa281b102e27be06ba8a6b17db27d57a994211 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -218,8 +218,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
     Assert(root->plan_params == NIL);
  
     /* Generate Paths for the subquery */
-   subroot = subquery_planner(root->glob, subquery, root, false,
-                              tuple_fraction, NULL);
+   subroot = subquery_planner(root->glob, subquery,
+                              root,
+                              false, tuple_fraction);
  
     /* Isolate the params needed by this specific subplan */
     plan_params = root->plan_params;
@@ -265,8 +266,9 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
         if (subquery)
         {
             /* Generate Paths for the ANY subquery; we'll need all rows */
-           subroot = subquery_planner(root->glob, subquery, root, false, 0.0,
-                                      NULL);
+           subroot = subquery_planner(root->glob, subquery,
+                                      root,
+                                      false, 0.0);
  
             /* Isolate the params needed by this specific subplan */
             plan_params = root->plan_params;
@@ -965,8 +967,9 @@ SS_process_ctes(PlannerInfo *root)
          * Generate Paths for the CTE query.  Always plan for full retrieval
          * --- we don't have enough info to predict otherwise.
          */
-       subroot = subquery_planner(root->glob, subquery, root,
-                                  cte->cterecursive, 0.0, NULL);
+       subroot = subquery_planner(root->glob, subquery,
+                                  root,
+                                  cte->cterecursive, 0.0);
  
         /*
          * Since the current query level doesn't yet contain any RTEs, it
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 30068c27a1329207becdcc2dad004f0ceb9b9f93..296f866677a6446f8a7191b01268320e0b8647ee 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -43,15 +43,11 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
                                           bool junkOK,
                                           int flag, List *refnames_tlist,
                                           List **pTargetList,
-                                         bool *istrivial_tlist);
+                                         double *pNumGroups);
  static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
                                            PlannerInfo *root,
                                            List *refnames_tlist,
                                            List **pTargetList);
-static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
-                                   bool trivial_tlist, List *child_tlist,
-                                   List *interesting_pathkeys,
-                                   double *pNumGroups);
  static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                         List *refnames_tlist,
                                         List **pTargetList);
@@ -61,8 +57,9 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
  static List *plan_union_children(PlannerInfo *root,
                                  SetOperationStmt *top_union,
                                  List *refnames_tlist,
-                                List **tlist_list,
-                                List **istrivial_tlist);
+                                List **tlist_list);
+static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+                              PlannerInfo *root);
  static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
  static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                                 Path *input_path,
@@ -117,10 +114,10 @@ plan_set_operations(PlannerInfo *root)
     Assert(parse->distinctClause == NIL);
  
     /*
-    * In the outer query level, equivalence classes are limited to classes
-    * which define that the top-level target entry is equivalent to the
-    * corresponding child target entry.  There won't be any equivalence class
-    * merging.  Mark that merging is complete to allow us to make pathkeys.
+    * In the outer query level, we won't have any true equivalences to deal
+    * with; but we do want to be able to make pathkeys, which will require
+    * single-member EquivalenceClasses.  Indicate that EC merging is complete
+    * so that pathkeys.c won't complain.
      */
     Assert(root->eq_classes == NIL);
     root->ec_merging_done = true;
@@ -155,8 +152,6 @@ plan_set_operations(PlannerInfo *root)
     }
     else
     {
-       bool        trivial_tlist;
-
         /*
          * Recurse on setOperations tree to generate paths for set ops. The
          * final output paths should have just the column types shown as the
@@ -168,7 +163,7 @@ plan_set_operations(PlannerInfo *root)
                                            true, -1,
                                            leftmostQuery->targetList,
                                            &top_tlist,
-                                          &trivial_tlist);
+                                          NULL);
     }
  
     /* Must return the built tlist into root->processed_tlist. */
@@ -177,31 +172,6 @@ plan_set_operations(PlannerInfo *root)
     return setop_rel;
  }
  
-/*
- * set_operation_ordered_results_useful
- *     Return true if the given SetOperationStmt can be executed by utilizing
- *     paths that provide sorted input according to the setop's targetlist.
- *     Returns false when sorted paths are not any more useful then unsorted
- *     ones.
- */
-bool
-set_operation_ordered_results_useful(SetOperationStmt *setop)
-{
-   /*
-    * Paths sorted by the targetlist are useful for UNION as we can opt to
-    * MergeAppend the sorted paths then Unique them.  Ordered paths are no
-    * more useful than unordered ones for UNION ALL.
-    */
-   if (!setop->all && setop->op == SETOP_UNION)
-       return true;
-
-   /*
-    * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
-    * correctly sorted input paths.
-    */
-   return false;
-}
-
  /*
   * recurse_set_operations
   *   Recursively handle one step in a tree of set operations
@@ -214,8 +184,8 @@ set_operation_ordered_results_useful(SetOperationStmt *setop)
   *
   * Returns a RelOptInfo for the subtree, as well as these output parameters:
   * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
- * *istrivial_tlist: true if, and only if, datatypes between parent and child
- * match.
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ *     in the result, and store it there
   *
   * The pTargetList output parameter is mostly redundant with the pathtarget
   * of the returned RelOptInfo, but for the moment we need it because much of
@@ -232,11 +202,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                        bool junkOK,
                        int flag, List *refnames_tlist,
                        List **pTargetList,
-                      bool *istrivial_tlist)
+                      double *pNumGroups)
  {
-   RelOptInfo *rel;
-
-   *istrivial_tlist = true;    /* for now */
+   RelOptInfo *rel = NULL;     /* keep compiler quiet */
  
     /* Guard against stack overflow due to overly complex setop nests */
     check_stack_depth();
@@ -245,9 +213,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
     {
         RangeTblRef *rtr = (RangeTblRef *) setOp;
         RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
-       SetOperationStmt *setops;
         Query      *subquery = rte->subquery;
         PlannerInfo *subroot;
+       RelOptInfo *final_rel;
+       Path       *subpath;
+       Path       *path;
         List       *tlist;
         bool        trivial_tlist;
  
@@ -259,16 +229,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
         /* plan_params should not be in use in current query level */
         Assert(root->plan_params == NIL);
  
-       /*
-        * Pass the set operation details to the subquery_planner to have it
-        * consider generating Paths correctly ordered for the set operation.
-        */
-       setops = castNode(SetOperationStmt, root->parse->setOperations);
-
         /* Generate a subroot and Paths for the subquery */
-       subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
-                                                 false, root->tuple_fraction,
-                                                 setops);
+       subroot = rel->subroot = subquery_planner(root->glob, subquery,
+                                                 root,
+                                                 false,
+                                                 root->tuple_fraction);
  
         /*
          * It should not be possible for the primitive query to contain any
@@ -289,7 +254,90 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
  
         /* Return the fully-fledged tlist to caller, too */
         *pTargetList = tlist;
-       *istrivial_tlist = trivial_tlist;
+
+       /*
+        * Mark rel with estimated output rows, width, etc.  Note that we have
+        * to do this before generating outer-query paths, else
+        * cost_subqueryscan is not happy.
+        */
+       set_subquery_size_estimates(root, rel);
+
+       /*
+        * Since we may want to add a partial path to this relation, we must
+        * set its consider_parallel flag correctly.
+        */
+       final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
+       rel->consider_parallel = final_rel->consider_parallel;
+
+       /*
+        * For the moment, we consider only a single Path for the subquery.
+        * This should change soon (make it look more like
+        * set_subquery_pathlist).
+        */
+       subpath = get_cheapest_fractional_path(final_rel,
+                                              root->tuple_fraction);
+
+       /*
+        * Stick a SubqueryScanPath atop that.
+        *
+        * We don't bother to determine the subquery's output ordering since
+        * it won't be reflected in the set-op result anyhow; so just label
+        * the SubqueryScanPath with nil pathkeys.  (XXX that should change
+        * soon too, likely.)
+        */
+       path = (Path *) create_subqueryscan_path(root, rel, subpath,
+                                                trivial_tlist,
+                                                NIL, NULL);
+
+       add_path(rel, path);
+
+       /*
+        * If we have a partial path for the child relation, we can use that
+        * to build a partial path for this relation.  But there's no point in
+        * considering any path but the cheapest.
+        */
+       if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
+           final_rel->partial_pathlist != NIL)
+       {
+           Path       *partial_subpath;
+           Path       *partial_path;
+
+           partial_subpath = linitial(final_rel->partial_pathlist);
+           partial_path = (Path *)
+               create_subqueryscan_path(root, rel, partial_subpath,
+                                        trivial_tlist,
+                                        NIL, NULL);
+           add_partial_path(rel, partial_path);
+       }
+
+       /*
+        * Estimate number of groups if caller wants it.  If the subquery used
+        * grouping or aggregation, its output is probably mostly unique
+        * anyway; otherwise do statistical estimation.
+        *
+        * XXX you don't really want to know about this: we do the estimation
+        * using the subquery's original targetlist expressions, not the
+        * subroot->processed_tlist which might seem more appropriate.  The
+        * reason is that if the subquery is itself a setop, it may return a
+        * processed_tlist containing "varno 0" Vars generated by
+        * generate_append_tlist, and those would confuse estimate_num_groups
+        * mightily.  We ought to get rid of the "varno 0" hack, but that
+        * requires a redesign of the parsetree representation of setops, so
+        * that there can be an RTE corresponding to each setop's output.
+        */
+       if (pNumGroups)
+       {
+           if (subquery->groupClause || subquery->groupingSets ||
+               subquery->distinctClause ||
+               subroot->hasHavingQual || subquery->hasAggs)
+               *pNumGroups = subpath->rows;
+           else
+               *pNumGroups = estimate_num_groups(subroot,
+                                                 get_tlist_exprs(subquery->targetList, false),
+                                                 subpath->rows,
+                                                 NULL,
+                                                 NULL);
+       }
     }
     else if (IsA(setOp, SetOperationStmt))
     {
@@ -304,6 +352,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
             rel = generate_nonunion_paths(op, root,
                                           refnames_tlist,
                                           pTargetList);
+       if (pNumGroups)
+           *pNumGroups = rel->rows;
  
         /*
          * If necessary, add a Result node to project the caller-requested
@@ -333,7 +383,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                                                 *pTargetList,
                                                 refnames_tlist,
                                                 &trivial_tlist);
-           *istrivial_tlist = trivial_tlist;
             target = create_pathtarget(root, *pTargetList);
  
             /* Apply projection to each path */
@@ -364,16 +413,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                 lfirst(lc) = path;
             }
         }
-       postprocess_setop_rel(root, rel);
     }
     else
     {
         elog(ERROR, "unrecognized node type: %d",
              (int) nodeTag(setOp));
         *pTargetList = NIL;
-       rel = NULL;             /* keep compiler quiet */
     }
  
+   postprocess_setop_rel(root, rel);
+
     return rel;
  }
  
@@ -392,9 +441,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
     Path       *lpath;
     Path       *rpath;
     List       *lpath_tlist;
-   bool        lpath_trivial_tlist;
     List       *rpath_tlist;
-   bool        rpath_trivial_tlist;
     List       *tlist;
     List       *groupList;
     double      dNumGroups;
@@ -414,10 +461,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                                   false, -1,
                                   refnames_tlist,
                                   &lpath_tlist,
-                                 &lpath_trivial_tlist);
-   if (lrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
-                               NIL, NULL);
+                                 NULL);
     lpath = lrel->cheapest_total_path;
     /* The right path will want to look at the left one ... */
     root->non_recursive_path = lpath;
@@ -426,10 +470,7 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                                   false, -1,
                                   refnames_tlist,
                                   &rpath_tlist,
-                                 &rpath_trivial_tlist);
-   if (rrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
-                               NIL, NULL);
+                                 NULL);
     rpath = rrel->cheapest_total_path;
     root->non_recursive_path = NULL;
  
@@ -491,204 +532,6 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
     return result_rel;
  }
  
-/*
- * build_setop_child_paths
- *     Build paths for the set op child relation denoted by 'rel'.
- *
- * interesting_pathkeys: if not NIL, also include paths that suit these
- * pathkeys, sorting any unsorted paths as required.
- * *pNumGroups: if not NULL, we estimate the number of distinct groups
- *     in the result, and store it there
- */
-static void
-build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
-                       bool trivial_tlist, List *child_tlist,
-                       List *interesting_pathkeys, double *pNumGroups)
-{
-   RelOptInfo *final_rel;
-   List       *setop_pathkeys = rel->subroot->setop_pathkeys;
-   ListCell   *lc;
-
-   /* it can't be a set op child rel if it's not a subquery */
-   Assert(rel->rtekind == RTE_SUBQUERY);
-
-   /* when sorting is needed, add child rel equivalences */
-   if (interesting_pathkeys != NIL)
-       add_setop_child_rel_equivalences(root,
-                                        rel,
-                                        child_tlist,
-                                        interesting_pathkeys);
-
-   /*
-    * Mark rel with estimated output rows, width, etc.  Note that we have to
-    * do this before generating outer-query paths, else cost_subqueryscan is
-    * not happy.
-    */
-   set_subquery_size_estimates(root, rel);
-
-   /*
-    * Since we may want to add a partial path to this relation, we must set
-    * its consider_parallel flag correctly.
-    */
-   final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
-   rel->consider_parallel = final_rel->consider_parallel;
-
-   /* Generate subquery scan paths for any interesting path in final_rel */
-   foreach(lc, final_rel->pathlist)
-   {
-       Path       *subpath = (Path *) lfirst(lc);
-       List       *pathkeys;
-       Path       *cheapest_input_path = final_rel->cheapest_total_path;
-       bool        is_sorted;
-       int         presorted_keys;
-
-       /*
-        * Include the cheapest path as-is so that the set operation can be
-        * cheaply implemented using a method which does not require the input
-        * to be sorted.
-        */
-       if (subpath == cheapest_input_path)
-       {
-           /* Convert subpath's pathkeys to outer representation */
-           pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
-                                                make_tlist_from_pathtarget(subpath->pathtarget));
-
-           /* Generate outer path using this subpath */
-           add_path(rel, (Path *) create_subqueryscan_path(root,
-                                                           rel,
-                                                           subpath,
-                                                           trivial_tlist,
-                                                           pathkeys,
-                                                           NULL));
-       }
-
-       /* skip dealing with sorted paths if the setop doesn't need them */
-       if (interesting_pathkeys == NIL)
-           continue;
-
-       /*
-        * Create paths to suit final sort order required for setop_pathkeys.
-        * Here we'll sort the cheapest input path (if not sorted already) and
-        * incremental sort any paths which are partially sorted.
-        */
-       is_sorted = pathkeys_count_contained_in(setop_pathkeys,
-                                               subpath->pathkeys,
-                                               &presorted_keys);
-
-       if (!is_sorted)
-       {
-           double      limittuples = rel->subroot->limit_tuples;
-
-           /*
-            * Try at least sorting the cheapest path and also try
-            * incrementally sorting any path which is partially sorted
-            * already (no need to deal with paths which have presorted keys
-            * when incremental sort is disabled unless it's the cheapest
-            * input path).
-            */
-           if (subpath != cheapest_input_path &&
-               (presorted_keys == 0 || !enable_incremental_sort))
-               continue;
-
-           /*
-            * We've no need to consider both a sort and incremental sort.
-            * We'll just do a sort if there are no presorted keys and an
-            * incremental sort when there are presorted keys.
-            */
-           if (presorted_keys == 0 || !enable_incremental_sort)
-               subpath = (Path *) create_sort_path(rel->subroot,
-                                                   final_rel,
-                                                   subpath,
-                                                   setop_pathkeys,
-                                                   limittuples);
-           else
-               subpath = (Path *) create_incremental_sort_path(rel->subroot,
-                                                               final_rel,
-                                                               subpath,
-                                                               setop_pathkeys,
-                                                               presorted_keys,
-                                                               limittuples);
-       }
-
-       /*
-        * subpath is now sorted, so add it to the pathlist.  We already added
-        * the cheapest_input_path above, so don't add it again unless we just
-        * sorted it.
-        */
-       if (subpath != cheapest_input_path)
-       {
-           /* Convert subpath's pathkeys to outer representation */
-           pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
-                                                make_tlist_from_pathtarget(subpath->pathtarget));
-
-           /* Generate outer path using this subpath */
-           add_path(rel, (Path *) create_subqueryscan_path(root,
-                                                           rel,
-                                                           subpath,
-                                                           trivial_tlist,
-                                                           pathkeys,
-                                                           NULL));
-       }
-   }
-
-   /* if consider_parallel is false, there should be no partial paths */
-   Assert(final_rel->consider_parallel ||
-          final_rel->partial_pathlist == NIL);
-
-   /*
-    * If we have a partial path for the child relation, we can use that to
-    * build a partial path for this relation.  But there's no point in
-    * considering any path but the cheapest.
-    */
-   if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
-       final_rel->partial_pathlist != NIL)
-   {
-       Path       *partial_subpath;
-       Path       *partial_path;
-
-       partial_subpath = linitial(final_rel->partial_pathlist);
-       partial_path = (Path *)
-           create_subqueryscan_path(root, rel, partial_subpath,
-                                    trivial_tlist,
-                                    NIL, NULL);
-       add_partial_path(rel, partial_path);
-   }
-
-   postprocess_setop_rel(root, rel);
-
-   /*
-    * Estimate number of groups if caller wants it.  If the subquery used
-    * grouping or aggregation, its output is probably mostly unique anyway;
-    * otherwise do statistical estimation.
-    *
-    * XXX you don't really want to know about this: we do the estimation
-    * using the subquery's original targetlist expressions, not the
-    * subroot->processed_tlist which might seem more appropriate.  The reason
-    * is that if the subquery is itself a setop, it may return a
-    * processed_tlist containing "varno 0" Vars generated by
-    * generate_append_tlist, and those would confuse estimate_num_groups
-    * mightily.  We ought to get rid of the "varno 0" hack, but that requires
-    * a redesign of the parsetree representation of setops, so that there can
-    * be an RTE corresponding to each setop's output.
-    */
-   if (pNumGroups)
-   {
-       PlannerInfo *subroot = rel->subroot;
-       Query      *subquery = subroot->parse;
-
-       if (subquery->groupClause || subquery->groupingSets ||
-           subquery->distinctClause || subroot->hasHavingQual ||
-           subquery->hasAggs)
-           *pNumGroups = rel->cheapest_total_path->rows;
-       else
-           *pNumGroups = estimate_num_groups(subroot,
-                                             get_tlist_exprs(subquery->targetList, false),
-                                             rel->cheapest_total_path->rows,
-                                             NULL,
-                                             NULL);
-   }
-}
-
  /*
   * Generate paths for a UNION or UNION ALL node
   */
@@ -699,38 +542,41 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
  {
     Relids      relids = NULL;
     RelOptInfo *result_rel;
+   double      save_fraction = root->tuple_fraction;
     ListCell   *lc;
-   ListCell   *lc2;
-   ListCell   *lc3;
-   List       *cheapest_pathlist = NIL;
-   List       *ordered_pathlist = NIL;
+   List       *pathlist = NIL;
     List       *partial_pathlist = NIL;
     bool        partial_paths_valid = true;
     bool        consider_parallel = true;
     List       *rellist;
     List       *tlist_list;
-   List       *trivial_tlist_list;
     List       *tlist;
-   List       *groupList = NIL;
-   Path       *apath;
-   Path       *gpath = NULL;
-   bool        try_sorted;
-   List       *union_pathkeys = NIL;
+   Path       *path;
+
+   /*
+    * If plain UNION, tell children to fetch all tuples.
+    *
+    * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
+    * each arm of the UNION ALL.  One could make a case for reducing the
+    * tuple fraction for later arms (discounting by the expected size of the
+    * earlier arms' results) but it seems not worth the trouble. The normal
+    * case where tuple_fraction isn't already zero is a LIMIT at top level,
+    * and passing it down as-is is usually enough to get the desired result
+    * of preferring fast-start plans.
+    */
+   if (!op->all)
+       root->tuple_fraction = 0.0;
  
     /*
      * If any of my children are identical UNION nodes (same op, all-flag, and
      * colTypes) then they can be merged into this node so that we generate
-    * only one Append/MergeAppend and unique-ification for the lot.  Recurse
-    * to find such nodes.
+    * only one Append and unique-ification for the lot.  Recurse to find such
+    * nodes and compute their children's paths.
      */
-   rellist = plan_union_children(root,
-                                 op,
-                                 refnames_tlist,
-                                 &tlist_list,
-                                 &trivial_tlist_list);
+   rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
  
     /*
-    * Generate tlist for Append/MergeAppend plan node.
+    * Generate tlist for Append plan node.
      *
      * The tlist for an Append plan isn't important as far as the Append is
      * concerned, but we must make it look real anyway for the benefit of the
@@ -738,68 +584,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
      */
     tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
                                   tlist_list, refnames_tlist);
-   *pTargetList = tlist;
-
-   /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
-   try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
-
-   if (try_sorted)
-   {
-       /* Identify the grouping semantics */
-       groupList = generate_setop_grouplist(op, tlist);
-
-       /* Determine the pathkeys for sorting by the whole target list */
-       union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
-
-       root->query_pathkeys = union_pathkeys;
-   }
-
-   /*
-    * Now that we've got the append target list, we can build the union child
-    * paths.
-    */
-   forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
-   {
-       RelOptInfo *rel = lfirst(lc);
-       bool        trivial_tlist = lfirst_int(lc2);
-       List       *child_tlist = lfirst_node(List, lc3);
  
-       /* only build paths for the union children */
-       if (rel->rtekind == RTE_SUBQUERY)
-           build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
-                                   union_pathkeys, NULL);
-   }
+   *pTargetList = tlist;
  
     /* Build path lists and relid set. */
     foreach(lc, rellist)
     {
         RelOptInfo *rel = lfirst(lc);
-       Path       *ordered_path;
  
-       cheapest_pathlist = lappend(cheapest_pathlist,
-                                   rel->cheapest_total_path);
-
-       if (try_sorted)
-       {
-           ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
-                                                         union_pathkeys,
-                                                         NULL,
-                                                         TOTAL_COST,
-                                                         false);
-
-           if (ordered_path != NULL)
-               ordered_pathlist = lappend(ordered_pathlist, ordered_path);
-           else
-           {
-               /*
-                * If we can't find a sorted path, just give up trying to
-                * generate a list of correctly sorted child paths.  This can
-                * happen when type coercion was added to the targetlist due
-                * to mismatching types from the union children.
-                */
-               try_sorted = false;
-           }
-       }
+       pathlist = lappend(pathlist, rel->cheapest_total_path);
  
         if (consider_parallel)
         {
@@ -822,21 +615,28 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
     result_rel->reltarget = create_pathtarget(root, tlist);
     result_rel->consider_parallel = consider_parallel;
-   result_rel->consider_startup = (root->tuple_fraction > 0);
  
     /*
-    * Append the child results together using the cheapest paths from each
-    * union child.
+    * Append the child results together.
+    */
+   path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+                                      NIL, NULL, 0, false, -1);
+
+   /*
+    * For UNION ALL, we just need the Append path.  For UNION, need to add
+    * node(s) to remove duplicates.
      */
-   apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
-                                       NIL, NIL, NULL, 0, false, -1);
+   if (!op->all)
+       path = make_union_unique(op, path, tlist, root);
+
+   add_path(result_rel, path);
  
     /*
      * Estimate number of groups.  For now we just assume the output is unique
      * --- this is certainly true for the UNION case, and we want worst-case
      * estimates anyway.
      */
-   result_rel->rows = apath->rows;
+   result_rel->rows = path->rows;
  
     /*
      * Now consider doing the same thing using the partial paths plus Append
@@ -844,7 +644,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
      */
     if (partial_paths_valid)
     {
-       Path       *papath;
+       Path       *ppath;
         int         parallel_workers = 0;
  
         /* Find the highest number of workers requested for any subpath. */
@@ -873,137 +673,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
         }
         Assert(parallel_workers > 0);
  
-       papath = (Path *)
+       ppath = (Path *)
             create_append_path(root, result_rel, NIL, partial_pathlist,
-                              NIL, NULL, parallel_workers,
-                              enable_parallel_append, -1);
-       gpath = (Path *)
-           create_gather_path(root, result_rel, papath,
+                              NIL, NULL,
+                              parallel_workers, enable_parallel_append,
+                              -1);
+       ppath = (Path *)
+           create_gather_path(root, result_rel, ppath,
                                result_rel->reltarget, NULL, NULL);
+       if (!op->all)
+           ppath = make_union_unique(op, ppath, tlist, root);
+       add_path(result_rel, ppath);
     }
  
-   if (!op->all)
-   {
-       double      dNumGroups;
-       bool        can_sort = grouping_is_sortable(groupList);
-       bool        can_hash = grouping_is_hashable(groupList);
-
-       /*
-        * XXX for the moment, take the number of distinct groups as equal to
-        * the total input size, i.e., the worst case.  This is too
-        * conservative, but it's not clear how to get a decent estimate of
-        * the true size.  One should note as well the propensity of novices
-        * to write UNION rather than UNION ALL even when they don't expect
-        * any duplicates...
-        */
-       dNumGroups = apath->rows;
-
-       if (can_hash)
-       {
-           Path       *path;
-
-           /*
-            * Try a hash aggregate plan on 'apath'.  This is the cheapest
-            * available path containing each append child.
-            */
-           path = (Path *) create_agg_path(root,
-                                           result_rel,
-                                           apath,
-                                           create_pathtarget(root, tlist),
-                                           AGG_HASHED,
-                                           AGGSPLIT_SIMPLE,
-                                           groupList,
-                                           NIL,
-                                           NULL,
-                                           dNumGroups);
-           add_path(result_rel, path);
-
-           /* Try hash aggregate on the Gather path, if valid */
-           if (gpath != NULL)
-           {
-               /* Hashed aggregate plan --- no sort needed */
-               path = (Path *) create_agg_path(root,
-                                               result_rel,
-                                               gpath,
-                                               create_pathtarget(root, tlist),
-                                               AGG_HASHED,
-                                               AGGSPLIT_SIMPLE,
-                                               groupList,
-                                               NIL,
-                                               NULL,
-                                               dNumGroups);
-               add_path(result_rel, path);
-           }
-       }
-
-       if (can_sort)
-       {
-           Path       *path = apath;
-
-           /* Try Sort -> Unique on the Append path */
-           if (groupList != NIL)
-               path = (Path *) create_sort_path(root, result_rel, path,
-                                                make_pathkeys_for_sortclauses(root, groupList, tlist),
-                                                -1.0);
-
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(path->pathkeys),
-                                                    dNumGroups);
-
-           add_path(result_rel, path);
-
-           /* Try Sort -> Unique on the Gather path, if set */
-           if (gpath != NULL)
-           {
-               path = gpath;
-
-               path = (Path *) create_sort_path(root, result_rel, path,
-                                                make_pathkeys_for_sortclauses(root, groupList, tlist),
-                                                -1.0);
-
-               path = (Path *) create_upper_unique_path(root,
-                                                        result_rel,
-                                                        path,
-                                                        list_length(path->pathkeys),
-                                                        dNumGroups);
-               add_path(result_rel, path);
-           }
-       }
-
-       /*
-        * Try making a MergeAppend path if we managed to find a path with the
-        * correct pathkeys in each union child query.
-        */
-       if (try_sorted && groupList != NIL)
-       {
-           Path       *path;
-
-           path = (Path *) create_merge_append_path(root,
-                                                    result_rel,
-                                                    ordered_pathlist,
-                                                    union_pathkeys,
-                                                    NULL);
-
-           /* and make the MergeAppend unique */
-           path = (Path *) create_upper_unique_path(root,
-                                                    result_rel,
-                                                    path,
-                                                    list_length(tlist),
-                                                    dNumGroups);
-
-           add_path(result_rel, path);
-       }
-   }
-   else
-   {
-       /* UNION ALL */
-       add_path(result_rel, apath);
-
-       if (gpath != NULL)
-           add_path(result_rel, gpath);
-   }
+   /* Undo effects of possibly forcing tuple_fraction to 0 */
+   root->tuple_fraction = save_fraction;
  
     return result_rel;
  }
@@ -1029,8 +713,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                *tlist,
                *groupList,
                *pathlist;
-   bool        lpath_trivial_tlist,
-               rpath_trivial_tlist;
     double      dLeftGroups,
                 dRightGroups,
                 dNumGroups,
@@ -1050,26 +732,14 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                                   false, 0,
                                   refnames_tlist,
                                   &lpath_tlist,
-                                 &lpath_trivial_tlist);
-   if (lrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
-                               NIL, &dLeftGroups);
-   else
-       dLeftGroups = lrel->rows;
-
+                                 &dLeftGroups);
     lpath = lrel->cheapest_total_path;
     rrel = recurse_set_operations(op->rarg, root,
                                   op->colTypes, op->colCollations,
                                   false, 1,
                                   refnames_tlist,
                                   &rpath_tlist,
-                                 &rpath_trivial_tlist);
-   if (rrel->rtekind == RTE_SUBQUERY)
-       build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
-                               NIL, &dRightGroups);
-   else
-       dRightGroups = rrel->rows;
-
+                                 &dRightGroups);
     rpath = rrel->cheapest_total_path;
  
     /* Undo effects of forcing tuple_fraction to 0 */
@@ -1206,16 +876,13 @@ static List *
  plan_union_children(PlannerInfo *root,
                     SetOperationStmt *top_union,
                     List *refnames_tlist,
-                   List **tlist_list,
-                   List **istrivial_tlist)
+                   List **tlist_list)
  {
     List       *pending_rels = list_make1(top_union);
     List       *result = NIL;
     List       *child_tlist;
-   bool        trivial_tlist;
  
     *tlist_list = NIL;
-   *istrivial_tlist = NIL;
  
     while (pending_rels != NIL)
     {
@@ -1254,14 +921,75 @@ plan_union_children(PlannerInfo *root,
                                                         false, -1,
                                                         refnames_tlist,
                                                         &child_tlist,
-                                                       &trivial_tlist));
+                                                       NULL));
         *tlist_list = lappend(*tlist_list, child_tlist);
-       *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
     }
  
     return result;
  }
  
+/*
+ * Add nodes to the given path tree to unique-ify the result of a UNION.
+ */
+static Path *
+make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
+                 PlannerInfo *root)
+{
+   RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
+   List       *groupList;
+   double      dNumGroups;
+
+   /* Identify the grouping semantics */
+   groupList = generate_setop_grouplist(op, tlist);
+
+   /*
+    * XXX for the moment, take the number of distinct groups as equal to the
+    * total input size, ie, the worst case.  This is too conservative, but
+    * it's not clear how to get a decent estimate of the true size.  One
+    * should note as well the propensity of novices to write UNION rather
+    * than UNION ALL even when they don't expect any duplicates...
+    */
+   dNumGroups = path->rows;
+
+   /* Decide whether to hash or sort */
+   if (choose_hashed_setop(root, groupList, path,
+                           dNumGroups, dNumGroups,
+                           "UNION"))
+   {
+       /* Hashed aggregate plan --- no sort needed */
+       path = (Path *) create_agg_path(root,
+                                       result_rel,
+                                       path,
+                                       create_pathtarget(root, tlist),
+                                       AGG_HASHED,
+                                       AGGSPLIT_SIMPLE,
+                                       groupList,
+                                       NIL,
+                                       NULL,
+                                       dNumGroups);
+   }
+   else
+   {
+       /* Sort and Unique */
+       if (groupList)
+           path = (Path *)
+               create_sort_path(root,
+                                result_rel,
+                                path,
+                                make_pathkeys_for_sortclauses(root,
+                                                              groupList,
+                                                              tlist),
+                                -1.0);
+       path = (Path *) create_upper_unique_path(root,
+                                                result_rel,
+                                                path,
+                                                list_length(path->pathkeys),
+                                                dNumGroups);
+   }
+
+   return path;
+}
+
  /*
   * postprocess_setop_rel - perform steps required after adding paths
   */
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 28fed9d87f6bcd1e0b0fa2bf16e250a8958f0311..40ea19e6f107f9cd62a8a39ae28438919c9e430a 100644 (file)
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1890,8 +1890,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
      * For now, we don't support resjunk sort clauses on the output of a
      * setOperation tree --- you can only use the SQL92-spec options of
      * selecting an output column by name or number.  Enforce by checking that
-    * transformSortClause doesn't add any items to tlist.  Note, if changing
-    * this, add_setop_child_rel_equivalences() will need to be updated.
+    * transformSortClause doesn't add any items to tlist.
      */
     tllen = list_length(qry->targetList);
  
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 14ef296ab72f34518d782a5f176c976daafeeca2..6ec81637c1d552aa2d3483802f138b92fafd9807 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -400,8 +400,6 @@ struct PlannerInfo
     List       *distinct_pathkeys;
     /* sortClause pathkeys, if any */
     List       *sort_pathkeys;
-   /* set operator pathkeys, if any */
-   List       *setop_pathkeys;
  
     /* Canonicalised partition schemes used in the query. */
     List       *part_schemes pg_node_attr(read_write_ignore);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 914d9bdef5878fe9527e9c15c6f37ea5d21c5df4..5f500a1c69fb02ce4af08cdac7502cfe3ae2481a 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -173,10 +173,6 @@ extern void add_child_join_rel_equivalences(PlannerInfo *root,
                                             AppendRelInfo **appinfos,
                                             RelOptInfo *parent_joinrel,
                                             RelOptInfo *child_joinrel);
-extern void add_setop_child_rel_equivalences(PlannerInfo *root,
-                                            RelOptInfo *child_rel,
-                                            List *child_tlist,
-                                            List *setop_pathkeys);
  extern List *generate_implied_equalities_for_column(PlannerInfo *root,
                                                     RelOptInfo *rel,
                                                     ec_matches_callback_type callback,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h

index 5aeff21b967f074991044f68d818a00f86555436..e1d79ffdf3ca5e94a3db7638d3bea9380b4b2d17 100644 (file)
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -44,8 +44,7 @@ extern PlannedStmt *standard_planner(Query *parse, const char *query_string,
  
  extern PlannerInfo *subquery_planner(PlannerGlobal *glob, Query *parse,
                                      PlannerInfo *parent_root,
-                                    bool hasRecursion, double tuple_fraction,
-                                    SetOperationStmt *setops);
+                                    bool hasRecursion, double tuple_fraction);
  
  extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
                                        LockClauseStrength strength);
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h

index a52dec285d569970c49e702f8c0d41aa2505d279..8e00716dc82069af91164632ddee35283ae0e754 100644 (file)
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -53,6 +53,6 @@ extern void preprocess_aggrefs(PlannerInfo *root, Node *clause);
   * prototypes for prepunion.c
   */
  extern RelOptInfo *plan_set_operations(PlannerInfo *root);
-extern bool set_operation_ordered_results_useful(SetOperationStmt *setop);
+
  
  #endif                         /* PREP_H */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out

index 7d59fb44316574cd27313ae16fce3873c2398ec6..2de8924b52436ff142707ec5fe32a6354bfabe05 100644 (file)
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1396,7 +1396,6 @@ SELECT x FROM test3cs WHERE x ~ 'a';
   abc
  (1 row)
  
-SET enable_hashagg TO off;
  SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
    x  
  -----
@@ -1449,7 +1448,6 @@ SELECT DISTINCT x FROM test3cs ORDER BY x;
   ghi
  (4 rows)
  
-RESET enable_hashagg;
  SELECT count(DISTINCT x) FROM test3cs;
   count 
  -------
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out

index 5fd54a10b1aaf88ee7d5ddf472adb2bbc6cdfc46..7fdb685313dbeee1a5f1554cc413b57af630085c 100644 (file)
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1472,19 +1472,14 @@ explain (costs off) select * from t union select * from t order by 1,3;
     Sort Key: t.a, t.c
     Presorted Key: t.a
     ->  Unique
-         ->  Merge Append
+         ->  Sort
                 Sort Key: t.a, t.b, t.c
-               ->  Gather Merge
+               ->  Gather
                       Workers Planned: 2
-                     ->  Sort
-                           Sort Key: t.a, t.b, t.c
+                     ->  Parallel Append
                             ->  Parallel Seq Scan on t
-               ->  Gather Merge
-                     Workers Planned: 2
-                     ->  Sort
-                           Sort Key: t_1.a, t_1.b, t_1.c
                             ->  Parallel Seq Scan on t t_1
-(16 rows)
+(11 rows)
  
  -- Full sort, not just incremental sort can be pushed below a gather merge path
  -- by generate_useful_gather_paths.
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index 26b718e9033f75bab9c476a93f57371a6d4f4488..882017afc9a72c74c3c923aff965d343d8d04133 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -412,17 +412,16 @@ set enable_hashagg to off;
  explain (costs off)
  select count(*) from
    ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                              QUERY PLAN                              
+----------------------------------------------------------------------
   Aggregate
     ->  Unique
-         ->  Merge Append
+         ->  Sort
                 Sort Key: tenk1.unique1
-               ->  Index Only Scan using tenk1_unique1 on tenk1
-               ->  Sort
-                     Sort Key: tenk1_1.fivethous
+               ->  Append
+                     ->  Index Only Scan using tenk1_unique1 on tenk1
                       ->  Seq Scan on tenk1 tenk1_1
-(8 rows)
+(7 rows)
  
  select count(*) from
    ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
@@ -951,9 +950,16 @@ select except select;
  -- check hashed implementation
  set enable_hashagg = true;
  set enable_sort = false;
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ HashAggregate
+   ->  Append
+         ->  Function Scan on generate_series
+         ->  Function Scan on generate_series generate_series_1
+(4 rows)
+
  explain (costs off)
  select from generate_series(1,5) intersect select from generate_series(1,3);
                                QUERY PLAN                              
@@ -966,6 +972,10 @@ select from generate_series(1,5) intersect select from generate_series(1,3);
                 ->  Function Scan on generate_series generate_series_1
  (6 rows)
  
+select from generate_series(1,5) union select from generate_series(1,3);
+--
+(1 row)
+
  select from generate_series(1,5) union all select from generate_series(1,3);
  --
  (8 rows)
@@ -1035,20 +1045,6 @@ select from generate_series(1,5) except all select from generate_series(1,3);
  --
  (2 rows)
  
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
---
-(1 row)
-
  reset enable_hashagg;
  reset enable_sort;
  --
@@ -1085,7 +1081,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
  set enable_seqscan = off;
  set enable_indexscan = on;
  set enable_bitmapscan = off;
-set enable_sort = off;
  explain (costs off)
   SELECT * FROM
   (SELECT a || b AS ab FROM t1
@@ -1167,7 +1162,6 @@ explain (costs off)
  reset enable_seqscan;
  reset enable_indexscan;
  reset enable_bitmapscan;
-reset enable_sort;
  -- This simpler variant of the above test has been observed to fail differently
  create table events (event_id int primary key);
  create table other_events (event_id int primary key);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql

index 80f28a97d785b27a3dc345704848a2f23bccfadc..03837de846bdaefb71509ccac6eb76bdd234af0e 100644 (file)
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -555,7 +555,6 @@ SELECT x FROM test3cs WHERE x LIKE 'a%';
  SELECT x FROM test3cs WHERE x ILIKE 'a%';
  SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
  SELECT x FROM test3cs WHERE x ~ 'a';
-SET enable_hashagg TO off;
  SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
  SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
  SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
@@ -563,7 +562,6 @@ SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
  SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
  SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
  SELECT DISTINCT x FROM test3cs ORDER BY x;
-RESET enable_hashagg;
  SELECT count(DISTINCT x) FROM test3cs;
  SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
  SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index 8afc580c6320139bc56d818c616179edea3d08bc..d160db5458855e2b87303f8a9ce071e25aaf55f9 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -302,12 +302,12 @@ select except select;
  set enable_hashagg = true;
  set enable_sort = false;
  
--- We've no way to check hashed UNION as the empty pathkeys in the Append are
--- fine to make use of Unique, which is cheaper than HashAggregate and we've
--- no means to disable Unique.
+explain (costs off)
+select from generate_series(1,5) union select from generate_series(1,3);
  explain (costs off)
  select from generate_series(1,5) intersect select from generate_series(1,3);
  
+select from generate_series(1,5) union select from generate_series(1,3);
  select from generate_series(1,5) union all select from generate_series(1,3);
  select from generate_series(1,5) intersect select from generate_series(1,3);
  select from generate_series(1,5) intersect all select from generate_series(1,3);
@@ -330,17 +330,6 @@ select from generate_series(1,5) intersect all select from generate_series(1,3);
  select from generate_series(1,5) except select from generate_series(1,3);
  select from generate_series(1,5) except all select from generate_series(1,3);
  
--- Try a variation of the above but with a CTE which contains a column, again
--- with an empty final select list.
-
--- Ensure we get the expected 1 row with 0 columns
-with cte as materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
--- Ensure we get the same result as the above.
-with cte as not materialized (select s from generate_series(1,5) s)
-select from cte union select from cte;
-
  reset enable_hashagg;
  reset enable_sort;
  
@@ -372,7 +361,6 @@ INSERT INTO t2 VALUES ('ab'), ('xy');
  set enable_seqscan = off;
  set enable_indexscan = on;
  set enable_bitmapscan = off;
-set enable_sort = off;
  
  explain (costs off)
   SELECT * FROM
@@ -419,7 +407,6 @@ explain (costs off)
  reset enable_seqscan;
  reset enable_indexscan;
  reset enable_bitmapscan;
-reset enable_sort;
  
  -- This simpler variant of the above test has been observed to fail differently
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 20 May 2024 19:08:30 +0000 (15:08 -0400)
contrib/postgres_fdw/expected/postgres_fdw.out		patch \| blob \| blame \| history
contrib/postgres_fdw/sql/postgres_fdw.sql		patch \| blob \| blame \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| blame \| history
src/backend/optimizer/path/equivclass.c		patch \| blob \| blame \| history
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/backend/parser/analyze.c		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/include/optimizer/planner.h		patch \| blob \| blame \| history
src/include/optimizer/prep.h		patch \| blob \| blame \| history
src/test/regress/expected/collate.icu.utf8.out		patch \| blob \| blame \| history
src/test/regress/expected/incremental_sort.out		patch \| blob \| blame \| history
src/test/regress/expected/union.out		patch \| blob \| blame \| history
src/test/regress/sql/collate.icu.utf8.sql		patch \| blob \| blame \| history
src/test/regress/sql/union.sql		patch \| blob \| blame \| history