diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 1131a8bf77ec..bbc6d25c8647 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -6090,7 +6090,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, */ Assert(fpinfo->relation_index == 0); /* shouldn't be set yet */ fpinfo->relation_index = - list_length(root->parse->rtable) + list_length(root->join_rel_list); + list_length(root->parse->rtable) + + list_length(root->join_rel_list->items); return true; } diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 3d62c8bd2748..d46b31611294 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5308,6 +5308,21 @@ ANY num_sync ( + enable_eager_aggregate (boolean) + + enable_eager_aggregate configuration parameter + + + + + Enables or disables the query planner's ability to partially push + aggregation past a join, and finalize it once all the relations are + joined. The default is off. + + + + enable_gathermerge (boolean) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index f341d9f303c7..45236ca46b2f 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1497,3 +1497,83 @@ breaking down aggregation or grouping over a partitioned relation into aggregation or grouping over its partitions is called partitionwise aggregation. Especially when the partition keys match the GROUP BY clause, this can be significantly faster than the regular method. + +Eager aggregation +----------------- + +Eager aggregation is a query optimization technique that partially pushes +aggregation past a join, and finalizes it once all the relations are joined. +Eager aggregation may reduce the number of input rows to the join and thus +could result in a better overall plan. 
+ +For example: + + EXPLAIN (COSTS OFF) + SELECT a.i, avg(b.y) + FROM a JOIN b ON a.i = b.j + GROUP BY a.i; + + Finalize HashAggregate + Group Key: a.i + -> Nested Loop + -> Partial HashAggregate + Group Key: b.j + -> Seq Scan on b + -> Index Only Scan using a_pkey on a + Index Cond: (i = b.j) + +If the partial aggregation on table B significantly reduces the number of +input rows, the join above will be much cheaper, leading to a more efficient +final plan. + +For the partial aggregation that is pushed down to a non-aggregated relation, +we need to consider all expressions from this relation that are involved in +upper join clauses and include them in the grouping keys, using compatible +operators. This is essential to ensure that an aggregated row from the partial +aggregation matches the other side of the join if and only if each row in the +partial group does. This ensures that all rows within the same partial group +share the same 'destiny', which is crucial for maintaining correctness. + +One restriction is that we cannot push partial aggregation down to a relation +that is in the nullable side of an outer join, because the NULL-extended rows +produced by the outer join would not be available when we perform the partial +aggregation, while with a non-eager-aggregation plan these rows are available +for the top-level aggregation. Pushing partial aggregation in this case may +result in the rows being grouped differently than expected, or produce +incorrect values from the aggregate functions. 
+ +We can also apply eager aggregation to a join: + + EXPLAIN (COSTS OFF) + SELECT a.i, avg(b.y + c.z) + FROM a JOIN b ON a.i = b.j + JOIN c ON b.j = c.i + GROUP BY a.i; + + Finalize HashAggregate + Group Key: a.i + -> Nested Loop + -> Partial HashAggregate + Group Key: b.j + -> Hash Join + Hash Cond: (b.j = c.i) + -> Seq Scan on b + -> Hash + -> Seq Scan on c + -> Index Only Scan using a_pkey on a + Index Cond: (i = b.j) + +During the construction of the join tree, we evaluate each base or join +relation to determine if eager aggregation can be applied. If feasible, we +create a separate RelOptInfo called a "grouped relation" and generate grouped +paths by adding sorted and hashed partial aggregation paths on top of the +non-grouped paths. To limit planning time, we consider only the cheapest or +suitably-sorted non-grouped paths in this step. + +Another way to generate grouped paths is to join a grouped relation with a +non-grouped relation. Joining two grouped relations does not seem to be very +useful and is currently not supported. + +If we have generated a grouped relation for the topmost join relation, we need +to finalize its paths at the end. The final paths will compete in the usual +way with paths built from regular planning. 
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index f07d1dc8ac69..e69eac9bffa9 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -39,10 +39,20 @@ typedef struct int size; /* number of input relations in clump */ } Clump; +/* The original length and hashtable of a RelInfoList */ +typedef struct +{ + int savelength; + struct HTAB *savehash; +} RelInfoListInfo; + static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, bool force); static bool desirable_join(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel); +static RelInfoListInfo save_relinfolist(RelInfoList *relinfo_list); +static void restore_relinfolist(RelInfoList *relinfo_list, + RelInfoListInfo *info); /* @@ -60,8 +70,8 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) MemoryContext oldcxt; RelOptInfo *joinrel; Cost fitness; - int savelength; - struct HTAB *savehash; + RelInfoListInfo save_join_rel; + RelInfoListInfo save_grouped_rel; /* * Create a private memory context that will hold all temp storage @@ -78,25 +88,29 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) oldcxt = MemoryContextSwitchTo(mycontext); /* - * gimme_tree will add entries to root->join_rel_list, which may or may - * not already contain some entries. The newly added entries will be - * recycled by the MemoryContextDelete below, so we must ensure that the - * list is restored to its former state before exiting. We can do this by - * truncating the list to its original length. NOTE this assumes that any - * added entries are appended at the end! + * gimme_tree will add entries to root->join_rel_list and + * root->grouped_rel_list, which may or may not already contain some + * entries. The newly added entries will be recycled by the + * MemoryContextDelete below, so we must ensure that each list within the + * RelInfoList structures is restored to its former state before exiting. 
+ * We can do this by truncating each list to its original length. NOTE + * this assumes that any added entries are appended at the end! * - * We also must take care not to mess up the outer join_rel_hash, if there - * is one. We can do this by just temporarily setting the link to NULL. - * (If we are dealing with enough join rels, which we very likely are, a - * new hash table will get built and used locally.) + * We also must take care not to mess up the outer hash tables within the + * RelInfoList structures, if any. We can do this by just temporarily + * setting each link to NULL. (If we are dealing with enough join rels or + * grouped rels, which we very likely are, new hash tables will get built + * and used locally.) * * join_rel_level[] shouldn't be in use, so just Assert it isn't. */ - savelength = list_length(root->join_rel_list); - savehash = root->join_rel_hash; + save_join_rel = save_relinfolist(root->join_rel_list); + save_grouped_rel = save_relinfolist(root->grouped_rel_list); + Assert(root->join_rel_level == NULL); - root->join_rel_hash = NULL; + root->join_rel_list->hash = NULL; + root->grouped_rel_list->hash = NULL; /* construct the best path for the given combination of relations */ joinrel = gimme_tree(root, tour, num_gene); @@ -118,12 +132,11 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) fitness = DBL_MAX; /* - * Restore join_rel_list to its former state, and put back original - * hashtable if any. + * Restore each of the lists in join_rel_list and grouped_rel_list to its + * former state, and put back original hashtables if any. 
*/ - root->join_rel_list = list_truncate(root->join_rel_list, - savelength); - root->join_rel_hash = savehash; + restore_relinfolist(root->join_rel_list, &save_join_rel); + restore_relinfolist(root->grouped_rel_list, &save_grouped_rel); /* release all the memory acquired within gimme_tree */ MemoryContextSwitchTo(oldcxt); @@ -279,6 +292,27 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, /* Find and save the cheapest paths for this joinrel */ set_cheapest(joinrel); + /* + * Except for the topmost scan/join rel, consider generating + * partial aggregation paths for the grouped relation on top + * of the paths of this rel. After that, we're done creating + * paths for the grouped relation, so run set_cheapest(). + */ + if (!bms_equal(joinrel->relids, root->all_query_rels)) + { + RelOptInfo *rel_grouped; + + rel_grouped = find_grouped_rel(root, joinrel->relids); + if (rel_grouped) + { + Assert(IS_GROUPED_REL(rel_grouped)); + + generate_grouped_paths(root, rel_grouped, joinrel, + rel_grouped->agg_info); + set_cheapest(rel_grouped); + } + } + /* Absorb new clump into old */ old_clump->joinrel = joinrel; old_clump->size += new_clump->size; @@ -336,3 +370,27 @@ desirable_join(PlannerInfo *root, /* Otherwise postpone the join till later. */ return false; } + +/* + * Save the original length and hashtable of a RelInfoList. + */ +static RelInfoListInfo +save_relinfolist(RelInfoList *relinfo_list) +{ + RelInfoListInfo info; + + info.savelength = list_length(relinfo_list->items); + info.savehash = relinfo_list->hash; + + return info; +} + +/* + * Restore the original length and hashtable of a RelInfoList. 
+ */ +static void +restore_relinfolist(RelInfoList *relinfo_list, RelInfoListInfo *info) +{ + relinfo_list->items = list_truncate(relinfo_list->items, info->savelength); + relinfo_list->hash = info->savehash; +} diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index df3453f99f0f..2839bcfe07a8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -40,6 +40,7 @@ #include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planner.h" +#include "optimizer/prep.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" @@ -47,6 +48,7 @@ #include "port/pg_bitutils.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" /* Bitmask flags for pushdown_safety_info.unsafeFlags */ @@ -77,6 +79,7 @@ typedef enum pushdown_safe_type /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ +bool enable_eager_aggregate = false; int geqo_threshold; int min_parallel_table_scan_size; int min_parallel_index_scan_size; @@ -90,6 +93,7 @@ join_search_hook_type join_search_hook = NULL; static void set_base_rel_consider_startup(PlannerInfo *root); static void set_base_rel_sizes(PlannerInfo *root); +static void setup_base_grouped_rels(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -114,6 +118,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +static void set_grouped_rel_pathlist(PlannerInfo *root, RelOptInfo *rel); static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, List *all_child_pathkeys); @@ -182,6 +187,11 @@ make_one_rel(PlannerInfo *root, 
List *joinlist) */ set_base_rel_sizes(root); + /* + * Build grouped relations for base rels where possible. + */ + setup_base_grouped_rels(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the @@ -323,6 +333,45 @@ set_base_rel_sizes(PlannerInfo *root) } } +/* + * setup_base_grouped_rels + * For each "plain" base relation, build a grouped base relation if eager + * aggregation is possible and if this relation can produce grouped paths. + */ +static void +setup_base_grouped_rels(PlannerInfo *root) +{ + Index rti; + + /* + * If there are no aggregate expressions or grouping expressions, eager + * aggregation is not possible. + */ + if (root->agg_clause_list == NIL || + root->group_expr_list == NIL) + return; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RelOptInfo *rel_grouped; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + Assert(IS_SIMPLE_REL(rel)); /* sanity check on rel */ + + rel_grouped = build_simple_grouped_rel(root, rel); + if (rel_grouped) + { + /* Make the grouped relation available for joining. */ + add_grouped_rel(root, rel_grouped); + } + } +} + /* * set_base_rel_pathlists * Finds all paths available for scanning each base-relation entry. @@ -559,6 +608,15 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); + /* + * If a grouped relation for this rel exists, build partial aggregation + * paths for it. + * + * Note that this can only happen after we've called set_cheapest() for + * this base rel, because we need its cheapest paths. 
+ */ + set_grouped_rel_pathlist(root, rel); + #ifdef OPTIMIZER_DEBUG pprint(rel); #endif @@ -1305,6 +1363,36 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, add_paths_to_append_rel(root, rel, live_childrels); } +/* + * set_grouped_rel_pathlist + * If a grouped relation for the given 'rel' exists, build partial + * aggregation paths for it. + */ +static void +set_grouped_rel_pathlist(PlannerInfo *root, RelOptInfo *rel) +{ + RelOptInfo *rel_grouped; + + /* + * If there are no aggregate expressions or grouping expressions, eager + * aggregation is not possible. + */ + if (root->agg_clause_list == NIL || + root->group_expr_list == NIL) + return; + + /* Add paths to the grouped base relation if one exists. */ + rel_grouped = find_grouped_rel(root, rel->relids); + if (rel_grouped) + { + Assert(IS_GROUPED_REL(rel_grouped)); + + generate_grouped_paths(root, rel_grouped, rel, + rel_grouped->agg_info); + set_cheapest(rel_grouped); + } +} + /* * add_paths_to_append_rel @@ -3327,6 +3415,318 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r } } +/* + * generate_grouped_paths + * Generate paths for a grouped relation by adding sorted and hashed + * partial aggregation paths on top of paths of the plain base or join + * relation. + * + * The information needed is provided by the RelAggInfo structure. + */ +void +generate_grouped_paths(PlannerInfo *root, RelOptInfo *rel_grouped, + RelOptInfo *rel_plain, RelAggInfo *agg_info) +{ + AggClauseCosts agg_costs; + bool can_hash; + bool can_sort; + Path *cheapest_total_path = NULL; + Path *cheapest_partial_path = NULL; + double dNumGroups = 0; + double dNumPartialGroups = 0; + + if (IS_DUMMY_REL(rel_plain)) + { + mark_dummy_rel(rel_grouped); + return; + } + + /* + * If the grouped paths for the given relation are not considered useful, + * do not bother to generate them. 
+ */ + if (!agg_info->agg_useful) + return; + + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, &agg_costs); + + /* + * Determine whether it's possible to perform sort-based implementations + * of grouping. + */ + can_sort = grouping_is_sortable(agg_info->group_clauses); + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + */ + Assert(root->numOrderedAggs == 0); + can_hash = (agg_info->group_clauses != NIL && + grouping_is_hashable(agg_info->group_clauses)); + + /* + * Consider whether we should generate partially aggregated non-partial + * paths. We can only do this if we have a non-partial path. + */ + if (rel_plain->pathlist != NIL) + { + cheapest_total_path = rel_plain->cheapest_total_path; + Assert(cheapest_total_path != NULL); + } + + /* + * If parallelism is possible for rel_grouped, then we should consider + * generating partially-grouped partial paths. However, if the plain rel + * has no partial paths, then we can't. + */ + if (rel_grouped->consider_parallel && rel_plain->partial_pathlist != NIL) + { + cheapest_partial_path = linitial(rel_plain->partial_pathlist); + Assert(cheapest_partial_path != NULL); + } + + /* Estimate number of partial groups. */ + if (cheapest_total_path != NULL) + dNumGroups = estimate_num_groups(root, + agg_info->group_exprs, + cheapest_total_path->rows, + NULL, NULL); + if (cheapest_partial_path != NULL) + dNumPartialGroups = estimate_num_groups(root, + agg_info->group_exprs, + cheapest_partial_path->rows, + NULL, NULL); + + if (can_sort && cheapest_total_path != NULL) + { + ListCell *lc; + + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path. 
+ */ + foreach(lc, rel_plain->pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + /* + * Since the path originates from a non-grouped relation that is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + input_path, + agg_info->agg_input); + + is_sorted = pathkeys_count_contained_in(agg_info->group_pathkeys, + path->pathkeys, + &presorted_keys); + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (input_path != cheapest_total_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + presorted_keys, + -1.0); + } + + /* + * qual is NIL because the HAVING clause cannot be evaluated until + * the final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_SORTED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumGroups); + + add_path(rel_grouped, path); + } + } + + if (can_sort && cheapest_partial_path != NULL) + { + ListCell *lc; + + /* Similar to above logic, but for partial paths. 
*/ + foreach(lc, rel_plain->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + /* + * Since the path originates from a non-grouped relation that is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + input_path, + agg_info->agg_input); + + is_sorted = pathkeys_count_contained_in(agg_info->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (input_path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + presorted_keys, + -1.0); + } + + /* + * qual is NIL because the HAVING clause cannot be evaluated until + * the final value of the aggregate is known. 
+ */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_SORTED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumPartialGroups); + + add_partial_path(rel_grouped, path); + } + } + + /* + * Add a partially-grouped HashAgg Path where possible + */ + if (can_hash && cheapest_total_path != NULL) + { + Path *path; + + /* + * Since the path originates from a non-grouped relation that is not + * aware of eager aggregation, we must ensure that it provides the + * correct input for partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + cheapest_total_path, + agg_info->agg_input); + + /* + * qual is NIL because the HAVING clause cannot be evaluated until the + * final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumGroups); + + add_path(rel_grouped, path); + } + + /* + * Now add a partially-grouped HashAgg partial Path where possible + */ + if (can_hash && cheapest_partial_path != NULL) + { + Path *path; + + /* + * Since the path originates from a non-grouped relation that is not + * aware of eager aggregation, we must ensure that it provides the + * correct input for partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + cheapest_partial_path, + agg_info->agg_input); + + /* + * qual is NIL because the HAVING clause cannot be evaluated until the + * final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumPartialGroups); + + add_partial_path(rel_grouped, path); + } +} + /* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. 
@@ -3435,9 +3835,10 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) * needed for these paths need have been instantiated. * * Note to plugin authors: the functions invoked during standard_join_search() - * modify root->join_rel_list and root->join_rel_hash. If you want to do more - * than one join-order search, you'll probably need to save and restore the - * original states of those data structures. See geqo_eval() for an example. + * modify root->join_rel_list->items and root->join_rel_list->hash. If you + * want to do more than one join-order search, you'll probably need to save and + * restore the original states of those data structures. See geqo_eval() for + * an example. */ RelOptInfo * standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) @@ -3486,6 +3887,10 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * * After that, we're done creating paths for the joinrel, so run * set_cheapest(). + * + * In addition, we also run generate_grouped_paths() for the grouped + * relation of each just-processed joinrel, and run set_cheapest() for + * the grouped relation afterwards. */ foreach(lc, root->join_rel_level[lev]) { @@ -3506,6 +3911,27 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) /* Find and save the cheapest paths for this rel */ set_cheapest(rel); + /* + * Except for the topmost scan/join rel, consider generating + * partial aggregation paths for the grouped relation on top of + * the paths of this rel. After that, we're done creating paths + * for the grouped relation, so run set_cheapest(). 
+ */ + if (!bms_equal(rel->relids, root->all_query_rels)) + { + RelOptInfo *rel_grouped; + + rel_grouped = find_grouped_rel(root, rel->relids); + if (rel_grouped) + { + Assert(IS_GROUPED_REL(rel_grouped)); + + generate_grouped_paths(root, rel_grouped, rel, + rel_grouped->agg_info); + set_cheapest(rel_grouped); + } + } + #ifdef OPTIMIZER_DEBUG pprint(rel); #endif @@ -4375,6 +4801,29 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel) if (IS_DUMMY_REL(child_rel)) continue; + /* + * Except for the topmost scan/join rel, consider generating partial + * aggregation paths for the grouped relation on top of the paths of + * this partitioned child-join. After that, we're done creating paths + * for the grouped relation, so run set_cheapest(). + */ + if (!bms_equal(IS_OTHER_REL(rel) ? + rel->top_parent_relids : rel->relids, + root->all_query_rels)) + { + RelOptInfo *rel_grouped; + + rel_grouped = find_grouped_rel(root, child_rel->relids); + if (rel_grouped) + { + Assert(IS_GROUPED_REL(rel_grouped)); + + generate_grouped_paths(root, rel_grouped, child_rel, + rel_grouped->agg_info); + set_cheapest(rel_grouped); + } + } + #ifdef OPTIMIZER_DEBUG pprint(child_rel); #endif diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 256568d05a29..a409f9ebfd35 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -180,6 +180,8 @@ static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context); static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, QualCost *qpqual_cost); +static void set_joinpath_size(PlannerInfo *root, JoinPath *jpath, + SpecialJoinInfo *sjinfo); static bool has_indexed_join_quals(NestPath *path); static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals); @@ -3370,19 +3372,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, if (inner_path_rows <= 0) inner_path_rows = 1; 
/* Mark the path with the correct row estimate */ - if (path->jpath.path.param_info) - path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; - else - path->jpath.path.rows = path->jpath.path.parent->rows; - - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { - double parallel_divisor = get_parallel_divisor(&path->jpath.path); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); - } + set_joinpath_size(root, &path->jpath, extra->sjinfo); /* cost of inner-relation source data (we already dealt with outer rel) */ @@ -3867,19 +3857,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, inner_path_rows = 1; /* Mark the path with the correct row estimate */ - if (path->jpath.path.param_info) - path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; - else - path->jpath.path.rows = path->jpath.path.parent->rows; - - /* For partial paths, scale row estimate. */ - if (path->jpath.path.parallel_workers > 0) - { - double parallel_divisor = get_parallel_divisor(&path->jpath.path); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); - } + set_joinpath_size(root, &path->jpath, extra->sjinfo); /* * Compute cost of the mergequals and qpquals (other restriction clauses) @@ -4299,19 +4277,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, path->jpath.path.disabled_nodes = workspace->disabled_nodes; /* Mark the path with the correct row estimate */ - if (path->jpath.path.param_info) - path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; - else - path->jpath.path.rows = path->jpath.path.parent->rows; - - /* For partial paths, scale row estimate. 
*/ - if (path->jpath.path.parallel_workers > 0) - { - double parallel_divisor = get_parallel_divisor(&path->jpath.path); - - path->jpath.path.rows = - clamp_row_est(path->jpath.path.rows / parallel_divisor); - } + set_joinpath_size(root, &path->jpath, extra->sjinfo); /* mark the path with estimated # of batches */ path->num_batches = numbatches; @@ -5071,6 +5037,57 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, *qpqual_cost = baserel->baserestrictcost; } +/* + * set_joinpath_size + * Set the correct row estimate for the given join path. + * + * 'jpath' is the join path under consideration. + * 'sjinfo' is any SpecialJoinInfo relevant to this join. + * + * Note that for a grouped join relation, its paths could have very different + * rowcount estimates, so we need to calculate the rowcount estimate using the + * outer path and inner path of the given join path. + */ +static void +set_joinpath_size(PlannerInfo *root, JoinPath *jpath, SpecialJoinInfo *sjinfo) +{ + if (IS_GROUPED_REL(jpath->path.parent)) + { + Path *outer_path = jpath->outerjoinpath; + Path *inner_path = jpath->innerjoinpath; + + /* + * Estimate the number of rows of this grouped join path as the sizes + * of the outer and inner paths times the selectivity of the clauses + * that have ended up at this join node. + */ + jpath->path.rows = calc_joinrel_size_estimate(root, + jpath->path.parent, + outer_path->parent, + inner_path->parent, + outer_path->rows, + inner_path->rows, + sjinfo, + jpath->joinrestrictinfo); + } + else + { + if (jpath->path.param_info) + jpath->path.rows = jpath->path.param_info->ppi_rows; + else + jpath->path.rows = jpath->path.parent->rows; + + /* For partial paths, scale row estimate. 
*/ + if (jpath->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&jpath->path); + + jpath->path.rows = + clamp_row_est(jpath->path.rows / parallel_divisor); + } + } +} + /* * compute_semi_anti_join_factors diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 60d65762b5d5..a68d40f98355 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -35,6 +35,9 @@ static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, RelOptInfo *joinrel, bool only_pushed_down); +static void make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist); static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -772,6 +775,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) return joinrel; } + /* Build a grouped join relation for 'joinrel' if possible. */ + make_grouped_join_rel(root, rel1, rel2, joinrel, sjinfo, + restrictlist); + /* Add paths to the join relation. */ populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo, restrictlist); @@ -883,6 +890,135 @@ add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, return input_relids; } +/* + * make_grouped_join_rel + * Build a grouped join relation out of 'joinrel' if eager aggregation is + * possible and the 'joinrel' can produce grouped paths. + * + * We also generate partial aggregation paths for the grouped relation by + * joining the grouped paths of 'rel1' to the plain paths of 'rel2', or by + * joining the grouped paths of 'rel2' to the plain paths of 'rel1'. 
+ */ +static void +make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ + RelOptInfo *rel_grouped; + RelOptInfo *rel1_grouped; + RelOptInfo *rel2_grouped; + bool rel1_empty; + bool rel2_empty; + bool yet_to_add = false; + + /* + * If there are no aggregate expressions or grouping expressions, eager + * aggregation is not possible. + */ + if (root->agg_clause_list == NIL || + root->group_expr_list == NIL) + return; + + /* + * See if we already have a grouped joinrel for this joinrel. + */ + rel_grouped = find_grouped_rel(root, joinrel->relids); + + /* + * Construct a new RelOptInfo for the grouped join relation if there is no + * existing one. + */ + if (rel_grouped == NULL) + { + RelAggInfo *agg_info = NULL; + + /* + * Prepare the information needed to create grouped paths for this + * join relation. + */ + agg_info = create_rel_agg_info(root, joinrel); + if (agg_info == NULL) + return; + + /* build a grouped relation out of the plain relation */ + rel_grouped = build_grouped_rel(root, joinrel); + rel_grouped->reltarget = agg_info->target; + rel_grouped->rows = agg_info->grouped_rows; + rel_grouped->agg_info = agg_info; + + /* + * If the grouped paths for the given join relation are considered + * useful, add the grouped relation we just built to the PlannerInfo + * to make it available for further joining or for acting as the upper + * rel representing the result of partial aggregation. Otherwise, we + * need to postpone the decision on adding the grouped relation to the + * PlannerInfo, as it depends on whether we can generate any grouped + * paths by joining the given pair of input relations. + */ + if (agg_info->agg_useful) + add_grouped_rel(root, rel_grouped); + else + yet_to_add = true; + } + + Assert(IS_GROUPED_REL(rel_grouped)); + + /* We may have already proven this grouped join relation to be dummy. 
*/ + if (IS_DUMMY_REL(rel_grouped)) + return; + + /* Retrieve the grouped relations for the two input rels */ + rel1_grouped = find_grouped_rel(root, rel1->relids); + rel2_grouped = find_grouped_rel(root, rel2->relids); + + rel1_empty = (rel1_grouped == NULL || IS_DUMMY_REL(rel1_grouped)); + rel2_empty = (rel2_grouped == NULL || IS_DUMMY_REL(rel2_grouped)); + + /* Nothing to do if there's no grouped relation. */ + if (rel1_empty && rel2_empty) + return; + + /* Joining two grouped relations is currently not supported */ + if (!rel1_empty && !rel2_empty) + return; + + /* Generate partial aggregation paths for the grouped relation */ + if (!rel1_empty) + { + populate_joinrel_with_paths(root, rel1_grouped, rel2, rel_grouped, + sjinfo, restrictlist); + + /* + * It shouldn't happen that we have marked rel1_grouped as dummy in + * populate_joinrel_with_paths due to provably constant-false join + * restrictions, hence we wouldn't end up with a plan that has Aggref + * in non-Agg plan node. + */ + Assert(!IS_DUMMY_REL(rel1_grouped)); + } + else if (!rel2_empty) + { + populate_joinrel_with_paths(root, rel1, rel2_grouped, rel_grouped, + sjinfo, restrictlist); + + /* + * It shouldn't happen that we have marked rel2_grouped as dummy in + * populate_joinrel_with_paths due to provably constant-false join + * restrictions, hence we wouldn't end up with a plan that has Aggref + * in non-Agg plan node. + */ + Assert(!IS_DUMMY_REL(rel2_grouped)); + } + + /* + * Since we have generated grouped paths by joining the given pair of + * input relations, add the grouped relation to the PlannerInfo if we have + * not already done so. + */ + if (yet_to_add) + add_grouped_rel(root, rel_grouped); +} + /* * populate_joinrel_with_paths * Add paths to the given joinrel for given pair of joining relations. 
The @@ -1675,6 +1811,11 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, adjust_child_relids(joinrel->relids, nappinfos, appinfos))); + /* Build a grouped join relation for 'child_joinrel' if possible */ + make_grouped_join_rel(root, child_rel1, child_rel2, + child_joinrel, child_sjinfo, + child_restrictlist); + /* And make paths for the child join */ populate_joinrel_with_paths(root, child_rel1, child_rel2, child_joinrel, child_sjinfo, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 1d1aa27d4506..9605d7dfeffc 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "access/nbtree.h" #include "catalog/pg_constraint.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -81,6 +82,8 @@ typedef struct JoinTreeItem } JoinTreeItem; +static void create_agg_clause_infos(PlannerInfo *root); +static void create_grouping_expr_infos(PlannerInfo *root); static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex); static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, @@ -628,6 +631,261 @@ remove_useless_groupby_columns(PlannerInfo *root) } } +/* + * setup_eager_aggregation + * Check if eager aggregation is applicable, and if so collect suitable + * aggregate expressions and grouping expressions in the query. + */ +void +setup_eager_aggregation(PlannerInfo *root) +{ + /* + * Don't apply eager aggregation if disabled by user. + */ + if (!enable_eager_aggregate) + return; + + /* + * Don't apply eager aggregation if there are no available GROUP BY + * clauses. + */ + if (!root->processed_groupClause) + return; + + /* + * For now we don't try to support grouping sets. + */ + if (root->parse->groupingSets) + return; + + /* + * For now we don't try to support DISTINCT or ORDER BY aggregates. 
+ */ + if (root->numOrderedAggs > 0) + return; + + /* + * If there are any aggregates that do not support partial mode, or any + * partial aggregates that are non-serializable, do not apply eager + * aggregation. + */ + if (root->hasNonPartialAggs || root->hasNonSerialAggs) + return; + + /* + * We don't try to apply eager aggregation if there are set-returning + * functions in targetlist. + */ + if (root->parse->hasTargetSRFs) + return; + + /* + * Eager aggregation only makes sense if there are multiple base rels in + * the query. + */ + if (bms_membership(root->all_baserels) != BMS_MULTIPLE) + return; + + /* + * Collect aggregate expressions and plain Vars that appear in targetlist + * and havingQual. + */ + create_agg_clause_infos(root); + + /* + * If there are no suitable aggregate expressions, we cannot apply eager + * aggregation. + */ + if (root->agg_clause_list == NIL) + return; + + /* + * Collect grouping expressions that appear in grouping clauses. + */ + create_grouping_expr_infos(root); +} + +/* + * create_agg_clause_infos + * Search the targetlist and havingQual for Aggrefs and plain Vars, and + * create an AggClauseInfo for each Aggref node. + */ +static void +create_agg_clause_infos(PlannerInfo *root) +{ + List *tlist_exprs; + List *agg_clause_list = NIL; + List *tlist_vars = NIL; + ListCell *lc; + + Assert(root->agg_clause_list == NIL); + Assert(root->tlist_vars == NIL); + + tlist_exprs = pull_var_clause((Node *) root->processed_tlist, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + + /* + * Aggregates within the HAVING clause need to be processed in the same + * way as those in the targetlist. Note that HAVING can contain Aggrefs + * but not WindowFuncs. 
+ */ + if (root->parse->havingQual != NULL) + { + List *having_exprs; + + having_exprs = pull_var_clause((Node *) root->parse->havingQual, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_PLACEHOLDERS); + if (having_exprs != NIL) + { + tlist_exprs = list_concat(tlist_exprs, having_exprs); + list_free(having_exprs); + } + } + + foreach(lc, tlist_exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Aggref *aggref; + AggClauseInfo *ac_info; + + /* For now we don't try to support GROUPING() expressions */ + if (IsA(expr, GroupingFunc)) + { + list_free_deep(agg_clause_list); + list_free(tlist_vars); + list_free(tlist_exprs); + + return; + } + + /* Collect plain Vars for future reference */ + if (IsA(expr, Var)) + { + tlist_vars = list_append_unique(tlist_vars, expr); + continue; + } + + aggref = castNode(Aggref, expr); + + Assert(aggref->aggorder == NIL); + Assert(aggref->aggdistinct == NIL); + + /* + * If there are any securityQuals, do not try to apply eager + * aggregation if any non-leakproof aggregate functions are present. + * This is overly strict, but for now... + */ + if (root->qual_security_level > 0 && + !get_func_leakproof(aggref->aggfnoid)) + { + list_free_deep(agg_clause_list); + list_free(tlist_vars); + list_free(tlist_exprs); + + return; + } + + ac_info = makeNode(AggClauseInfo); + ac_info->aggref = aggref; + ac_info->agg_eval_at = pull_varnos(root, (Node *) aggref); + + agg_clause_list = list_append_unique(agg_clause_list, ac_info); + } + + list_free(tlist_exprs); + + root->agg_clause_list = agg_clause_list; + root->tlist_vars = tlist_vars; +} + +/* + * create_grouping_expr_infos + * Create GroupExprInfo for each expression usable as grouping key. + * + * If any grouping expression is not suitable, we will just return with + * root->group_expr_list being NIL. 
+ */ +static void +create_grouping_expr_infos(PlannerInfo *root) +{ + List *exprs = NIL; + List *sortgrouprefs = NIL; + List *btree_opfamilies = NIL; + ListCell *lc, + *lc1, + *lc2, + *lc3; + + Assert(root->group_expr_list == NIL); + + foreach(lc, root->processed_groupClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, root->processed_tlist); + TypeCacheEntry *tce; + Oid equalimageproc; + + Assert(tle->ressortgroupref > 0); + + /* + * For now we only support plain Vars as grouping expressions. + */ + if (!IsA(tle->expr, Var)) + return; + + /* + * Eager aggregation is only possible if equality implies image + * equality for each grouping key. Otherwise, placing keys with + * different byte images into the same group may result in the loss of + * information that could be necessary to evaluate upper qual clauses. + * + * For instance, the NUMERIC data type is not supported, as values + * that are considered equal by the equality operator (e.g., 0 and + * 0.0) can have different scales. + */ + tce = lookup_type_cache(exprType((Node *) tle->expr), + TYPECACHE_BTREE_OPFAMILY); + if (!OidIsValid(tce->btree_opf) || + !OidIsValid(tce->btree_opintype)) + return; + + equalimageproc = get_opfamily_proc(tce->btree_opf, + tce->btree_opintype, + tce->btree_opintype, + BTEQUALIMAGE_PROC); + if (!OidIsValid(equalimageproc) || + !DatumGetBool(OidFunctionCall1Coll(equalimageproc, + tce->typcollation, + ObjectIdGetDatum(tce->btree_opintype)))) + return; + + exprs = lappend(exprs, tle->expr); + sortgrouprefs = lappend_int(sortgrouprefs, tle->ressortgroupref); + btree_opfamilies = lappend_oid(btree_opfamilies, tce->btree_opf); + } + + /* + * Construct GroupExprInfo for each expression. 
+ */ + forthree(lc1, exprs, lc2, sortgrouprefs, lc3, btree_opfamilies) + { + Expr *expr = (Expr *) lfirst(lc1); + int sortgroupref = lfirst_int(lc2); + Oid btree_opfamily = lfirst_oid(lc3); + GroupExprInfo *ge_info; + + ge_info = makeNode(GroupExprInfo); + ge_info->expr = (Expr *) copyObject(expr); + ge_info->sortgroupref = sortgroupref; + ge_info->btree_opfamily = btree_opfamily; + + root->group_expr_list = lappend(root->group_expr_list, ge_info); + } +} + /***************************************************************************** * * LATERAL REFERENCES diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 5467e094ca7e..721b3174ffaf 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -64,8 +64,12 @@ query_planner(PlannerInfo *root, * NOTE: append_rel_list was set up by subquery_planner, so do not touch * here. */ - root->join_rel_list = NIL; - root->join_rel_hash = NULL; + root->join_rel_list = makeNode(RelInfoList); + root->join_rel_list->items = NIL; + root->join_rel_list->hash = NULL; + root->grouped_rel_list = makeNode(RelInfoList); + root->grouped_rel_list->items = NIL; + root->grouped_rel_list->hash = NULL; root->join_rel_level = NULL; root->join_cur_level = 0; root->canon_pathkeys = NIL; @@ -76,6 +80,9 @@ query_planner(PlannerInfo *root, root->placeholder_list = NIL; root->placeholder_array = NULL; root->placeholder_array_size = 0; + root->agg_clause_list = NIL; + root->group_expr_list = NIL; + root->tlist_vars = NIL; root->fkey_list = NIL; root->initial_rels = NIL; @@ -265,6 +272,12 @@ query_planner(PlannerInfo *root, */ extract_restriction_or_clauses(root); + /* + * Check if eager aggregation is applicable, and if so, set up + * root->agg_clause_list and root->group_expr_list. + */ + setup_eager_aggregation(root); + /* * Now expand appendrels by adding "otherrels" for their children. 
We * delay this to the end so that we have as much information as possible diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a4d523dcb0ff..4204d85b992e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -230,7 +230,6 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, grouping_sets_data *gd, - double dNumGroups, GroupPathExtraData *extra); static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, @@ -3931,9 +3930,7 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, GroupPathExtraData *extra, RelOptInfo **partially_grouped_rel_p) { - Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *partially_grouped_rel = NULL; - double dNumGroups; PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE; /* @@ -4015,23 +4012,16 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, /* Gather any partially grouped partial paths. */ if (partially_grouped_rel && partially_grouped_rel->partial_pathlist) - { gather_grouping_paths(root, partially_grouped_rel); - set_cheapest(partially_grouped_rel); - } - /* - * Estimate number of groups. - */ - dNumGroups = get_number_of_groups(root, - cheapest_path->rows, - gd, - extra->targetList); + /* Now choose the best path(s) for partially_grouped_rel. 
*/ + if (partially_grouped_rel && partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, partially_grouped_rel, agg_costs, gd, - dNumGroups, extra); + extra); /* Give a helpful error if we failed to find any implementation */ if (grouped_rel->pathlist == NIL) @@ -6976,16 +6966,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, double dNumGroups, + grouping_sets_data *gd, GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; + Path *cheapest_partially_grouped_path = NULL; ListCell *lc; bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; List *havingQual = (List *) extra->havingQual; AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + double dNumGroups = 0; + double dNumFinalGroups = 0; + + /* + * Estimate number of groups for non-split aggregation. + */ + dNumGroups = get_number_of_groups(root, + cheapest_path->rows, + gd, + extra->targetList); + + if (partially_grouped_rel && partially_grouped_rel->pathlist) + { + cheapest_partially_grouped_path = + partially_grouped_rel->cheapest_total_path; + + /* + * Estimate number of groups for final phase of partial aggregation. 
+ */ + dNumFinalGroups = + get_number_of_groups(root, + cheapest_partially_grouped_path->rows, + gd, + extra->targetList); + } if (can_sort) { @@ -7098,7 +7114,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = make_ordered_path(root, grouped_rel, path, - partially_grouped_rel->cheapest_total_path, + cheapest_partially_grouped_path, info->pathkeys, -1.0); @@ -7116,7 +7132,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, info->clauses, havingQual, agg_final_costs, - dNumGroups)); + dNumFinalGroups)); else add_path(grouped_rel, (Path *) create_group_path(root, @@ -7124,7 +7140,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path, info->clauses, havingQual, - dNumGroups)); + dNumFinalGroups)); } } @@ -7166,19 +7182,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ if (partially_grouped_rel && partially_grouped_rel->pathlist) { - Path *path = partially_grouped_rel->cheapest_total_path; - add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, - path, + cheapest_partially_grouped_path, grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_FINAL_DESERIAL, root->processed_groupClause, havingQual, agg_final_costs, - dNumGroups)); + dNumFinalGroups)); } } @@ -7228,6 +7242,21 @@ create_partial_grouping_paths(PlannerInfo *root, bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + /* + * The partially_grouped_rel could have been already created due to eager + * aggregation. + */ + partially_grouped_rel = find_grouped_rel(root, input_rel->relids); + Assert(enable_eager_aggregate || partially_grouped_rel == NULL); + + /* + * It is possible that the partially_grouped_rel created by eager + * aggregation is dummy. In this case we just set it to NULL. It might + * be created again by the following logic if possible. 
+ */ + if (partially_grouped_rel && IS_DUMMY_REL(partially_grouped_rel)) + partially_grouped_rel = NULL; + /* * Consider whether we should generate partially aggregated non-partial * paths. We can only do this if we have a non-partial path, and only if @@ -7251,19 +7280,27 @@ create_partial_grouping_paths(PlannerInfo *root, * If we can't partially aggregate partial paths, and we can't partially * aggregate non-partial paths, then don't bother creating the new * RelOptInfo at all, unless the caller specified force_rel_creation. + * + * Note that the partially_grouped_rel could have been already created and + * populated with appropriate paths by eager aggregation. */ if (cheapest_total_path == NULL && cheapest_partial_path == NULL && + (partially_grouped_rel == NULL || + partially_grouped_rel->pathlist == NIL) && !force_rel_creation) return NULL; /* * Build a new upper relation to represent the result of partially - * aggregating the rows from the input relation. - */ - partially_grouped_rel = fetch_upper_rel(root, - UPPERREL_PARTIAL_GROUP_AGG, - grouped_rel->relids); + * aggregating the rows from the input relation. The relation may already + * exist due to eager aggregation, in which case we don't need to create + * it. + */ + if (partially_grouped_rel == NULL) + partially_grouped_rel = fetch_upper_rel(root, + UPPERREL_PARTIAL_GROUP_AGG, + grouped_rel->relids); partially_grouped_rel->consider_parallel = grouped_rel->consider_parallel; partially_grouped_rel->reloptkind = grouped_rel->reloptkind; @@ -7272,6 +7309,14 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent; partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine; + /* + * Partially-grouped partial paths may have been generated by eager + * aggregation. If we find that parallelism is not possible for + * partially_grouped_rel, we need to drop these partial paths. 
+ */ + if (!partially_grouped_rel->consider_parallel) + partially_grouped_rel->partial_pathlist = NIL; + /* * Build target list for partial aggregate paths. These paths cannot just * emit the same tlist as regular aggregate paths, because (1) we must diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 5b3dc0d86539..91c852aebf35 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -516,6 +516,66 @@ adjust_appendrel_attrs_mutator(Node *node, return (Node *) newinfo; } + /* + * We have to process RelAggInfo nodes specially. + */ + if (IsA(node, RelAggInfo)) + { + RelAggInfo *oldinfo = (RelAggInfo *) node; + RelAggInfo *newinfo = makeNode(RelAggInfo); + + /* Copy all flat-copiable fields */ + memcpy(newinfo, oldinfo, sizeof(RelAggInfo)); + + newinfo->relids = adjust_child_relids(oldinfo->relids, + context->nappinfos, + context->appinfos); + + newinfo->target = (PathTarget *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->target, + context); + + newinfo->agg_input = (PathTarget *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->agg_input, + context); + + newinfo->group_clauses = (List *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->group_clauses, + context); + + newinfo->group_exprs = (List *) + adjust_appendrel_attrs_mutator((Node *) oldinfo->group_exprs, + context); + + return (Node *) newinfo; + } + + /* + * We have to process PathTarget nodes specially. 
+ */ + if (IsA(node, PathTarget)) + { + PathTarget *oldtarget = (PathTarget *) node; + PathTarget *newtarget = makeNode(PathTarget); + + /* Flat copy; exprs is translated below even if sortgrouprefs is NULL */ + memcpy(newtarget, oldtarget, sizeof(PathTarget)); + + newtarget->exprs = (List *) + adjust_appendrel_attrs_mutator((Node *) oldtarget->exprs, + context); + + if (oldtarget->sortgrouprefs) + { + Size nbytes = list_length(oldtarget->exprs) * sizeof(Index); + + newtarget->sortgrouprefs = (Index *) palloc(nbytes); + memcpy(newtarget->sortgrouprefs, oldtarget->sortgrouprefs, nbytes); + } + + return (Node *) newtarget; + } + /* * NOTE: we do not need to recurse into sublinks, because they should * already have been converted to subplans before we see them. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 93e73cb44dbb..9d5df0553b92 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -262,6 +262,12 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) * unparameterized path, too, if there is one; the users of that list find * it more convenient if that's included. * + * cheapest_parameterized_paths also always includes the fewest-row + * unparameterized path, if there is one, for grouped relations. Different + * paths of a grouped relation can have very different row counts, and in some + * cases the cheapest-total unparameterized path may not be the one with the + * fewest rows. + * * This is normally called only after we've finished constructing the path * list for the rel node.
*/ @@ -271,6 +277,7 @@ set_cheapest(RelOptInfo *parent_rel) Path *cheapest_startup_path; Path *cheapest_total_path; Path *best_param_path; + Path *fewest_row_path; List *parameterized_paths; ListCell *p; @@ -280,6 +287,7 @@ set_cheapest(RelOptInfo *parent_rel) elog(ERROR, "could not devise a query plan for the given query"); cheapest_startup_path = cheapest_total_path = best_param_path = NULL; + fewest_row_path = NULL; parameterized_paths = NIL; foreach(p, parent_rel->pathlist) @@ -341,6 +349,8 @@ set_cheapest(RelOptInfo *parent_rel) if (cheapest_total_path == NULL) { cheapest_startup_path = cheapest_total_path = path; + if (IS_GROUPED_REL(parent_rel)) + fewest_row_path = path; continue; } @@ -364,6 +374,27 @@ set_cheapest(RelOptInfo *parent_rel) compare_pathkeys(cheapest_total_path->pathkeys, path->pathkeys) == PATHKEYS_BETTER2)) cheapest_total_path = path; + + /* + * Find the fewest-row unparameterized path for a grouped + * relation. If we find two paths of the same row count, try to + * keep the one with the cheaper total cost; if the costs are + * identical, keep the better-sorted one. + */ + if (IS_GROUPED_REL(parent_rel)) + { + if (fewest_row_path->rows > path->rows) + fewest_row_path = path; + else if (fewest_row_path->rows == path->rows) + { + cmp = compare_path_costs(fewest_row_path, path, TOTAL_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(fewest_row_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + fewest_row_path = path; + } + } } } @@ -371,6 +402,10 @@ set_cheapest(RelOptInfo *parent_rel) if (cheapest_total_path) parameterized_paths = lcons(cheapest_total_path, parameterized_paths); + /* Add fewest-row unparameterized path, if any, to parameterized_paths */ + if (fewest_row_path && fewest_row_path != cheapest_total_path) + parameterized_paths = lcons(fewest_row_path, parameterized_paths); + /* * If there is no unparameterized path, use the best parameterized path as * cheapest_total_path (but not as cheapest_startup_path). 
@@ -2787,8 +2822,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathtype = T_Result; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe && @@ -3043,8 +3077,7 @@ create_incremental_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3091,8 +3124,7 @@ create_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3253,8 +3285,7 @@ create_agg_path(PlannerInfo *root, pathnode->path.pathtype = T_Agg; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index ff507331a061..0f72110063b7 100644 --- 
a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -16,6 +16,8 @@ #include +#include "access/nbtree.h" +#include "catalog/pg_constraint.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/appendinfo.h" @@ -27,19 +29,27 @@ #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" +#include "optimizer/planner.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" +#include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "rewrite/rewriteManip.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/typcache.h" -typedef struct JoinHashEntry +/* + * An entry of a hash table that we use to make lookup for RelOptInfo + * structures more efficient. + */ +typedef struct RelHashEntry { - Relids join_relids; /* hash key --- MUST BE FIRST */ - RelOptInfo *join_rel; -} JoinHashEntry; + Relids relids; /* hash key --- MUST BE FIRST */ + RelOptInfo *rel; +} RelHashEntry; static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel, @@ -83,7 +93,17 @@ static void build_child_join_reltarget(PlannerInfo *root, RelOptInfo *childrel, int nappinfos, AppendRelInfo **appinfos); - +static bool eager_aggregation_possible_for_relation(PlannerInfo *root, + RelOptInfo *rel); +static bool init_grouping_targets(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, PathTarget *agg_input, + List **group_clauses, List **group_exprs); +static bool is_var_in_aggref_only(PlannerInfo *root, Var *var); +static bool is_var_needed_by_join(PlannerInfo *root, Var *var, RelOptInfo *rel); +static Index get_expression_sortgroupref(PlannerInfo *root, Expr *expr); + +/* Minimum row reduction ratio at which a grouped path is considered useful */ +#define EAGER_AGGREGATE_RATIO 0.5 /* * setup_simple_rel_arrays @@ -276,6 +296,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->joininfo 
= NIL; rel->has_eclass_joins = false; rel->consider_partitionwise_join = false; /* might get changed later */ + rel->agg_info = NULL; rel->part_scheme = NULL; rel->nparts = -1; rel->boundinfo = NULL; @@ -406,6 +427,99 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) return rel; } +/* + * build_simple_grouped_rel + * Construct a new RelOptInfo for a grouped base relation out of an existing + * non-grouped base relation. + */ +RelOptInfo * +build_simple_grouped_rel(PlannerInfo *root, RelOptInfo *rel_plain) +{ + RelOptInfo *rel_grouped; + RelAggInfo *agg_info; + + /* + * We should have available aggregate expressions and grouping + * expressions, otherwise we cannot reach here. + */ + Assert(root->agg_clause_list != NIL); + Assert(root->group_expr_list != NIL); + + /* nothing to do for dummy rel */ + if (IS_DUMMY_REL(rel_plain)) + return NULL; + + /* + * Prepare the information needed to create grouped paths for this base + * relation. + */ + agg_info = create_rel_agg_info(root, rel_plain); + if (agg_info == NULL) + return NULL; + + /* + * If the grouped paths for the given base relation are not considered + * useful, do not build the grouped relation. + */ + if (!agg_info->agg_useful) + return NULL; + + /* build a grouped relation out of the plain relation */ + rel_grouped = build_grouped_rel(root, rel_plain); + rel_grouped->reltarget = agg_info->target; + rel_grouped->rows = agg_info->grouped_rows; + rel_grouped->agg_info = agg_info; + + return rel_grouped; +} + +/* + * build_grouped_rel + * Build a grouped relation by flat copying a plain relation and resetting + * the necessary fields. 
+ */ +RelOptInfo * +build_grouped_rel(PlannerInfo *root, RelOptInfo *rel_plain) +{ + RelOptInfo *rel_grouped; + + rel_grouped = makeNode(RelOptInfo); + memcpy(rel_grouped, rel_plain, sizeof(RelOptInfo)); + + /* + * clear path info + */ + rel_grouped->pathlist = NIL; + rel_grouped->ppilist = NIL; + rel_grouped->partial_pathlist = NIL; + rel_grouped->cheapest_startup_path = NULL; + rel_grouped->cheapest_total_path = NULL; + rel_grouped->cheapest_unique_path = NULL; + rel_grouped->cheapest_parameterized_paths = NIL; + + /* + * clear partition info + */ + rel_grouped->part_scheme = NULL; + rel_grouped->nparts = -1; + rel_grouped->boundinfo = NULL; + rel_grouped->partbounds_merged = false; + rel_grouped->partition_qual = NIL; + rel_grouped->part_rels = NULL; + rel_grouped->live_parts = NULL; + rel_grouped->all_partrels = NULL; + rel_grouped->partexprs = NULL; + rel_grouped->nullable_partexprs = NULL; + rel_grouped->consider_partitionwise_join = false; + + /* + * clear size estimates + */ + rel_grouped->rows = 0; + + return rel_grouped; +} + /* * find_base_rel * Find a base or otherrel relation entry, which must already exist. @@ -479,11 +593,11 @@ find_base_rel_ignore_join(PlannerInfo *root, int relid) } /* - * build_join_rel_hash - * Construct the auxiliary hash table for join relations. + * build_rel_hash + * Construct the auxiliary hash table for relations. 
*/ static void -build_join_rel_hash(PlannerInfo *root) +build_rel_hash(RelInfoList *list) { HTAB *hashtab; HASHCTL hash_ctl; @@ -491,47 +605,46 @@ build_join_rel_hash(PlannerInfo *root) /* Create the hash table */ hash_ctl.keysize = sizeof(Relids); - hash_ctl.entrysize = sizeof(JoinHashEntry); + hash_ctl.entrysize = sizeof(RelHashEntry); hash_ctl.hash = bitmap_hash; hash_ctl.match = bitmap_match; hash_ctl.hcxt = CurrentMemoryContext; - hashtab = hash_create("JoinRelHashTable", + hashtab = hash_create("RelHashTable", 256L, &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); - /* Insert all the already-existing joinrels */ - foreach(l, root->join_rel_list) + /* Insert all the already-existing RelOptInfos */ + foreach(l, list->items) { RelOptInfo *rel = (RelOptInfo *) lfirst(l); - JoinHashEntry *hentry; + RelHashEntry *hentry; bool found; - hentry = (JoinHashEntry *) hash_search(hashtab, - &(rel->relids), - HASH_ENTER, - &found); + hentry = (RelHashEntry *) hash_search(hashtab, + &(rel->relids), + HASH_ENTER, + &found); Assert(!found); - hentry->join_rel = rel; + hentry->rel = rel; } - root->join_rel_hash = hashtab; + list->hash = hashtab; } /* - * find_join_rel - * Returns relation entry corresponding to 'relids' (a set of RT indexes), - * or NULL if none exists. This is for join relations. + * find_rel_info + * Find a RelOptInfo entry corresponding to 'relids'. */ -RelOptInfo * -find_join_rel(PlannerInfo *root, Relids relids) +static RelOptInfo * +find_rel_info(RelInfoList *list, Relids relids) { /* * Switch to using hash lookup when list grows "too long". The threshold * is arbitrary and is known only here. */ - if (!root->join_rel_hash && list_length(root->join_rel_list) > 32) - build_join_rel_hash(root); + if (!list->hash && list_length(list->items) > 32) + build_rel_hash(list); /* * Use either hashtable lookup or linear search, as appropriate. 
@@ -541,23 +654,23 @@ find_join_rel(PlannerInfo *root, Relids relids) * so would force relids out of a register and thus probably slow down the * list-search case. */ - if (root->join_rel_hash) + if (list->hash) { Relids hashkey = relids; - JoinHashEntry *hentry; + RelHashEntry *hentry; - hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, - &hashkey, - HASH_FIND, - NULL); + hentry = (RelHashEntry *) hash_search(list->hash, + &hashkey, + HASH_FIND, + NULL); if (hentry) - return hentry->join_rel; + return hentry->rel; } else { ListCell *l; - foreach(l, root->join_rel_list) + foreach(l, list->items) { RelOptInfo *rel = (RelOptInfo *) lfirst(l); @@ -569,6 +682,28 @@ find_join_rel(PlannerInfo *root, Relids relids) return NULL; } +/* + * find_join_rel + * Returns relation entry corresponding to 'relids' (a set of RT indexes), + * or NULL if none exists. This is for join relations. + */ +RelOptInfo * +find_join_rel(PlannerInfo *root, Relids relids) +{ + return find_rel_info(root->join_rel_list, relids); +} + +/* + * find_grouped_rel + * Returns relation entry corresponding to 'relids' (a set of RT indexes), + * or NULL if none exists. This is for grouped relations. + */ +RelOptInfo * +find_grouped_rel(PlannerInfo *root, Relids relids) +{ + return find_rel_info(root->grouped_rel_list, relids); +} + /* * set_foreign_rel_properties * Set up foreign-join fields if outer and inner relation are foreign @@ -619,31 +754,53 @@ set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, } /* - * add_join_rel - * Add given join relation to the list of join relations in the given - * PlannerInfo. Also add it to the auxiliary hashtable if there is one. + * add_rel_info + * Add given relation to the list, and also add it to the auxiliary + * hashtable if there is one. */ static void -add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) +add_rel_info(RelInfoList *list, RelOptInfo *rel) { - /* GEQO requires us to append the new joinrel to the end of the list! 
*/ - root->join_rel_list = lappend(root->join_rel_list, joinrel); + /* GEQO requires us to append the new relation to the end of the list! */ + list->items = lappend(list->items, rel); /* store it into the auxiliary hashtable if there is one. */ - if (root->join_rel_hash) + if (list->hash) { - JoinHashEntry *hentry; + RelHashEntry *hentry; bool found; - hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, - &(joinrel->relids), - HASH_ENTER, - &found); + hentry = (RelHashEntry *) hash_search(list->hash, + &(rel->relids), + HASH_ENTER, + &found); Assert(!found); - hentry->join_rel = joinrel; + hentry->rel = rel; } } +/* + * add_join_rel + * Add given join relation to the list of join relations in the given + * PlannerInfo. + */ +static void +add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) +{ + add_rel_info(root->join_rel_list, joinrel); +} + +/* + * add_grouped_rel + * Add given grouped relation to the list of grouped relations in the + * given PlannerInfo. + */ +void +add_grouped_rel(PlannerInfo *root, RelOptInfo *rel) +{ + add_rel_info(root->grouped_rel_list, rel); +} + /* * build_join_rel * Returns relation entry corresponding to the union of two given rels, @@ -755,6 +912,7 @@ build_join_rel(PlannerInfo *root, joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->agg_info = NULL; joinrel->parent = NULL; joinrel->top_parent = NULL; joinrel->top_parent_relids = NULL; @@ -939,6 +1097,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->agg_info = NULL; joinrel->parent = parent_joinrel; joinrel->top_parent = parent_joinrel->top_parent ? 
parent_joinrel->top_parent : parent_joinrel; joinrel->top_parent_relids = joinrel->top_parent->relids; @@ -2518,3 +2677,504 @@ build_child_join_reltarget(PlannerInfo *root, childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple; childrel->reltarget->width = parentrel->reltarget->width; } + +/* + * create_rel_agg_info + * Create the RelAggInfo structure for the given relation if it can produce + * grouped paths. The given relation is the non-grouped one which has the + * reltarget already constructed. + */ +RelAggInfo * +create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + RelAggInfo *result; + PathTarget *agg_input; + PathTarget *target; + List *group_clauses = NIL; + List *group_exprs = NIL; + + /* + * The lists of aggregate expressions and grouping expressions should have + * been constructed. + */ + Assert(root->agg_clause_list != NIL); + Assert(root->group_expr_list != NIL); + + /* + * If this is a child rel, the grouped rel for its parent rel must have + * been created if it can. So we can just use parent's RelAggInfo if + * there is one, with appropriate variable substitutions. + */ + if (IS_OTHER_REL(rel)) + { + RelOptInfo *rel_grouped; + RelAggInfo *agg_info; + + Assert(!bms_is_empty(rel->top_parent_relids)); + rel_grouped = find_grouped_rel(root, rel->top_parent_relids); + + if (rel_grouped == NULL) + return NULL; + + Assert(IS_GROUPED_REL(rel_grouped)); + /* Must do multi-level transformation */ + agg_info = (RelAggInfo *) + adjust_appendrel_attrs_multilevel(root, + (Node *) rel_grouped->agg_info, + rel, + rel->top_parent); + + agg_info->grouped_rows = + estimate_num_groups(root, agg_info->group_exprs, + rel->rows, NULL, NULL); + + /* + * The grouped paths for the given relation are considered useful iff + * the row reduction ratio is no less than EAGER_AGGREGATE_RATIO. 
+ */ + agg_info->agg_useful = + (agg_info->grouped_rows <= rel->rows * (1 - EAGER_AGGREGATE_RATIO)); + + return agg_info; + } + + /* Check if it's possible to produce grouped paths for this relation. */ + if (!eager_aggregation_possible_for_relation(root, rel)) + return NULL; + + /* + * Create targets for the grouped paths and for the input paths of the + * grouped paths. + */ + target = create_empty_pathtarget(); + agg_input = create_empty_pathtarget(); + + /* ... and initialize these targets */ + if (!init_grouping_targets(root, rel, target, agg_input, + &group_clauses, &group_exprs)) + return NULL; + + /* + * Eager aggregation is not applicable if there are no available grouping + * expressions. + */ + if (list_length(group_clauses) == 0) + return NULL; + + /* build the RelAggInfo result */ + result = makeNode(RelAggInfo); + + result->group_clauses = group_clauses; + result->group_exprs = group_exprs; + + /* Calculate pathkeys that represent these grouping requirements */ + result->group_pathkeys = + make_pathkeys_for_sortclauses(root, result->group_clauses, + make_tlist_from_pathtarget(target)); + + /* Add aggregates to the grouping target */ + foreach(lc, root->agg_clause_list) + { + AggClauseInfo *ac_info = lfirst_node(AggClauseInfo, lc); + Aggref *aggref; + + Assert(IsA(ac_info->aggref, Aggref)); + + aggref = (Aggref *) copyObject(ac_info->aggref); + mark_partial_aggref(aggref, AGGSPLIT_INITIAL_SERIAL); + + add_column_to_pathtarget(target, (Expr *) aggref, 0); + } + + /* Set the estimated eval cost and output width for both targets */ + set_pathtarget_cost_width(root, target); + set_pathtarget_cost_width(root, agg_input); + + result->relids = bms_copy(rel->relids); + result->target = target; + result->agg_input = agg_input; + result->grouped_rows = estimate_num_groups(root, result->group_exprs, + rel->rows, NULL, NULL); + + /* + * The grouped paths for the given relation are considered useful iff the + * row reduction ratio is no less than 
EAGER_AGGREGATE_RATIO. + */ + result->agg_useful = + (result->grouped_rows <= rel->rows * (1 - EAGER_AGGREGATE_RATIO)); + + return result; +} + +/* + * eager_aggregation_possible_for_relation + * Check if it's possible to produce grouped paths for the given relation. + */ +static bool +eager_aggregation_possible_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + int cur_relid; + + /* + * Check to see if the given relation is in the nullable side of an outer + * join. In this case, we cannot push a partial aggregation down to the + * relation, because the NULL-extended rows produced by the outer join + * would not be available when we perform the partial aggregation, while + * with a non-eager-aggregation plan these rows are available for the + * top-level aggregation. Doing so may result in the rows being grouped + * differently than expected, or produce incorrect values from the + * aggregate functions. + */ + cur_relid = -1; + while ((cur_relid = bms_next_member(rel->relids, cur_relid)) >= 0) + { + RelOptInfo *baserel = find_base_rel_ignore_join(root, cur_relid); + + if (baserel == NULL) + continue; /* ignore outer joins in rel->relids */ + + if (!bms_is_subset(baserel->nulling_relids, rel->relids)) + return false; + } + + /* + * For now we don't try to support PlaceHolderVars. + */ + foreach(lc, rel->reltarget->exprs) + { + Expr *expr = lfirst(lc); + + if (IsA(expr, PlaceHolderVar)) + return false; + } + + /* Caller should only pass base relations or joins. */ + Assert(rel->reloptkind == RELOPT_BASEREL || + rel->reloptkind == RELOPT_JOINREL); + + /* + * Check if all aggregate expressions can be evaluated on this relation + * level. + */ + foreach(lc, root->agg_clause_list) + { + AggClauseInfo *ac_info = lfirst_node(AggClauseInfo, lc); + + Assert(IsA(ac_info->aggref, Aggref)); + + /* + * Give up if any aggregate requires relations other than the current + * one. 
If the aggregate requires the current relation plus + * additional relations, grouping the current relation could make some + * input rows unavailable for the higher aggregate and may reduce the + * number of input rows it receives. If the aggregate does not + * require the current relation at all, it should not be grouped, as + * we do not support joining two grouped relations. + */ + if (!bms_is_subset(ac_info->agg_eval_at, rel->relids)) + return false; + } + + return true; +} + +/* + * init_grouping_targets + * Initialize the target for grouped paths (target) as well as the target + * for paths that generate input for the grouped paths (agg_input). + * + * We also construct the list of SortGroupClauses and the list of grouping + * expressions for the partial aggregation, and return them in *group_clauses + * and *group_exprs. + * + * Return true if the targets could be initialized, false otherwise. + */ +static bool +init_grouping_targets(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, PathTarget *agg_input, + List **group_clauses, List **group_exprs) +{ + ListCell *lc; + List *possibly_dependent = NIL; + Index maxSortGroupRef; + + /* Identify the max sortgroupref */ + maxSortGroupRef = 0; + foreach(lc, root->processed_tlist) + { + Index ref = ((TargetEntry *) lfirst(lc))->ressortgroupref; + + if (ref > maxSortGroupRef) + maxSortGroupRef = ref; + } + + foreach(lc, rel->reltarget->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Index sortgroupref; + + /* + * Given that PlaceHolderVar currently prevents us from doing eager + * aggregation, the source target cannot contain anything more complex + * than a Var. + */ + Assert(IsA(expr, Var)); + + /* Get the sortgroupref if the expr can act as grouping expression. 
*/ + sortgroupref = get_expression_sortgroupref(root, expr); + if (sortgroupref > 0) + { + SortGroupClause *sgc; + + /* Find the matching SortGroupClause */ + sgc = get_sortgroupref_clause(sortgroupref, root->processed_groupClause); + Assert(sgc->tleSortGroupRef <= maxSortGroupRef); + + /* + * If the target expression can be used as a grouping key, it + * should be emitted by the grouped paths that have been pushed + * down to this relation level. + */ + add_column_to_pathtarget(target, expr, sortgroupref); + + /* + * ... and it also should be emitted by the input paths. + */ + add_column_to_pathtarget(agg_input, expr, sortgroupref); + + /* + * Record this SortGroupClause and grouping expression. Note that + * this SortGroupClause might have already been recorded. + */ + if (!list_member(*group_clauses, sgc)) + { + *group_clauses = lappend(*group_clauses, sgc); + *group_exprs = lappend(*group_exprs, expr); + } + } + else if (is_var_needed_by_join(root, (Var *) expr, rel)) + { + /* + * The expression is needed for an upper join but is neither in + * the GROUP BY clause nor derivable from it using EC (otherwise, + * it would have already been included in the targets above). We + * need to create a special SortGroupClause for this expression. + * + * It is important to include such expressions in the grouping + * keys. This is essential to ensure that an aggregated row from + * the partial aggregation matches the other side of the join if + * and only if each row in the partial group does. This ensures + * that all rows within the same partial group share the same + * 'destiny', which is crucial for maintaining correctness. + */ + SortGroupClause *sgc; + TypeCacheEntry *tce; + Oid equalimageproc; + + /* + * But first, check if equality implies image equality for this + * expression. If not, we cannot use it as a grouping key. See + * comments in create_grouping_expr_infos(). 
+ */ + tce = lookup_type_cache(exprType((Node *) expr), + TYPECACHE_BTREE_OPFAMILY); + if (!OidIsValid(tce->btree_opf) || + !OidIsValid(tce->btree_opintype)) + return false; + + equalimageproc = get_opfamily_proc(tce->btree_opf, + tce->btree_opintype, + tce->btree_opintype, + BTEQUALIMAGE_PROC); + if (!OidIsValid(equalimageproc) || + !DatumGetBool(OidFunctionCall1Coll(equalimageproc, + tce->typcollation, + ObjectIdGetDatum(tce->btree_opintype)))) + return false; + + /* Create the SortGroupClause. */ + sgc = makeNode(SortGroupClause); + + /* Initialize the SortGroupClause. */ + sgc->tleSortGroupRef = ++maxSortGroupRef; + get_sort_group_operators(exprType((Node *) expr), + false, true, false, + &sgc->sortop, &sgc->eqop, NULL, + &sgc->hashable); + + /* This expression should be emitted by the grouped paths */ + add_column_to_pathtarget(target, expr, sgc->tleSortGroupRef); + + /* ... and it also should be emitted by the input paths. */ + add_column_to_pathtarget(agg_input, expr, sgc->tleSortGroupRef); + + /* Record this SortGroupClause and grouping expression */ + *group_clauses = lappend(*group_clauses, sgc); + *group_exprs = lappend(*group_exprs, expr); + } + else if (is_var_in_aggref_only(root, (Var *) expr)) + { + /* + * The expression is referenced by an aggregate function pushed + * down to this relation and does not appear elsewhere in the + * targetlist or havingQual. Add it to 'agg_input' but not to + * 'target'. + */ + add_new_column_to_pathtarget(agg_input, expr); + } + else + { + /* + * The expression may be functionally dependent on other + * expressions in the target, but we cannot verify this until all + * target expressions have been constructed. + */ + possibly_dependent = lappend(possibly_dependent, expr); + } + } + + /* + * Now we can verify whether an expression is functionally dependent on + * others. 
+ */ + foreach(lc, possibly_dependent) + { + Var *tvar; + List *deps = NIL; + RangeTblEntry *rte; + + tvar = lfirst_node(Var, lc); + rte = root->simple_rte_array[tvar->varno]; + + if (check_functional_grouping(rte->relid, tvar->varno, + tvar->varlevelsup, + target->exprs, &deps)) + { + /* + * The expression is functionally dependent on other target + * expressions, so it can be included in the targets. Since it + * will not be used as a grouping key, a sortgroupref is not + * needed for it. + */ + add_new_column_to_pathtarget(target, (Expr *) tvar); + add_new_column_to_pathtarget(agg_input, (Expr *) tvar); + } + else + { + /* + * We may arrive here with a grouping expression that is proven + * redundant by EquivalenceClass processing, such as 't1.a' in the + * query below. + * + * select max(t1.c) from t t1, t t2 where t1.a = 1 group by t1.a, + * t1.b; + * + * For now we just give up in this case. + */ + return false; + } + } + + return true; +} + +/* + * is_var_in_aggref_only + * Check whether the given Var appears in aggregate expressions and not + * elsewhere in the targetlist or havingQual. + */ +static bool +is_var_in_aggref_only(PlannerInfo *root, Var *var) +{ + ListCell *lc; + + /* + * Search the list of aggregate expressions for the Var. + */ + foreach(lc, root->agg_clause_list) + { + AggClauseInfo *ac_info = lfirst_node(AggClauseInfo, lc); + List *vars; + + Assert(IsA(ac_info->aggref, Aggref)); + + if (!bms_is_member(var->varno, ac_info->agg_eval_at)) + continue; + + vars = pull_var_clause((Node *) ac_info->aggref, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + + if (list_member(vars, var)) + { + list_free(vars); + break; + } + + list_free(vars); + } + + return (lc != NULL && !list_member(root->tlist_vars, var)); +} + +/* + * is_var_needed_by_join + * Check if the given Var is needed by joins above the current rel. 
+ */ +static bool +is_var_needed_by_join(PlannerInfo *root, Var *var, RelOptInfo *rel) +{ + Relids relids; + int attno; + RelOptInfo *baserel; + + /* + * Note that when checking if the Var is needed by joins above, we want to + * exclude cases where the Var is only needed in the final output. So + * include "relation 0" in the check. + */ + relids = bms_copy(rel->relids); + relids = bms_add_member(relids, 0); + + baserel = find_base_rel(root, var->varno); + attno = var->varattno - baserel->min_attr; + + return bms_nonempty_difference(baserel->attr_needed[attno], relids); +} + +/* + * get_expression_sortgroupref + * Return sortgroupref if the given 'expr' can act as grouping expression, + * or 0 otherwise. + * + * We first check if 'expr' is among the grouping expressions. If it is not, + * we then check if 'expr' is known equal to any of the grouping expressions + * due to equivalence relationships. + */ +static Index +get_expression_sortgroupref(PlannerInfo *root, Expr *expr) +{ + ListCell *lc; + + foreach(lc, root->group_expr_list) + { + GroupExprInfo *ge_info = lfirst_node(GroupExprInfo, lc); + + Assert(IsA(ge_info->expr, Var)); + + if (equal(ge_info->expr, expr) || + exprs_known_equal(root, (Node *) expr, (Node *) ge_info->expr, + ge_info->btree_opfamily)) + { + Assert(ge_info->sortgroupref > 0); + + return ge_info->sortgroupref; + } + } + + /* The expression cannot act as grouping expression. 
*/ + return 0; +} diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 9c0b10ad4dc2..0ca0ca48f6eb 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -933,6 +933,16 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"enable_eager_aggregate", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables eager aggregation."), + NULL, + GUC_EXPLAIN + }, + &enable_eager_aggregate, + false, + NULL, NULL, NULL + }, { {"enable_parallel_append", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of parallel append plans."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 8de86e0c9454..d6abdf0c7e3d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -415,6 +415,7 @@ #enable_tidscan = on #enable_group_by_reordering = on #enable_distinct_reordering = on +#enable_eager_aggregate = off # - Planner Cost Constants - diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index fbf05322c75f..d1e30136b6b8 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -80,6 +80,25 @@ typedef enum UpperRelationKind /* NB: UPPERREL_FINAL must be last enum entry; it's used to size arrays */ } UpperRelationKind; +/* + * A structure consisting of a list and a hash table to store relations. + * + * For small problems we just scan the list to do lookups, but when there are + * many relations we build a hash table for faster lookups. The hash table is + * present and valid when 'hash' is not NULL. Note that we still maintain the + * list even when using the hash table for lookups; this simplifies life for + * GEQO. 
+ */ +typedef struct RelInfoList +{ + pg_node_attr(no_copy_equal, no_read) + + NodeTag type; + + List *items; + struct HTAB *hash pg_node_attr(read_write_ignore); +} RelInfoList; + /*---------- * PlannerGlobal * Global information for planning/optimization @@ -291,15 +310,16 @@ struct PlannerInfo /* * join_rel_list is a list of all join-relation RelOptInfos we have - * considered in this planning run. For small problems we just scan the - * list to do lookups, but when there are many join relations we build a - * hash table for faster lookups. The hash table is present and valid - * when join_rel_hash is not NULL. Note that we still maintain the list - * even when using the hash table for lookups; this simplifies life for - * GEQO. + * considered in this planning run. */ - List *join_rel_list; - struct HTAB *join_rel_hash pg_node_attr(read_write_ignore); + RelInfoList *join_rel_list; /* list of join-relation RelOptInfos */ + + /* + * grouped_rel_list is a list of all grouped-relation RelOptInfos we have + * considered in this planning run. This is only used by eager + * aggregation. + */ + RelInfoList *grouped_rel_list; /* list of grouped-relation RelOptInfos */ /* * When doing a dynamic-programming-style join search, join_rel_level[k] @@ -394,6 +414,15 @@ struct PlannerInfo /* list of PlaceHolderInfos */ List *placeholder_list; + /* list of AggClauseInfos */ + List *agg_clause_list; + + /* list of GroupExprInfos */ + List *group_expr_list; + + /* list of plain Vars contained in targetlist and havingQual */ + List *tlist_vars; + /* array of PlaceHolderInfos indexed by phid */ struct PlaceHolderInfo **placeholder_array pg_node_attr(read_write_ignore, array_size(placeholder_array_size)); /* allocated size of array */ @@ -638,7 +667,9 @@ typedef struct PartitionSchemeData *PartitionScheme; * the set of RT indexes for its component baserels, along with RT indexes * for any outer joins it has computed. 
We create RelOptInfo nodes for each * baserel and joinrel, and store them in the PlannerInfo's simple_rel_array - * and join_rel_list respectively. + * and join_rel_list respectively. We also create RelOptInfo nodes for each + * grouped relation when eager aggregation is enabled, and store them in the + * PlannerInfo's grouped_rel_list. * * Note that there is only one joinrel for any given set of component * baserels, no matter what order we assemble them in; so an unordered @@ -703,7 +734,10 @@ typedef struct PartitionSchemeData *PartitionScheme; * cheapest_unique_path - for caching cheapest path to produce unique * (no duplicates) output from relation; NULL if not yet requested * cheapest_parameterized_paths - best paths for their parameterizations; - * always includes cheapest_total_path, even if that's unparameterized + * always includes cheapest_total_path, even if that's unparameterized; + * in the grouped relation case, always includes the unparameterized + * path with the fewest rows, if there is one and it is not + * cheapest_total_path * direct_lateral_relids - rels this rel has direct LATERAL references to * lateral_relids - required outer rels for LATERAL, as a Relids set * (includes both direct and indirect lateral references) @@ -1022,6 +1056,12 @@ typedef struct RelOptInfo /* consider partitionwise join paths? (if partitioned rel) */ bool consider_partitionwise_join; + /* + * used by eager aggregation: + */ + /* information needed to create grouped paths */ + struct RelAggInfo *agg_info; + /* * inheritance links, if this is an otherrel (otherwise NULL): */ @@ -1095,6 +1135,68 @@ typedef struct RelOptInfo ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs) +/* + * Is the given relation a grouped relation? + */ +#define IS_GROUPED_REL(rel) \ + ((rel)->agg_info != NULL) + +/* + * RelAggInfo + * Information needed to create grouped paths for base and join rels. 
+ * + * "relids" is the set of relation identifiers (RT indexes). + * + * "target" is the output tlist for the grouped paths. + * + * "agg_input" is the output tlist for the paths that provide input to the + * grouped paths. One difference from the reltarget of the non-grouped + * relation is that agg_input has its sortgrouprefs[] initialized. + * + * "grouped_rows" is the estimated number of result tuples of the grouped + * relation. + * + * "group_clauses", "group_exprs" and "group_pathkeys" are lists of + * SortGroupClauses, the corresponding grouping expressions and PathKeys + * respectively. + * + * "agg_useful" is a flag to indicate whether the grouped paths are considered + * useful. + */ +typedef struct RelAggInfo +{ + pg_node_attr(no_copy_equal, no_read, no_query_jumble) + + NodeTag type; + + /* set of base + OJ relids (rangetable indexes) */ + Relids relids; + + /* + * default result targetlist for Paths scanning this grouped relation; + * list of Vars/Exprs, cost, width + */ + struct PathTarget *target; + + /* + * the targetlist for Paths that provide input to the grouped paths + */ + struct PathTarget *agg_input; + + /* estimated number of result tuples */ + Cardinality grouped_rows; + + /* a list of SortGroupClauses */ + List *group_clauses; + /* a list of grouping expressions */ + List *group_exprs; + /* a list of PathKeys */ + List *group_pathkeys; + + /* the grouped paths are considered useful? 
*/ + bool agg_useful; +} RelAggInfo; + /* * IndexOptInfo * Per-index information for planning/optimization @@ -3168,6 +3270,41 @@ typedef struct MinMaxAggInfo Param *param; } MinMaxAggInfo; +/* + * The aggregate expressions that appear in targetlist and having clauses + */ +typedef struct AggClauseInfo +{ + pg_node_attr(no_read, no_query_jumble) + + NodeTag type; + + /* the Aggref expr */ + Aggref *aggref; + + /* lowest level we can evaluate this aggregate at */ + Relids agg_eval_at; +} AggClauseInfo; + +/* + * The grouping expressions that appear in grouping clauses + */ +typedef struct GroupExprInfo +{ + pg_node_attr(no_read, no_query_jumble) + + NodeTag type; + + /* the represented expression */ + Expr *expr; + + /* the tleSortGroupRef of the corresponding SortGroupClause */ + Index sortgroupref; + + /* btree opfamily defining the ordering */ + Oid btree_opfamily; +} GroupExprInfo; + /* * At runtime, PARAM_EXEC slots are used to pass values around from one plan * node to another. They can be used to pass values down into subqueries (for diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 719be3897f63..7747fb339734 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -313,10 +313,16 @@ extern void setup_simple_rel_arrays(PlannerInfo *root); extern void expand_planner_arrays(PlannerInfo *root, int add_size); extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent); +extern RelOptInfo *build_simple_grouped_rel(PlannerInfo *root, + RelOptInfo *rel_plain); +extern RelOptInfo *build_grouped_rel(PlannerInfo *root, + RelOptInfo *rel_plain); extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid); extern RelOptInfo *find_base_rel_noerr(PlannerInfo *root, int relid); extern RelOptInfo *find_base_rel_ignore_join(PlannerInfo *root, int relid); extern RelOptInfo *find_join_rel(PlannerInfo *root, Relids relids); +extern void add_grouped_rel(PlannerInfo *root, RelOptInfo 
*rel); +extern RelOptInfo *find_grouped_rel(PlannerInfo *root, Relids relids); extern RelOptInfo *build_join_rel(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer_rel, @@ -352,4 +358,5 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root, SpecialJoinInfo *sjinfo, int nappinfos, AppendRelInfo **appinfos); +extern RelAggInfo *create_rel_agg_info(PlannerInfo *root, RelOptInfo *rel); #endif /* PATHNODE_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index bc5dfd7db417..dd8744a3432a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -21,6 +21,7 @@ * allpaths.c */ extern PGDLLIMPORT bool enable_geqo; +extern PGDLLIMPORT bool enable_eager_aggregate; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; @@ -57,6 +58,10 @@ extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); extern void generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); +extern void generate_grouped_paths(PlannerInfo *root, + RelOptInfo *rel_grouped, + RelOptInfo *rel_plain, + RelAggInfo *agg_info); extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, int max_workers); extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 5a9301996114..73196ec99a8c 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -76,6 +76,7 @@ extern void add_vars_to_targetlist(PlannerInfo *root, List *vars, extern void add_vars_to_attr_needed(PlannerInfo *root, List *vars, Relids where_needed); extern void remove_useless_groupby_columns(PlannerInfo *root); +extern void setup_eager_aggregation(PlannerInfo *root); extern void find_lateral_references(PlannerInfo *root); extern void rebuild_lateral_attr_needed(PlannerInfo 
*root); extern void create_lateral_join_info(PlannerInfo *root); diff --git a/src/test/regress/expected/eager_aggregate.out b/src/test/regress/expected/eager_aggregate.out new file mode 100644 index 000000000000..9f63472eff1b --- /dev/null +++ b/src/test/regress/expected/eager_aggregate.out @@ -0,0 +1,1308 @@ +-- +-- EAGER AGGREGATION +-- Test we can push aggregation down below join +-- +-- Enable eager aggregation, which by default is disabled. +SET enable_eager_aggregate TO on; +CREATE TABLE eager_agg_t1 (a int, b int, c double precision); +CREATE TABLE eager_agg_t2 (a int, b int, c double precision); +CREATE TABLE eager_agg_t3 (a int, b int, c double precision); +INSERT INTO eager_agg_t1 SELECT i, i, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t2 SELECT i, i%10, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t3 SELECT i%10, i%10, i FROM generate_series(1, 1000)i; +ANALYZE eager_agg_t1; +ANALYZE eager_agg_t2; +ANALYZE eager_agg_t3; +-- +-- Test eager aggregation over base rel +-- +-- Perform scan of a table, aggregate the result, join it to the other table +-- and finalize the aggregation. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(18 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(9 rows) + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +------------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial GroupAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Sort + Output: t2.c, t2.b + Sort Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b +(21 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(9 rows) + 
+RESET enable_hashagg; +-- +-- Test eager aggregation over join rel +-- +-- Perform join of tables, aggregate the result, join it to the other table +-- and finalize the aggregation. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +------------------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg((t2.c + t3.c)) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg((t2.c + t3.c))) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg((t2.c + t3.c)) + Group Key: t2.b + -> Hash Join + Output: t2.c, t2.b, t3.c + Hash Cond: (t3.a = t2.a) + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t2.c, t2.b, t2.a + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b, t2.a +(25 rows) + +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 497 + 2 | 499 + 3 | 501 + 4 | 503 + 5 | 505 + 6 | 507 + 7 | 509 + 8 | 511 + 9 | 513 +(9 rows) + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +------------------------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg((t2.c + t3.c)) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg((t2.c + 
t3.c))) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg((t2.c + t3.c))) + -> Partial GroupAggregate + Output: t2.b, PARTIAL avg((t2.c + t3.c)) + Group Key: t2.b + -> Sort + Output: t2.c, t2.b, t3.c + Sort Key: t2.b + -> Hash Join + Output: t2.c, t2.b, t3.c + Hash Cond: (t3.a = t2.a) + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t2.c, t2.b, t2.a + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b, t2.a +(28 rows) + +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 497 + 2 | 499 + 3 | 501 + 4 | 503 + 5 | 505 + 6 | 507 + 7 | 509 + 8 | 511 + 9 | 513 +(9 rows) + +RESET enable_hashagg; +-- +-- Test that eager aggregation works for outer join +-- +-- Ensure aggregation can be pushed down to the non-nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Hash Right Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(18 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 + | 505 +(10 rows) + +-- Ensure aggregation cannot be pushed down to the 
nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, avg(t2.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t2.b ORDER BY t2.b; + QUERY PLAN +------------------------------------------------------------ + Sort + Output: t2.b, (avg(t2.c)) + Sort Key: t2.b + -> HashAggregate + Output: t2.b, avg(t2.c) + Group Key: t2.b + -> Hash Right Join + Output: t2.b, t2.c + Hash Cond: (t2.b = t1.b) + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c + -> Hash + Output: t1.b + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.b +(15 rows) + +SELECT t2.b, avg(t2.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t2.b ORDER BY t2.b; + b | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 + | +(10 rows) + +-- +-- Test that eager aggregation works for parallel plans +-- +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=4; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + QUERY PLAN +--------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Gather Merge + Output: t1.a, (PARTIAL avg(t2.c)) + Workers Planned: 2 + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Parallel Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Parallel Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Parallel Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Parallel Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(21 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 
+ 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(9 rows) + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; +DROP TABLE eager_agg_t1; +DROP TABLE eager_agg_t2; +DROP TABLE eager_agg_t3; +-- +-- Test eager aggregation for partitionwise join +-- +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; +CREATE TABLE eager_agg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab1_p1 PARTITION OF eager_agg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab1_p2 PARTITION OF eager_agg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab1_p3 PARTITION OF eager_agg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE eager_agg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE eager_agg_tab2_p1 PARTITION OF eager_agg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab2_p2 PARTITION OF eager_agg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab2_p3 PARTITION OF eager_agg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO eager_agg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO eager_agg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; +ANALYZE eager_agg_tab1; +ANALYZE eager_agg_tab2; +-- When GROUP BY clause matches; full aggregation is performed for each partition. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY t1.x; + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort + Output: t1.x, (sum(t1.y)), (count(*)) + Sort Key: t1.x + -> Append + -> Finalize HashAggregate + Output: t1.x, sum(t1.y), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + Hash Cond: (t2.y = t1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2 + Output: t2.y + -> Hash + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1.x, PARTIAL sum(t1.y), PARTIAL count(*) + Group Key: t1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.x, t1.y + -> Finalize HashAggregate + Output: t1_1.x, sum(t1_1.y), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_1 + Output: t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.y), PARTIAL count(*) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.x, t1_1.y + -> Finalize HashAggregate + Output: t1_2.x, sum(t1_2.y), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_2 + Output: t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.y), PARTIAL count(*) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.x, t1_2.y +(49 rows) + +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY t1.x; + x | sum | count +----+------+------- + 0 | 500 | 100 + 
6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- GROUP BY having other matching key +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY t2.y; + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort + Output: t2.y, (sum(t1.y)), (count(*)) + Sort Key: t2.y + -> Append + -> Finalize HashAggregate + Output: t2.y, sum(t1.y), count(*) + Group Key: t2.y + -> Hash Join + Output: t2.y, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + Hash Cond: (t2.y = t1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2 + Output: t2.y + -> Hash + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1.x, PARTIAL sum(t1.y), PARTIAL count(*) + Group Key: t1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.y, t1.x + -> Finalize HashAggregate + Output: t2_1.y, sum(t1_1.y), count(*) + Group Key: t2_1.y + -> Hash Join + Output: t2_1.y, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_1 + Output: t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.y), PARTIAL count(*) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.y, t1_1.x + -> Finalize HashAggregate + Output: t2_2.y, sum(t1_2.y), count(*) + Group Key: t2_2.y + -> Hash Join + Output: t2_2.y, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_2 + Output: t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.y), PARTIAL count(*) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.y, t1_2.x +(49 rows) + +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, 
eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY t2.y; + y | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10 ORDER BY t2.x; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Sort + Output: t2.x, (sum(t1.x)), (count(*)) + Sort Key: t2.x + -> Finalize HashAggregate + Output: t2.x, sum(t1.x), count(*) + Group Key: t2.x + Filter: (avg(t1.x) > '10'::numeric) + -> Append + -> Hash Join + Output: t2_1.x, (PARTIAL sum(t1_1.x)), (PARTIAL count(*)), (PARTIAL avg(t1_1.x)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2_1 + Output: t2_1.x, t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.x)), (PARTIAL count(*)), (PARTIAL avg(t1_1.x)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.x), PARTIAL count(*), PARTIAL avg(t1_1.x) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1_1 + Output: t1_1.x + -> Hash Join + Output: t2_2.x, (PARTIAL sum(t1_2.x)), (PARTIAL count(*)), (PARTIAL avg(t1_2.x)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_2 + Output: t2_2.x, t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.x)), (PARTIAL count(*)), (PARTIAL avg(t1_2.x)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.x), PARTIAL count(*), PARTIAL avg(t1_2.x) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_2 + Output: t1_2.x + -> Hash Join + Output: t2_3.x, (PARTIAL sum(t1_3.x)), (PARTIAL count(*)), (PARTIAL avg(t1_3.x)) + Hash Cond: (t2_3.y = t1_3.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_3 + Output: t2_3.x, t2_3.y + -> Hash + Output: t1_3.x, (PARTIAL 
sum(t1_3.x)), (PARTIAL count(*)), (PARTIAL avg(t1_3.x)) + -> Partial HashAggregate + Output: t1_3.x, PARTIAL sum(t1_3.x), PARTIAL count(*), PARTIAL avg(t1_3.x) + Group Key: t1_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_3 + Output: t1_3.x +(44 rows) + +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10 ORDER BY t2.x; + x | sum | count +----+------+------- + 2 | 600 | 50 + 4 | 1200 | 50 + 8 | 900 | 50 + 12 | 600 | 50 + 14 | 1200 | 50 + 18 | 900 | 50 +(6 rows) + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Output: t1.x, (sum((t2.y + t3.y))) + Sort Key: t1.x + -> Append + -> Finalize HashAggregate + Output: t1.x, sum((t2.y + t3.y)) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum((t2.y + t3.y))) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, t3.x, (PARTIAL sum((t2.y + t3.y))) + -> Partial HashAggregate + Output: t2.x, t3.x, PARTIAL sum((t2.y + t3.y)) + Group Key: t2.x + -> Hash Join + Output: t2.y, t2.x, t3.y, t3.x + Hash Cond: (t2.x = t3.x) + -> Seq Scan on public.eager_agg_tab1_p1 t2 + Output: t2.y, t2.x + -> Hash + Output: t3.y, t3.x + -> Seq Scan on public.eager_agg_tab1_p1 t3 + Output: t3.y, t3.x + -> Finalize HashAggregate + Output: t1_1.x, sum((t2_1.y + t3_1.y)) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum((t2_1.y + t3_1.y))) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))) + -> Partial HashAggregate + Output: t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + 
t3_1.y)) + Group Key: t2_1.x + -> Hash Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab1_p2 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t3_1 + Output: t3_1.y, t3_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum((t2_2.y + t3_2.y)) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum((t2_2.y + t3_2.y))) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, t3_2.x, (PARTIAL sum((t2_2.y + t3_2.y))) + -> Partial HashAggregate + Output: t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)) + Group Key: t2_2.x + -> Hash Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab1_p3 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t3_2 + Output: t3_2.y, t3_2.x +(70 rows) + +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + x | sum +----+------- + 0 | 10000 + 2 | 14000 + 4 | 18000 + 6 | 22000 + 8 | 26000 + 10 | 10000 + 12 | 14000 + 14 | 18000 + 16 | 22000 + 18 | 26000 + 20 | 10000 + 22 | 14000 + 24 | 18000 + 26 | 22000 + 28 | 26000 +(15 rows) + +-- partial aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; + QUERY PLAN +------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: t3.y, sum((t2.y + t3.y)) + Group Key: t3.y + -> Sort + Output: t3.y, (PARTIAL sum((t2.y + t3.y))) + Sort Key: t3.y + -> Append + -> Hash Join + Output: t3_1.y, (PARTIAL sum((t2_1.y + t3_1.y))) + Hash Cond: (t2_1.x = t1_1.x) + -> 
Partial GroupAggregate + Output: t2_1.x, t3_1.y, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)) + Group Key: t2_1.x, t3_1.y, t3_1.x + -> Incremental Sort + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Sort Key: t2_1.x, t3_1.y + Presorted Key: t2_1.x + -> Merge Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Merge Cond: (t2_1.x = t3_1.x) + -> Sort + Output: t2_1.y, t2_1.x + Sort Key: t2_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Sort + Output: t3_1.y, t3_1.x + Sort Key: t3_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t3_1 + Output: t3_1.y, t3_1.x + -> Hash + Output: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1_1 + Output: t1_1.x + -> Hash Join + Output: t3_2.y, (PARTIAL sum((t2_2.y + t3_2.y))) + Hash Cond: (t2_2.x = t1_2.x) + -> Partial GroupAggregate + Output: t2_2.x, t3_2.y, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)) + Group Key: t2_2.x, t3_2.y, t3_2.x + -> Incremental Sort + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Sort Key: t2_2.x, t3_2.y + Presorted Key: t2_2.x + -> Merge Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Merge Cond: (t2_2.x = t3_2.x) + -> Sort + Output: t2_2.y, t2_2.x + Sort Key: t2_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t2_2 + Output: t2_2.y, t2_2.x + -> Sort + Output: t3_2.y, t3_2.x + Sort Key: t3_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t3_2 + Output: t3_2.y, t3_2.x + -> Hash + Output: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_2 + Output: t1_2.x + -> Hash Join + Output: t3_3.y, (PARTIAL sum((t2_3.y + t3_3.y))) + Hash Cond: (t2_3.x = t1_3.x) + -> Partial GroupAggregate + Output: t2_3.x, t3_3.y, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)) + Group Key: t2_3.x, t3_3.y, t3_3.x + -> Incremental Sort + Output: t2_3.y, t2_3.x, t3_3.y, t3_3.x + Sort Key: t2_3.x, t3_3.y + Presorted Key: t2_3.x + -> Merge Join + Output: t2_3.y, t2_3.x, t3_3.y, t3_3.x + Merge Cond: (t2_3.x = t3_3.x) + -> Sort + Output: t2_3.y, t2_3.x + Sort Key: t2_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t2_3 + Output: t2_3.y, t2_3.x + 
-> Sort + Output: t3_3.y, t3_3.x + Sort Key: t3_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t3_3 + Output: t3_3.y, t3_3.x + -> Hash + Output: t1_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_3 + Output: t1_3.x +(88 rows) + +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; + y | sum +----+------- + 0 | 7500 + 2 | 13500 + 4 | 19500 + 6 | 25500 + 8 | 31500 + 10 | 22500 + 12 | 28500 + 14 | 34500 + 16 | 40500 + 18 | 46500 +(10 rows) + +RESET enable_hashagg; +DROP TABLE eager_agg_tab1; +DROP TABLE eager_agg_tab2; +-- +-- Test with multi-level partitioning scheme +-- +CREATE TABLE eager_agg_tab_ml(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p1 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab_ml_p2 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p2_s1 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (10) TO (15); +CREATE TABLE eager_agg_tab_ml_p2_s2 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (15) TO (20); +CREATE TABLE eager_agg_tab_ml_p3 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (20) TO (30) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p3_s1 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (20) TO (25); +CREATE TABLE eager_agg_tab_ml_p3_s2 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (25) TO (30); +INSERT INTO eager_agg_tab_ml SELECT i % 30, i % 30 FROM generate_series(1, 1000) i; +ANALYZE eager_agg_tab_ml; +-- When GROUP BY clause matches; full aggregation is performed for each partition. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY t1.x; + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort + Output: t1.x, (sum(t2.y)), (count(*)) + Sort Key: t1.x + -> Append + -> Finalize HashAggregate + Output: t1.x, sum(t2.y), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum(t2.y)), (PARTIAL count(*)) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, (PARTIAL sum(t2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2.x, PARTIAL sum(t2.y), PARTIAL count(*) + Group Key: t2.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t2 + Output: t2.y, t2.x + -> Finalize HashAggregate + Output: t1_1.x, sum(t2_1.y), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, PARTIAL sum(t2_1.y), PARTIAL count(*) + Group Key: t2_1.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_1 + Output: t2_1.y, t2_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum(t2_2.y), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, PARTIAL sum(t2_2.y), PARTIAL count(*) + Group Key: t2_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_2 + Output: t2_2.y, t2_2.x + -> Finalize HashAggregate + Output: t1_3.x, sum(t2_3.y), count(*) + Group Key: t1_3.x + -> Hash Join + Output: t1_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + 
Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_3 + Output: t1_3.x + -> Hash + Output: t2_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, PARTIAL sum(t2_3.y), PARTIAL count(*) + Group Key: t2_3.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_3 + Output: t2_3.y, t2_3.x + -> Finalize HashAggregate + Output: t1_4.x, sum(t2_4.y), count(*) + Group Key: t1_4.x + -> Hash Join + Output: t1_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_4 + Output: t1_4.x + -> Hash + Output: t2_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, PARTIAL sum(t2_4.y), PARTIAL count(*) + Group Key: t2_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_4 + Output: t2_4.y, t2_4.x +(79 rows) + +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY t1.x; + x | sum | count +----+-------+------- + 0 | 0 | 1089 + 1 | 1156 | 1156 + 2 | 2312 | 1156 + 3 | 3468 | 1156 + 4 | 4624 | 1156 + 5 | 5780 | 1156 + 6 | 6936 | 1156 + 7 | 8092 | 1156 + 8 | 9248 | 1156 + 9 | 10404 | 1156 + 10 | 11560 | 1156 + 11 | 11979 | 1089 + 12 | 13068 | 1089 + 13 | 14157 | 1089 + 14 | 15246 | 1089 + 15 | 16335 | 1089 + 16 | 17424 | 1089 + 17 | 18513 | 1089 + 18 | 19602 | 1089 + 19 | 20691 | 1089 + 20 | 21780 | 1089 + 21 | 22869 | 1089 + 22 | 23958 | 1089 + 23 | 25047 | 1089 + 24 | 26136 | 1089 + 25 | 27225 | 1089 + 26 | 28314 | 1089 + 27 | 29403 | 1089 + 28 | 30492 | 1089 + 29 | 31581 | 1089 +(30 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y ORDER BY t1.y; + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort + Output: t1.y, (sum(t2.y)), (count(*)) + Sort Key: t1.y + -> Finalize HashAggregate + Output: t1.y, sum(t2.y), count(*) + Group Key: t1.y + -> Append + -> Hash Join + Output: t1_1.y, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1_1 + Output: t1_1.y, t1_1.x + -> Hash + Output: t2_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, PARTIAL sum(t2_1.y), PARTIAL count(*) + Group Key: t2_1.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash Join + Output: t1_2.y, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_2 + Output: t1_2.y, t1_2.x + -> Hash + Output: t2_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, PARTIAL sum(t2_2.y), PARTIAL count(*) + Group Key: t2_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_2 + Output: t2_2.y, t2_2.x + -> Hash Join + Output: t1_3.y, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_3 + Output: t1_3.y, t1_3.x + -> Hash + Output: t2_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, PARTIAL sum(t2_3.y), PARTIAL count(*) + Group Key: t2_3.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_3 + Output: t2_3.y, t2_3.x + -> Hash Join + Output: t1_4.y, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_4 + Output: t1_4.y, t1_4.x + -> Hash + Output: t2_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + -> Partial HashAggregate + 
Output: t2_4.x, PARTIAL sum(t2_4.y), PARTIAL count(*) + Group Key: t2_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_4 + Output: t2_4.y, t2_4.x + -> Hash Join + Output: t1_5.y, (PARTIAL sum(t2_5.y)), (PARTIAL count(*)) + Hash Cond: (t1_5.x = t2_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_5 + Output: t1_5.y, t1_5.x + -> Hash + Output: t2_5.x, (PARTIAL sum(t2_5.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_5.x, PARTIAL sum(t2_5.y), PARTIAL count(*) + Group Key: t2_5.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_5 + Output: t2_5.y, t2_5.x +(67 rows) + +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y ORDER BY t1.y; + y | sum | count +----+-------+------- + 0 | 0 | 1089 + 1 | 1156 | 1156 + 2 | 2312 | 1156 + 3 | 3468 | 1156 + 4 | 4624 | 1156 + 5 | 5780 | 1156 + 6 | 6936 | 1156 + 7 | 8092 | 1156 + 8 | 9248 | 1156 + 9 | 10404 | 1156 + 10 | 11560 | 1156 + 11 | 11979 | 1089 + 12 | 13068 | 1089 + 13 | 14157 | 1089 + 14 | 15246 | 1089 + 15 | 16335 | 1089 + 16 | 17424 | 1089 + 17 | 18513 | 1089 + 18 | 19602 | 1089 + 19 | 20691 | 1089 + 20 | 21780 | 1089 + 21 | 22869 | 1089 + 22 | 23958 | 1089 + 23 | 25047 | 1089 + 24 | 26136 | 1089 + 25 | 27225 | 1089 + 26 | 28314 | 1089 + 27 | 29403 | 1089 + 28 | 30492 | 1089 + 29 | 31581 | 1089 +(30 rows) + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Sort + Output: t1.x, (sum((t2.y + t3.y))), (count(*)) + Sort Key: t1.x + -> Append + -> Finalize HashAggregate + Output: t1.x, sum((t2.y + t3.y)), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum((t2.y + t3.y))), (PARTIAL 
count(*)) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, t3.x, (PARTIAL sum((t2.y + t3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2.x, t3.x, PARTIAL sum((t2.y + t3.y)), PARTIAL count(*) + Group Key: t2.x + -> Hash Join + Output: t2.y, t2.x, t3.y, t3.x + Hash Cond: (t2.x = t3.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t2 + Output: t2.y, t2.x + -> Hash + Output: t3.y, t3.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t3 + Output: t3.y, t3.x + -> Finalize HashAggregate + Output: t1_1.x, sum((t2_1.y + t3_1.y)), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)), PARTIAL count(*) + Group Key: t2_1.x + -> Hash Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t3_1 + Output: t3_1.y, t3_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum((t2_2.y + t3_2.y)), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, t3_2.x, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)), PARTIAL count(*) + Group Key: t2_2.x + -> Hash Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, 
t3_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t3_2 + Output: t3_2.y, t3_2.x + -> Finalize HashAggregate + Output: t1_3.x, sum((t2_3.y + t3_3.y)), count(*) + Group Key: t1_3.x + -> Hash Join + Output: t1_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_3 + Output: t1_3.x + -> Hash + Output: t2_3.x, t3_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)), PARTIAL count(*) + Group Key: t2_3.x + -> Hash Join + Output: t2_3.y, t2_3.x, t3_3.y, t3_3.x + Hash Cond: (t2_3.x = t3_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_3 + Output: t2_3.y, t2_3.x + -> Hash + Output: t3_3.y, t3_3.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t3_3 + Output: t3_3.y, t3_3.x + -> Finalize HashAggregate + Output: t1_4.x, sum((t2_4.y + t3_4.y)), count(*) + Group Key: t1_4.x + -> Hash Join + Output: t1_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_4 + Output: t1_4.x + -> Hash + Output: t2_4.x, t3_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, t3_4.x, PARTIAL sum((t2_4.y + t3_4.y)), PARTIAL count(*) + Group Key: t2_4.x + -> Hash Join + Output: t2_4.y, t2_4.x, t3_4.y, t3_4.x + Hash Cond: (t2_4.x = t3_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_4 + Output: t2_4.y, t2_4.x + -> Hash + Output: t3_4.y, t3_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t3_4 + Output: t3_4.y, t3_4.x +(114 rows) + +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + x | sum | count +----+---------+------- + 0 | 0 | 35937 + 1 | 78608 | 39304 + 2 | 157216 | 39304 + 3 | 235824 | 39304 + 4 | 314432 | 39304 + 5 | 393040 | 39304 + 6 | 471648 | 39304 
+ 7 | 550256 | 39304 + 8 | 628864 | 39304 + 9 | 707472 | 39304 + 10 | 786080 | 39304 + 11 | 790614 | 35937 + 12 | 862488 | 35937 + 13 | 934362 | 35937 + 14 | 1006236 | 35937 + 15 | 1078110 | 35937 + 16 | 1149984 | 35937 + 17 | 1221858 | 35937 + 18 | 1293732 | 35937 + 19 | 1365606 | 35937 + 20 | 1437480 | 35937 + 21 | 1509354 | 35937 + 22 | 1581228 | 35937 + 23 | 1653102 | 35937 + 24 | 1724976 | 35937 + 25 | 1796850 | 35937 + 26 | 1868724 | 35937 + 27 | 1940598 | 35937 + 28 | 2012472 | 35937 + 29 | 2084346 | 35937 +(30 rows) + +-- partial aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Sort + Output: t3.y, (sum((t2.y + t3.y))), (count(*)) + Sort Key: t3.y + -> Finalize HashAggregate + Output: t3.y, sum((t2.y + t3.y)), count(*) + Group Key: t3.y + -> Append + -> Hash Join + Output: t3_1.y, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, t3_1.y, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, t3_1.y, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)), PARTIAL count(*) + Group Key: t2_1.x, t3_1.y, t3_1.x + -> Hash Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t3_1 + Output: t3_1.y, t3_1.x + -> Hash Join + Output: t3_2.y, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, t3_2.y, t3_2.x, 
(PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, t3_2.y, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)), PARTIAL count(*) + Group Key: t2_2.x, t3_2.y, t3_2.x + -> Hash Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t3_2 + Output: t3_2.y, t3_2.x + -> Hash Join + Output: t3_3.y, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_3 + Output: t1_3.x + -> Hash + Output: t2_3.x, t3_3.y, t3_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, t3_3.y, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)), PARTIAL count(*) + Group Key: t2_3.x, t3_3.y, t3_3.x + -> Hash Join + Output: t2_3.y, t2_3.x, t3_3.y, t3_3.x + Hash Cond: (t2_3.x = t3_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_3 + Output: t2_3.y, t2_3.x + -> Hash + Output: t3_3.y, t3_3.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t3_3 + Output: t3_3.y, t3_3.x + -> Hash Join + Output: t3_4.y, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_4 + Output: t1_4.x + -> Hash + Output: t2_4.x, t3_4.y, t3_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, t3_4.y, t3_4.x, PARTIAL sum((t2_4.y + t3_4.y)), PARTIAL count(*) + Group Key: t2_4.x, t3_4.y, t3_4.x + -> Hash Join + Output: t2_4.y, t2_4.x, t3_4.y, t3_4.x + Hash Cond: (t2_4.x = t3_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_4 + Output: t2_4.y, t2_4.x + -> Hash + Output: t3_4.y, t3_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t3_4 + Output: t3_4.y, t3_4.x + -> Hash Join + Output: t3_5.y, (PARTIAL sum((t2_5.y + t3_5.y))), (PARTIAL count(*)) + Hash Cond: (t1_5.x = 
t2_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_5 + Output: t1_5.x + -> Hash + Output: t2_5.x, t3_5.y, t3_5.x, (PARTIAL sum((t2_5.y + t3_5.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_5.x, t3_5.y, t3_5.x, PARTIAL sum((t2_5.y + t3_5.y)), PARTIAL count(*) + Group Key: t2_5.x, t3_5.y, t3_5.x + -> Hash Join + Output: t2_5.y, t2_5.x, t3_5.y, t3_5.x + Hash Cond: (t2_5.x = t3_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_5 + Output: t2_5.y, t2_5.x + -> Hash + Output: t3_5.y, t3_5.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t3_5 + Output: t3_5.y, t3_5.x +(102 rows) + +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; + y | sum | count +----+---------+------- + 0 | 0 | 35937 + 1 | 78608 | 39304 + 2 | 157216 | 39304 + 3 | 235824 | 39304 + 4 | 314432 | 39304 + 5 | 393040 | 39304 + 6 | 471648 | 39304 + 7 | 550256 | 39304 + 8 | 628864 | 39304 + 9 | 707472 | 39304 + 10 | 786080 | 39304 + 11 | 790614 | 35937 + 12 | 862488 | 35937 + 13 | 934362 | 35937 + 14 | 1006236 | 35937 + 15 | 1078110 | 35937 + 16 | 1149984 | 35937 + 17 | 1221858 | 35937 + 18 | 1293732 | 35937 + 19 | 1365606 | 35937 + 20 | 1437480 | 35937 + 21 | 1509354 | 35937 + 22 | 1581228 | 35937 + 23 | 1653102 | 35937 + 24 | 1724976 | 35937 + 25 | 1796850 | 35937 + 26 | 1868724 | 35937 + 27 | 1940598 | 35937 + 28 | 2012472 | 35937 + 29 | 2084346 | 35937 +(30 rows) + +DROP TABLE eager_agg_tab_ml; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 83228cfca293..6cd2a7e827b9 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -151,6 +151,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_distinct_reordering | on + enable_eager_aggregate | off enable_gathermerge | on enable_group_by_reordering 
| on enable_hashagg | on diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 0a35f2f8f6a9..de7101d70524 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate eager_aggregate # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/eager_aggregate.sql b/src/test/regress/sql/eager_aggregate.sql new file mode 100644 index 000000000000..4050e4df44d6 --- /dev/null +++ b/src/test/regress/sql/eager_aggregate.sql @@ -0,0 +1,192 @@ +-- +-- EAGER AGGREGATION +-- Test we can push aggregation down below join +-- + +-- Enable eager aggregation, which by default is disabled. +SET enable_eager_aggregate TO on; + +CREATE TABLE eager_agg_t1 (a int, b int, c double precision); +CREATE TABLE eager_agg_t2 (a int, b int, c double precision); +CREATE TABLE eager_agg_t3 (a int, b int, c double precision); + +INSERT INTO eager_agg_t1 SELECT i, i, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t2 SELECT i, i%10, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t3 SELECT i%10, i%10, i FROM generate_series(1, 1000)i; + +ANALYZE eager_agg_t1; +ANALYZE eager_agg_t2; +ANALYZE eager_agg_t3; + + +-- +-- Test eager aggregation over base rel +-- + +-- Perform scan of a table, aggregate the result, join it to the other table +-- and finalize the aggregation. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + +RESET enable_hashagg; + + +-- +-- Test eager aggregation over join rel +-- + +-- Perform join of tables, aggregate the result, join it to the other table +-- and finalize the aggregation. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a ORDER BY t1.a; + +RESET enable_hashagg; + + +-- +-- Test that eager aggregation works for outer join +-- + +-- Ensure aggregation can be pushed down to the non-nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + +-- Ensure aggregation cannot be pushed down to the 
nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, avg(t2.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t2.b ORDER BY t2.b; +SELECT t2.b, avg(t2.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t2.b ORDER BY t2.b; + + +-- +-- Test that eager aggregation works for parallel plans +-- + +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=4; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; + + +DROP TABLE eager_agg_t1; +DROP TABLE eager_agg_t2; +DROP TABLE eager_agg_t3; + + +-- +-- Test eager aggregation for partitionwise join +-- + +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. 
+SET enable_partitionwise_join TO true; + +CREATE TABLE eager_agg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab1_p1 PARTITION OF eager_agg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab1_p2 PARTITION OF eager_agg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab1_p3 PARTITION OF eager_agg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE eager_agg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE eager_agg_tab2_p1 PARTITION OF eager_agg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab2_p2 PARTITION OF eager_agg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab2_p3 PARTITION OF eager_agg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO eager_agg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO eager_agg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; + +ANALYZE eager_agg_tab1; +ANALYZE eager_agg_tab2; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY t1.x; +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY t1.x; + +-- GROUP BY having other matching key +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY t2.y; +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY t2.y; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. 
+EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10 ORDER BY t2.x; +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10 ORDER BY t2.x; + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + +-- partial aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; +RESET enable_hashagg; + +DROP TABLE eager_agg_tab1; +DROP TABLE eager_agg_tab2; + + +-- +-- Test with multi-level partitioning scheme +-- +CREATE TABLE eager_agg_tab_ml(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p1 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab_ml_p2 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p2_s1 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (10) TO (15); +CREATE TABLE eager_agg_tab_ml_p2_s2 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (15) TO (20); +CREATE TABLE eager_agg_tab_ml_p3 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (20) TO (30) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p3_s1 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (20) TO (25); +CREATE TABLE 
eager_agg_tab_ml_p3_s2 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (25) TO (30); +INSERT INTO eager_agg_tab_ml SELECT i % 30, i % 30 FROM generate_series(1, 1000) i; + +ANALYZE eager_agg_tab_ml; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY t1.x; +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY t1.x; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y ORDER BY t1.y; +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y ORDER BY t1.y; + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x ORDER BY t1.x; + +-- partial aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y ORDER BY t3.y; + +DROP TABLE eager_agg_tab_ml; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 93339ef3c58f..7decd2bd601d 100644 --- 
a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -41,6 +41,7 @@ AfterTriggersTableData AfterTriggersTransData Agg AggClauseCosts +AggClauseInfo AggInfo AggPath AggSplit @@ -1081,6 +1082,7 @@ GrantTargetType Group GroupByOrdering GroupClause +GroupExprInfo GroupPath GroupPathExtraData GroupResultPath @@ -1314,7 +1316,6 @@ Join JoinCostWorkspace JoinDomain JoinExpr -JoinHashEntry JoinPath JoinPathExtraData JoinState @@ -2411,13 +2412,17 @@ ReindexObjectType ReindexParams ReindexStmt ReindexType +RelAggInfo RelFileLocator RelFileLocatorBackend RelFileNumber +RelHashEntry RelIdCacheEnt RelIdToTypeIdCacheEntry RelInfo RelInfoArr +RelInfoList +RelInfoListInfo RelMapFile RelMapping RelOptInfo