Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 505c008

Browse files
committed
Restore preprocess_groupclause()
0452b46 made optimizer explore alternative orderings of group-by pathkeys. It eliminated preprocess_groupclause(), which was intended to match items between GROUP BY and ORDER BY. Instead, get_useful_group_keys_orderings() function generates orderings of GROUP BY elements at the time of grouping paths generation. The get_useful_group_keys_orderings() function takes into account 3 orderings of GROUP BY pathkeys and clauses: original order as written in GROUP BY, matching ORDER BY clauses as much as possible, and matching the input path as much as possible. Given that even before 0452b46, preprocess_groupclause() could change the original order of GROUP BY clauses we don't need to consider it apart from ordering matching ORDER BY clauses. This commit restores preprocess_groupclause() to provide an ordering of GROUP BY elements matching ORDER BY before generation of paths. The new version of preprocess_groupclause() takes into account an incremental sort. The get_useful_group_keys_orderings() function now takes into 2 orderings of GROUP BY elements: the order generated preprocess_groupclause() and the order matching the input path as much as possible. Discussion: https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Andrei Lepikhov, Pavel Borisov
1 parent 0c1af2c commit 505c008

File tree

4 files changed

+108
-67
lines changed

4 files changed

+108
-67
lines changed

src/backend/optimizer/path/pathkeys.c

+5-50
Original file line numberDiff line numberDiff line change
@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
447447
return n;
448448
}
449449

450-
/*
451-
* pathkeys_are_duplicate
452-
* Check if give pathkeys are already contained the list of
453-
* GroupByOrdering's.
454-
*/
455-
static bool
456-
pathkeys_are_duplicate(List *infos, List *pathkeys)
457-
{
458-
ListCell *lc;
459-
460-
foreach(lc, infos)
461-
{
462-
GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
463-
464-
if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
465-
return true;
466-
}
467-
return false;
468-
}
469-
470450
/*
471451
* get_useful_group_keys_orderings
472452
* Determine which orderings of GROUP BY keys are potentially interesting.
@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
475455
* ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
476456
* matching order.
477457
*
478-
* The function considers (and keeps) multiple GROUP BY orderings:
458+
* The function considers (and keeps) following GROUP BY orderings:
479459
*
480-
* - the original ordering, as specified by the GROUP BY clause,
481-
* - GROUP BY keys reordered to match 'path' ordering (as much as possible),
482-
* - GROUP BY keys to match target ORDER BY clause (as much as possible).
460+
* - GROUP BY keys as ordered by preprocess_groupclause() to match target
461+
* ORDER BY clause (as much as possible),
462+
* - GROUP BY keys reordered to match 'path' ordering (as much as possible).
483463
*/
484464
List *
485465
get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
526506

527507
if (n > 0 &&
528508
(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
529-
!pathkeys_are_duplicate(infos, pathkeys))
530-
{
531-
info = makeNode(GroupByOrdering);
532-
info->pathkeys = pathkeys;
533-
info->clauses = clauses;
534-
535-
infos = lappend(infos, info);
536-
}
537-
}
538-
539-
/*
540-
* Try reordering pathkeys to minimize the sort cost (this time consider
541-
* the ORDER BY clause).
542-
*/
543-
if (root->sort_pathkeys &&
544-
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
545-
{
546-
int n;
547-
548-
n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
549-
&clauses,
550-
root->num_groupby_pathkeys);
551-
552-
if (n > 0 &&
553-
(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
554-
!pathkeys_are_duplicate(infos, pathkeys))
509+
compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
555510
{
556511
info = makeNode(GroupByOrdering);
557512
info->pathkeys = pathkeys;

src/backend/optimizer/plan/planner.c

+95-13
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,
137137
double tuple_fraction,
138138
int64 *offset_est, int64 *count_est);
139139
static void remove_useless_groupby_columns(PlannerInfo *root);
140-
static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
140+
static List *preprocess_groupclause(PlannerInfo *root, List *force);
141141
static List *extract_rollup_sets(List *groupingSets);
142142
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
143143
static void standard_qp_callback(PlannerInfo *root, void *extra);
@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
14221422
else if (parse->groupClause)
14231423
{
14241424
/* Preprocess regular GROUP BY clause, if any */
1425-
root->processed_groupClause = list_copy(parse->groupClause);
1425+
root->processed_groupClause = preprocess_groupclause(root, NIL);
14261426
/* Remove any redundant GROUP BY columns */
14271427
remove_useless_groupby_columns(root);
14281428
}
@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)
21692169
* The groupClauses for hashed grouping sets are built later on.)
21702170
*/
21712171
if (gs->set)
2172-
rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
2172+
rollup->groupClause = preprocess_groupclause(root, gs->set);
21732173
else
21742174
rollup->groupClause = NIL;
21752175

@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)
28212821
}
28222822

28232823
/*
2824-
* groupclause_apply_groupingset
2825-
* Apply the order of GROUP BY clauses defined by grouping sets. Items
2826-
* not in the grouping set are skipped.
2824+
* preprocess_groupclause - do preparatory work on GROUP BY clause
2825+
*
2826+
* The idea here is to adjust the ordering of the GROUP BY elements
2827+
* (which in itself is semantically insignificant) to match ORDER BY,
2828+
* thereby allowing a single sort operation to both implement the ORDER BY
2829+
* requirement and set up for a Unique step that implements GROUP BY.
2830+
* We also consider partial match between GROUP BY and ORDER BY elements,
2831+
* which could allow to implement ORDER BY using the incremental sort.
2832+
*
2833+
* We also consider other orderings of the GROUP BY elements, which could
2834+
* match the sort ordering of other possible plans (eg an indexscan) and
2835+
* thereby reduce cost. This is implemented during the generation of grouping
2836+
* paths. See get_useful_group_keys_orderings() for details.
2837+
*
2838+
* Note: we need no comparable processing of the distinctClause because
2839+
* the parser already enforced that that matches ORDER BY.
2840+
*
2841+
* Note: we return a fresh List, but its elements are the same
2842+
* SortGroupClauses appearing in parse->groupClause. This is important
2843+
* because later processing may modify the processed_groupClause list.
2844+
*
2845+
* For grouping sets, the order of items is instead forced to agree with that
2846+
* of the grouping set (and items not in the grouping set are skipped). The
2847+
* work of sorting the order of grouping set elements to match the ORDER BY if
2848+
* possible is done elsewhere.
28272849
*/
28282850
static List *
2829-
groupclause_apply_groupingset(PlannerInfo *root, List *gset)
2851+
preprocess_groupclause(PlannerInfo *root, List *force)
28302852
{
28312853
Query *parse = root->parse;
28322854
List *new_groupclause = NIL;
28332855
ListCell *sl;
2856+
ListCell *gl;
28342857

2835-
foreach(sl, gset)
2858+
/* For grouping sets, we need to force the ordering */
2859+
if (force)
28362860
{
2837-
Index ref = lfirst_int(sl);
2838-
SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2861+
foreach(sl, force)
2862+
{
2863+
Index ref = lfirst_int(sl);
2864+
SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2865+
2866+
new_groupclause = lappend(new_groupclause, cl);
2867+
}
28392868

2840-
new_groupclause = lappend(new_groupclause, cl);
2869+
return new_groupclause;
28412870
}
2871+
2872+
/* If no ORDER BY, nothing useful to do here */
2873+
if (parse->sortClause == NIL)
2874+
return list_copy(parse->groupClause);
2875+
2876+
/*
2877+
* Scan the ORDER BY clause and construct a list of matching GROUP BY
2878+
* items, but only as far as we can make a matching prefix.
2879+
*
2880+
* This code assumes that the sortClause contains no duplicate items.
2881+
*/
2882+
foreach(sl, parse->sortClause)
2883+
{
2884+
SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2885+
2886+
foreach(gl, parse->groupClause)
2887+
{
2888+
SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2889+
2890+
if (equal(gc, sc))
2891+
{
2892+
new_groupclause = lappend(new_groupclause, gc);
2893+
break;
2894+
}
2895+
}
2896+
if (gl == NULL)
2897+
break; /* no match, so stop scanning */
2898+
}
2899+
2900+
2901+
/* If no match at all, no point in reordering GROUP BY */
2902+
if (new_groupclause == NIL)
2903+
return list_copy(parse->groupClause);
2904+
2905+
/*
2906+
* Add any remaining GROUP BY items to the new list. We don't require a
2907+
* complete match, because even partial match allows ORDER BY to be
2908+
* implemented using incremental sort. Also, give up if there are any
2909+
* non-sortable GROUP BY items, since then there's no hope anyway.
2910+
*/
2911+
foreach(gl, parse->groupClause)
2912+
{
2913+
SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2914+
2915+
if (list_member_ptr(new_groupclause, gc))
2916+
continue; /* it matched an ORDER BY item */
2917+
if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2918+
return list_copy(parse->groupClause);
2919+
new_groupclause = lappend(new_groupclause, gc);
2920+
}
2921+
2922+
/* Success --- install the rearranged GROUP BY list */
2923+
Assert(list_length(parse->groupClause) == list_length(new_groupclause));
28422924
return new_groupclause;
28432925
}
28442926

@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,
41704252
{
41714253
rollup = makeNode(RollupData);
41724254

4173-
rollup->groupClause = groupclause_apply_groupingset(root, gset);
4255+
rollup->groupClause = preprocess_groupclause(root, gset);
41744256
rollup->gsets_data = list_make1(gs);
41754257
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
41764258
rollup->gsets_data,
@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,
43594441

43604442
Assert(gs->set != NIL);
43614443

4362-
rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
4444+
rollup->groupClause = preprocess_groupclause(root, gs->set);
43634445
rollup->gsets_data = list_make1(gs);
43644446
rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
43654447
rollup->gsets_data,

src/include/nodes/pathnodes.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,11 @@ struct PlannerInfo
426426
* items to be proven redundant, implying that there is only one group
427427
* containing all the query's rows. Hence, if you want to check whether
428428
* GROUP BY was specified, test for nonempty parse->groupClause, not for
429-
* nonempty processed_groupClause.
429+
* nonempty processed_groupClause. Optimizer chooses specific order of
430+
* group-by clauses during the upper paths generation process, attempting
431+
* to use different strategies to minimize number of sorts or engage
432+
* incremental sort. See preprocess_groupclause() and
433+
* get_useful_group_keys_orderings() for details.
430434
*
431435
* Currently, when grouping sets are specified we do not attempt to
432436
* optimize the groupClause, so that processed_groupClause will be

src/test/regress/expected/partition_aggregate.out

+3-3
Original file line numberDiff line numberDiff line change
@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV
901901
Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))
902902
-> Append
903903
-> HashAggregate
904-
Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a
904+
Group Key: pagg_tab_m.a, pagg_tab_m.c, ((pagg_tab_m.a + pagg_tab_m.b) / 2)
905905
Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))
906906
-> Seq Scan on pagg_tab_m_p1 pagg_tab_m
907907
-> HashAggregate
908-
Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a
908+
Group Key: pagg_tab_m_1.a, pagg_tab_m_1.c, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)
909909
Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))
910910
-> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1
911911
-> HashAggregate
912-
Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a
912+
Group Key: pagg_tab_m_2.a, pagg_tab_m_2.c, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)
913913
Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))
914914
-> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2
915915
(15 rows)

0 commit comments

Comments
 (0)