Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fa52221

Browse files
committed
Add GROUP-BY strategy: put the most distinct column at the head.
Let's allow GROUP-BY to utilize cost_sort feature which can differentiate orders of pathkeys lists according to the ndistinct of the first column. Task: 9578. Tags: optimized_group_by.
1 parent e5f5238 commit fa52221

File tree

3 files changed

+96
-30
lines changed

3 files changed

+96
-30
lines changed

src/backend/optimizer/path/pathkeys.c

+70-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "optimizer/paths.h"
2727
#include "partitioning/partbounds.h"
2828
#include "utils/lsyscache.h"
29+
#include "utils/selfuncs.h"
2930

3031
/* Consider reordering of GROUP BY keys? */
3132
bool enable_group_by_reordering = true;
@@ -471,6 +472,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
471472
List *pathkeys = root->group_pathkeys;
472473
List *clauses = root->processed_groupClause;
473474

475+
double nd_max = 0;
476+
PathKey *pk_opt = NULL;
477+
ListCell *lc1, *lc2;
478+
474479
/* always return at least the original pathkeys/clauses */
475480
info = makeNode(GroupByOrdering);
476481
info->pathkeys = pathkeys;
@@ -524,9 +529,6 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
524529
/* Test consistency of info structures */
525530
for_each_from(lc, infos, 1)
526531
{
527-
ListCell *lc1,
528-
*lc2;
529-
530532
info = lfirst_node(GroupByOrdering, lc);
531533

532534
Assert(list_length(info->clauses) == list_length(pinfo->clauses));
@@ -544,6 +546,71 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
544546
}
545547
}
546548
#endif
549+
550+
/*
551+
* Let's try the order with the column having max ndistinct value
552+
*/
553+
554+
forboth(lc1, root->group_pathkeys, lc2, root->processed_groupClause)
555+
{
556+
PathKey *pkey = lfirst_node(PathKey, lc1);
557+
SortGroupClause *gc = (SortGroupClause *) lfirst(lc2);
558+
Node *node;
559+
Bitmapset *relids;
560+
VariableStatData vardata;
561+
double nd = -1;
562+
bool isdefault;
563+
564+
if (foreach_current_index(lc1) >= root->num_groupby_pathkeys)
565+
break;
566+
567+
node = get_sortgroupclause_expr(gc, root->parse->targetList);
568+
relids = pull_varnos(root, node);
569+
570+
if (bms_num_members(relids) != 1 && bms_is_member(0, relids))
571+
/*
572+
*Although functional index can estimate distincts here, the chance
573+
* is too low.
574+
*/
575+
continue;
576+
577+
examine_variable(root, node, 0, &vardata);
578+
if (!HeapTupleIsValid(vardata.statsTuple))
579+
continue;
580+
nd = get_variable_numdistinct(&vardata, &isdefault);
581+
ReleaseVariableStats(vardata);
582+
if (isdefault)
583+
continue;
584+
585+
Assert(nd >= 0);
586+
if (nd_max == 0 || nd > nd_max)
587+
{
588+
nd_max = nd;
589+
pk_opt = pkey;
590+
}
591+
}
592+
593+
if (pk_opt != NULL)
594+
{
595+
List *new_pathkeys = list_make1(pk_opt);
596+
int n;
597+
598+
new_pathkeys = list_concat_unique_ptr(new_pathkeys, root->group_pathkeys);
599+
n = group_keys_reorder_by_pathkeys(new_pathkeys, &pathkeys, &clauses,
600+
root->num_groupby_pathkeys);
601+
602+
if (n > 0 &&
603+
(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
604+
compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
605+
{
606+
info = makeNode(GroupByOrdering);
607+
info->pathkeys = pathkeys;
608+
info->clauses = clauses;
609+
610+
infos = lappend(infos, info);
611+
}
612+
}
613+
547614
return infos;
548615
}
549616

src/test/regress/expected/aggregates.out

+21-22
Original file line numberDiff line numberDiff line change
@@ -2786,13 +2786,13 @@ SELECT balk(hundred) FROM tenk1;
27862786
ROLLBACK;
27872787
-- GROUP BY optimization by reordering GROUP BY clauses
27882788
CREATE TABLE btg AS SELECT
2789-
i % 10 AS x,
2790-
i % 10 AS y,
2791-
'abc' || i % 10 AS z,
2789+
i % 231 AS x,
2790+
i % 49 AS y,
2791+
'abc' || i % 2 AS z,
27922792
i AS w
2793-
FROM generate_series(1, 100) AS i;
2793+
FROM generate_series(1, 1000) AS i;
27942794
CREATE INDEX btg_x_y_idx ON btg(x, y);
2795-
ANALYZE btg;
2795+
VACUUM ANALYZE btg;
27962796
SET enable_hashagg = off;
27972797
SET enable_seqscan = off;
27982798
-- Utilize the ordering of index scan to avoid a Sort operation
@@ -2839,21 +2839,19 @@ EXPLAIN (COSTS OFF)
28392839
SELECT count(*)
28402840
FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
28412841
GROUP BY t1.x, t1.y, t1.z, t1.w;
2842-
QUERY PLAN
2843-
-------------------------------------------------------------------------------
2842+
QUERY PLAN
2843+
----------------------------------------------------------------
28442844
GroupAggregate
2845-
Group Key: t1.x, t1.y, t1.z, t1.w
2845+
Group Key: t1.w, t1.x, t1.y, t1.z
28462846
-> Sort
2847-
Sort Key: t1.x, t1.y, t1.z, t1.w
2847+
Sort Key: t1.w, t1.x, t1.y, t1.z
28482848
-> Merge Join
2849-
Merge Cond: ((t1.w = t2.w) AND (t1.z = t2.z) AND (t1.x = t2.x))
2850-
-> Sort
2851-
Sort Key: t1.w, t1.z, t1.x
2852-
-> Index Scan using btg_x_y_idx on btg t1
2853-
-> Sort
2854-
Sort Key: t2.w, t2.z, t2.x
2849+
Merge Cond: (t1.x = t2.x)
2850+
Join Filter: ((t2.z = t1.z) AND (t2.w = t1.w))
2851+
-> Index Scan using btg_x_y_idx on btg t1
2852+
-> Materialize
28552853
-> Index Scan using btg_x_y_idx on btg t2
2856-
(12 rows)
2854+
(10 rows)
28572855

28582856
RESET enable_nestloop;
28592857
RESET enable_hashjoin;
@@ -2877,11 +2875,12 @@ SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
28772875
Sort
28782876
Sort Key: ((x * x)), z
28792877
-> GroupAggregate
2880-
Group Key: w, x, y, z
2881-
-> Sort
2882-
Sort Key: w, x, y, z
2878+
Group Key: x, y, w, z
2879+
-> Incremental Sort
2880+
Sort Key: x, y, w, z
2881+
Presorted Key: x, y
28832882
-> Index Scan using btg_x_y_idx on btg
2884-
(7 rows)
2883+
(8 rows)
28852884

28862885
-- Test the case where the number of incoming subtree path keys is more than
28872886
-- the number of grouping keys.
@@ -2918,9 +2917,9 @@ GROUP BY c1.w, c1.z;
29182917
QUERY PLAN
29192918
-----------------------------------------------------
29202919
GroupAggregate
2921-
Group Key: c1.w, c1.z
2920+
Group Key: c1.z, c1.w
29222921
-> Sort
2923-
Sort Key: c1.w, c1.z, c1.x, c1.y
2922+
Sort Key: c1.z, c1.w, c1.x, c1.y
29242923
-> Merge Join
29252924
Merge Cond: (c1.x = c2.x)
29262925
-> Sort

src/test/regress/sql/aggregates.sql

+5-5
Original file line numberDiff line numberDiff line change
@@ -1199,13 +1199,13 @@ ROLLBACK;
11991199

12001200
-- GROUP BY optimization by reordering GROUP BY clauses
12011201
CREATE TABLE btg AS SELECT
1202-
i % 10 AS x,
1203-
i % 10 AS y,
1204-
'abc' || i % 10 AS z,
1202+
i % 231 AS x,
1203+
i % 49 AS y,
1204+
'abc' || i % 2 AS z,
12051205
i AS w
1206-
FROM generate_series(1, 100) AS i;
1206+
FROM generate_series(1, 1000) AS i;
12071207
CREATE INDEX btg_x_y_idx ON btg(x, y);
1208-
ANALYZE btg;
1208+
VACUUM ANALYZE btg;
12091209

12101210
SET enable_hashagg = off;
12111211
SET enable_seqscan = off;

0 commit comments

Comments
 (0)