Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ec73b56

Browse files
committed
Make GROUP BY work properly for datatypes that only support hashing and not
sorting. The infrastructure for this was all in place already; it's only necessary to fix the planner to not assume that sorting is always an available option.
1 parent 82a1f09 commit ec73b56

File tree

3 files changed

+112
-58
lines changed

3 files changed

+112
-58
lines changed

src/backend/optimizer/plan/planmain.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
288288
* levels of sort --- and, therefore, certainly need to read all the
289289
* tuples --- unless ORDER BY is a subset of GROUP BY.
290290
*/
291-
if (root->group_pathkeys && root->sort_pathkeys &&
292-
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
291+
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
293292
tuple_fraction = 0.0;
294293
}
295294
else if (parse->hasAggs || root->hasHavingQual)

src/backend/optimizer/plan/planner.c

+108-49
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
6969
int64 *offset_est, int64 *count_est);
7070
static void preprocess_groupclause(PlannerInfo *root);
7171
static Oid *extract_grouping_ops(List *groupClause);
72+
static bool grouping_is_sortable(List *groupClause);
73+
static bool grouping_is_hashable(List *groupClause);
7274
static bool choose_hashed_grouping(PlannerInfo *root,
7375
double tuple_fraction, double limit_tuples,
7476
Path *cheapest_path, Path *sorted_path,
75-
Oid *groupOperators, double dNumGroups,
76-
AggClauseCounts *agg_counts);
77+
double dNumGroups, AggClauseCounts *agg_counts);
7778
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
7879
AttrNumber **groupColIdx, bool *need_tlist_eval);
7980
static void locate_grouping_columns(PlannerInfo *root,
@@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
839840
List *sub_tlist;
840841
List *group_pathkeys;
841842
AttrNumber *groupColIdx = NULL;
842-
Oid *groupOperators = NULL;
843843
bool need_tlist_eval = true;
844844
QualCost tlist_cost;
845845
Path *cheapest_path;
@@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
877877
* DISTINCT and ORDER BY requirements. This should be changed
878878
* someday, but DISTINCT ON is a bit of a problem ...
879879
*/
880-
root->group_pathkeys =
881-
make_pathkeys_for_sortclauses(root,
882-
parse->groupClause,
883-
tlist,
884-
false);
880+
if (parse->groupClause && grouping_is_sortable(parse->groupClause))
881+
root->group_pathkeys =
882+
make_pathkeys_for_sortclauses(root,
883+
parse->groupClause,
884+
tlist,
885+
false);
886+
else
887+
root->group_pathkeys = NIL;
888+
885889
if (list_length(parse->distinctClause) > list_length(parse->sortClause))
886890
root->sort_pathkeys =
887891
make_pathkeys_for_sortclauses(root,
@@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
915919
/*
916920
* Figure out whether we need a sorted result from query_planner.
917921
*
918-
* If we have a GROUP BY clause, then we want a result sorted properly
919-
* for grouping. Otherwise, if there is an ORDER BY clause, we want
920-
* to sort by the ORDER BY clause. (Note: if we have both, and ORDER
921-
* BY is a superset of GROUP BY, it would be tempting to request sort
922-
* by ORDER BY --- but that might just leave us failing to exploit an
923-
* available sort order at all. Needs more thought...)
922+
* If we have a sortable GROUP BY clause, then we want a result sorted
923+
* properly for grouping. Otherwise, if there is an ORDER BY clause,
924+
* we want to sort by the ORDER BY clause. (Note: if we have both, and
925+
* ORDER BY is a superset of GROUP BY, it would be tempting to request
926+
* sort by ORDER BY --- but that might just leave us failing to
927+
* exploit an available sort order at all. Needs more thought...)
924928
*/
925929
if (root->group_pathkeys)
926930
root->query_pathkeys = root->group_pathkeys;
@@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
942946
sort_pathkeys = root->sort_pathkeys;
943947

944948
/*
945-
* If grouping, extract the grouping operators and decide whether we
946-
* want to use hashed grouping.
949+
* If grouping, decide whether to use sorted or hashed grouping.
947950
*/
948951
if (parse->groupClause)
949952
{
950-
groupOperators = extract_grouping_ops(parse->groupClause);
951-
use_hashed_grouping =
952-
choose_hashed_grouping(root, tuple_fraction, limit_tuples,
953-
cheapest_path, sorted_path,
954-
groupOperators, dNumGroups,
955-
&agg_counts);
953+
bool can_hash;
954+
bool can_sort;
955+
956+
/*
957+
* Executor doesn't support hashed aggregation with DISTINCT
958+
* aggregates. (Doing so would imply storing *all* the input
959+
* values in the hash table, which seems like a certain loser.)
960+
*/
961+
can_hash = (agg_counts.numDistinctAggs == 0 &&
962+
grouping_is_hashable(parse->groupClause));
963+
can_sort = grouping_is_sortable(parse->groupClause);
964+
if (can_hash && can_sort)
965+
{
966+
/* we have a meaningful choice to make ... */
967+
use_hashed_grouping =
968+
choose_hashed_grouping(root,
969+
tuple_fraction, limit_tuples,
970+
cheapest_path, sorted_path,
971+
dNumGroups, &agg_counts);
972+
}
973+
else if (can_hash)
974+
use_hashed_grouping = true;
975+
else if (can_sort)
976+
use_hashed_grouping = false;
977+
else
978+
ereport(ERROR,
979+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
980+
errmsg("could not implement GROUP BY"),
981+
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
956982

957983
/* Also convert # groups to long int --- but 'ware overflow! */
958984
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
10881114
AGG_HASHED,
10891115
numGroupCols,
10901116
groupColIdx,
1091-
groupOperators,
1117+
extract_grouping_ops(parse->groupClause),
10921118
numGroups,
10931119
agg_counts.numAggs,
10941120
result_plan);
@@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
11311157
aggstrategy,
11321158
numGroupCols,
11331159
groupColIdx,
1134-
groupOperators,
1160+
extract_grouping_ops(parse->groupClause),
11351161
numGroups,
11361162
agg_counts.numAggs,
11371163
result_plan);
@@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
11601186
(List *) parse->havingQual,
11611187
numGroupCols,
11621188
groupColIdx,
1163-
groupOperators,
1189+
extract_grouping_ops(parse->groupClause),
11641190
dNumGroups,
11651191
result_plan);
11661192
/* The Group node won't change sort ordering */
@@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
14951521
* GROUP BY elements, which could match the sort ordering of other
14961522
* possible plans (eg an indexscan) and thereby reduce cost. We don't
14971523
* bother with that, though. Hashed grouping will frequently win anyway.
1524+
*
1525+
* Note: we need no comparable processing of the distinctClause because
1526+
* the parser already enforced that that matches ORDER BY.
14981527
*/
14991528
static void
15001529
preprocess_groupclause(PlannerInfo *root)
@@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
15051534
ListCell *sl;
15061535
ListCell *gl;
15071536

1508-
/* If no ORDER BY, nothing useful to do here anyway */
1537+
/* If no ORDER BY, nothing useful to do here */
15091538
if (parse->sortClause == NIL)
15101539
return;
15111540

@@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
15461575
* were able to make a complete match. In other words, we only
15471576
* rearrange the GROUP BY list if the result is that one list is a
15481577
* prefix of the other --- otherwise there's no possibility of a
1549-
* common sort.
1578+
* common sort. Also, give up if there are any non-sortable GROUP BY
1579+
* items, since then there's no hope anyway.
15501580
*/
15511581
foreach(gl, parse->groupClause)
15521582
{
@@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
15561586
continue; /* it matched an ORDER BY item */
15571587
if (partial_match)
15581588
return; /* give up, no common sort possible */
1589+
if (!OidIsValid(gc->sortop))
1590+
return; /* give up, GROUP BY can't be sorted */
15591591
new_groupclause = lappend(new_groupclause, gc);
15601592
}
15611593

@@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
15661598

15671599
/*
15681600
* extract_grouping_ops - make an array of the equality operator OIDs
1569-
* for the GROUP BY clause
1601+
* for a SortGroupClause list
15701602
*/
15711603
static Oid *
15721604
extract_grouping_ops(List *groupClause)
@@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
15901622
return groupOperators;
15911623
}
15921624

1625+
/*
1626+
* grouping_is_sortable - is it possible to implement grouping list by sorting?
1627+
*
1628+
* This is easy since the parser will have included a sortop if one exists.
1629+
*/
1630+
static bool
1631+
grouping_is_sortable(List *groupClause)
1632+
{
1633+
ListCell *glitem;
1634+
1635+
foreach(glitem, groupClause)
1636+
{
1637+
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1638+
1639+
if (!OidIsValid(groupcl->sortop))
1640+
return false;
1641+
}
1642+
return true;
1643+
}
1644+
1645+
/*
1646+
* grouping_is_hashable - is it possible to implement grouping list by hashing?
1647+
*
1648+
* We assume hashing is OK if the equality operators are marked oprcanhash.
1649+
* (If there isn't actually a supporting hash function, the executor will
1650+
* complain at runtime; but this is a misdeclaration of the operator, not
1651+
* a system bug.)
1652+
*/
1653+
static bool
1654+
grouping_is_hashable(List *groupClause)
1655+
{
1656+
ListCell *glitem;
1657+
1658+
foreach(glitem, groupClause)
1659+
{
1660+
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1661+
1662+
if (!op_hashjoinable(groupcl->eqop))
1663+
return false;
1664+
}
1665+
return true;
1666+
}
1667+
15931668
/*
15941669
* choose_hashed_grouping - should we use hashed grouping?
1670+
*
1671+
* Note: this is only applied when both alternatives are actually feasible.
15951672
*/
15961673
static bool
15971674
choose_hashed_grouping(PlannerInfo *root,
15981675
double tuple_fraction, double limit_tuples,
15991676
Path *cheapest_path, Path *sorted_path,
1600-
Oid *groupOperators, double dNumGroups,
1601-
AggClauseCounts *agg_counts)
1677+
double dNumGroups, AggClauseCounts *agg_counts)
16021678
{
16031679
int numGroupCols = list_length(root->parse->groupClause);
16041680
double cheapest_path_rows;
@@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
16071683
List *current_pathkeys;
16081684
Path hashed_p;
16091685
Path sorted_p;
1610-
int i;
16111686

1612-
/*
1613-
* Check can't-do-it conditions, including whether the grouping operators
1614-
* are hashjoinable. (We assume hashing is OK if they are marked
1615-
* oprcanhash. If there isn't actually a supporting hash function, the
1616-
* executor will complain at runtime.)
1617-
*
1618-
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
1619-
* (Doing so would imply storing *all* the input values in the hash table,
1620-
* which seems like a certain loser.)
1621-
*/
1687+
/* Prefer sorting when enable_hashagg is off */
16221688
if (!enable_hashagg)
16231689
return false;
1624-
if (agg_counts->numDistinctAggs != 0)
1625-
return false;
1626-
for (i = 0; i < numGroupCols; i++)
1627-
{
1628-
if (!op_hashjoinable(groupOperators[i]))
1629-
return false;
1630-
}
16311690

16321691
/*
16331692
* Don't do it if it doesn't look like the hashtable will fit into

src/backend/parser/parse_clause.c

+2-6
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
13511351
/*
13521352
* If no match in ORDER BY, just add it to the result using
13531353
* default sort/group semantics.
1354-
*
1355-
* XXX for now, the planner requires groupClause to be sortable,
1356-
* so we have to insist on that here.
13571354
*/
13581355
if (!found)
13591356
result = addTargetToGroupList(pstate, tle,
13601357
result, *targetlist,
1361-
true, /* XXX for now */
1362-
true);
1358+
false, true);
13631359
}
13641360

13651361
return result;

0 commit comments

Comments
 (0)