Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2d1d96b

Browse files
committed
Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow,
but seem like a separate patch since most of the remaining work is on the executor side.) I took the opportunity to push selection of the grouping operators for set operations into the parser where it belongs. Otherwise this is just a small exercise in making prepunion.c consider both alternatives. As with the recent DISTINCT patch, this means we can UNION on datatypes that can hash but not sort, and it means that UNION without ORDER BY is no longer certain to produce sorted output.
1 parent 3d40d5e commit 2d1d96b

File tree

20 files changed

+462
-212
lines changed

20 files changed

+462
-212
lines changed

src/backend/catalog/dependency.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1597,6 +1597,15 @@ find_expr_references_walker(Node *node,
15971597
context->rtables = list_delete_first(context->rtables);
15981598
return result;
15991599
}
1600+
else if (IsA(node, SetOperationStmt))
1601+
{
1602+
SetOperationStmt *setop = (SetOperationStmt *) node;
1603+
1604+
/* we need to look at the groupClauses for operator references */
1605+
find_expr_references_walker((Node *) setop->groupClauses, context);
1606+
/* fall through to examine child nodes */
1607+
}
1608+
16001609
return expression_tree_walker(node, find_expr_references_walker,
16011610
(void *) context);
16021611
}

src/backend/nodes/copyfuncs.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Portions Copyright (c) 1994, Regents of the University of California
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1943,6 +1943,7 @@ _copySetOperationStmt(SetOperationStmt *from)
19431943
COPY_NODE_FIELD(rarg);
19441944
COPY_NODE_FIELD(colTypes);
19451945
COPY_NODE_FIELD(colTypmods);
1946+
COPY_NODE_FIELD(groupClauses);
19461947

19471948
return newnode;
19481949
}

src/backend/nodes/equalfuncs.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Portions Copyright (c) 1994, Regents of the University of California
1919
*
2020
* IDENTIFICATION
21-
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $
21+
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
2222
*
2323
*-------------------------------------------------------------------------
2424
*/
@@ -839,6 +839,7 @@ _equalSetOperationStmt(SetOperationStmt *a, SetOperationStmt *b)
839839
COMPARE_NODE_FIELD(rarg);
840840
COMPARE_NODE_FIELD(colTypes);
841841
COMPARE_NODE_FIELD(colTypmods);
842+
COMPARE_NODE_FIELD(groupClauses);
842843

843844
return true;
844845
}

src/backend/nodes/outfuncs.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1780,6 +1780,7 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node)
17801780
WRITE_NODE_FIELD(rarg);
17811781
WRITE_NODE_FIELD(colTypes);
17821782
WRITE_NODE_FIELD(colTypmods);
1783+
WRITE_NODE_FIELD(groupClauses);
17831784
}
17841785

17851786
static void

src/backend/nodes/readfuncs.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.212 2008/08/07 01:11:49 tgl Exp $
1212
*
1313
* NOTES
1414
* Path and Plan nodes do not have any readfuncs support, because we
@@ -232,6 +232,7 @@ _readSetOperationStmt(void)
232232
READ_NODE_FIELD(rarg);
233233
READ_NODE_FIELD(colTypes);
234234
READ_NODE_FIELD(colTypmods);
235+
READ_NODE_FIELD(groupClauses);
235236

236237
READ_DONE();
237238
}

src/backend/optimizer/plan/planner.c

+6-104
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
6868
double tuple_fraction,
6969
int64 *offset_est, int64 *count_est);
7070
static void preprocess_groupclause(PlannerInfo *root);
71-
static Oid *extract_grouping_ops(List *groupClause);
72-
static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
73-
static bool grouping_is_sortable(List *groupClause);
74-
static bool grouping_is_hashable(List *groupClause);
7571
static bool choose_hashed_grouping(PlannerInfo *root,
7672
double tuple_fraction, double limit_tuples,
7773
Path *cheapest_path, Path *sorted_path,
@@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
784780

785781
/*
786782
* If there's a top-level ORDER BY, assume we have to fetch all the
787-
* tuples. This might seem too simplistic given all the hackery below
788-
* to possibly avoid the sort ... but a nonzero tuple_fraction is only
789-
* of use to plan_set_operations() when the setop is UNION ALL, and
790-
* the result of UNION ALL is always unsorted.
783+
* tuples. This might be too simplistic given all the hackery below
784+
* to possibly avoid the sort; but the odds of accurate estimates
785+
* here are pretty low anyway.
791786
*/
792787
if (parse->sortClause)
793788
tuple_fraction = 0.0;
@@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
818813
*/
819814
Assert(parse->commandType == CMD_SELECT);
820815

821-
tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
816+
tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
817+
tlist);
822818

823819
/*
824820
* Can't handle FOR UPDATE/SHARE here (parser should have checked
@@ -1714,100 +1710,6 @@ preprocess_groupclause(PlannerInfo *root)
17141710
parse->groupClause = new_groupclause;
17151711
}
17161712

1717-
/*
1718-
* extract_grouping_ops - make an array of the equality operator OIDs
1719-
* for a SortGroupClause list
1720-
*/
1721-
static Oid *
1722-
extract_grouping_ops(List *groupClause)
1723-
{
1724-
int numCols = list_length(groupClause);
1725-
int colno = 0;
1726-
Oid *groupOperators;
1727-
ListCell *glitem;
1728-
1729-
groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
1730-
1731-
foreach(glitem, groupClause)
1732-
{
1733-
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1734-
1735-
groupOperators[colno] = groupcl->eqop;
1736-
Assert(OidIsValid(groupOperators[colno]));
1737-
colno++;
1738-
}
1739-
1740-
return groupOperators;
1741-
}
1742-
1743-
/*
1744-
* extract_grouping_cols - make an array of the grouping column resnos
1745-
* for a SortGroupClause list
1746-
*/
1747-
static AttrNumber *
1748-
extract_grouping_cols(List *groupClause, List *tlist)
1749-
{
1750-
AttrNumber *grpColIdx;
1751-
int numCols = list_length(groupClause);
1752-
int colno = 0;
1753-
ListCell *glitem;
1754-
1755-
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
1756-
1757-
foreach(glitem, groupClause)
1758-
{
1759-
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1760-
TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
1761-
1762-
grpColIdx[colno++] = tle->resno;
1763-
}
1764-
1765-
return grpColIdx;
1766-
}
1767-
1768-
/*
1769-
* grouping_is_sortable - is it possible to implement grouping list by sorting?
1770-
*
1771-
* This is easy since the parser will have included a sortop if one exists.
1772-
*/
1773-
static bool
1774-
grouping_is_sortable(List *groupClause)
1775-
{
1776-
ListCell *glitem;
1777-
1778-
foreach(glitem, groupClause)
1779-
{
1780-
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1781-
1782-
if (!OidIsValid(groupcl->sortop))
1783-
return false;
1784-
}
1785-
return true;
1786-
}
1787-
1788-
/*
1789-
* grouping_is_hashable - is it possible to implement grouping list by hashing?
1790-
*
1791-
* We assume hashing is OK if the equality operators are marked oprcanhash.
1792-
* (If there isn't actually a supporting hash function, the executor will
1793-
* complain at runtime; but this is a misdeclaration of the operator, not
1794-
* a system bug.)
1795-
*/
1796-
static bool
1797-
grouping_is_hashable(List *groupClause)
1798-
{
1799-
ListCell *glitem;
1800-
1801-
foreach(glitem, groupClause)
1802-
{
1803-
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
1804-
1805-
if (!op_hashjoinable(groupcl->eqop))
1806-
return false;
1807-
}
1808-
return true;
1809-
}
1810-
18111713
/*
18121714
* choose_hashed_grouping - should we use hashed grouping?
18131715
*

0 commit comments

Comments
 (0)