Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 488d70a

Browse files
committed
Implement "join removal" for cases where the inner side of a left join
is unique and is not referenced above the join. In this case the inner side doesn't affect the query result and can be thrown away entirely. Although perhaps nobody would ever write such a thing by hand, it's a reasonably common case in machine-generated SQL. The current implementation only recognizes the case where the inner side is a simple relation with a unique index matching the query conditions. This is enough for the use-cases that have been shown so far, but we might want to try to handle other cases later. Robert Haas, somewhat rewritten by Tom
1 parent e3f0271 commit 488d70a

File tree

11 files changed

+349
-11
lines changed

11 files changed

+349
-11
lines changed

src/backend/nodes/outfuncs.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.363 2009/07/30 02:45:37 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.364 2009/09/17 20:49:28 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1421,6 +1421,16 @@ _outUniquePath(StringInfo str, UniquePath *node)
14211421
WRITE_FLOAT_FIELD(rows, "%.0f");
14221422
}
14231423

1424+
/*
 * _outNoOpPath
 *	  Output routine for NoOpPath nodes; _outNode() dispatches here for
 *	  T_NoOpPath.
 *
 * Writes the "NOOPPATH" node label, then the fields handled by
 * _outPathInfo() (called with the node cast to Path), then the subpath
 * field.  The WRITE_* macros are defined elsewhere in this file and are
 * presumed to operate on the local names "str" and "node" -- confirm
 * against their definitions before renaming either parameter.
 */
static void
1425+
_outNoOpPath(StringInfo str, NoOpPath *node)
1426+
{
1427+
WRITE_NODE_TYPE("NOOPPATH");
1428+
1429+
_outPathInfo(str, (Path *) node);
1430+
1431+
WRITE_NODE_FIELD(subpath);
1432+
}
1433+
14241434
static void
14251435
_outNestPath(StringInfo str, NestPath *node)
14261436
{
@@ -2634,6 +2644,9 @@ _outNode(StringInfo str, void *obj)
26342644
case T_UniquePath:
26352645
_outUniquePath(str, obj);
26362646
break;
2647+
case T_NoOpPath:
2648+
_outNoOpPath(str, obj);
2649+
break;
26372650
case T_NestPath:
26382651
_outNestPath(str, obj);
26392652
break;

src/backend/optimizer/README

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.50 2009/07/21 02:02:44 tgl Exp $
1+
$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.51 2009/09/17 20:49:28 tgl Exp $
22

33
Optimizer
44
=========
@@ -354,6 +354,7 @@ RelOptInfo - a relation or joined relations
354354
NestPath - nested-loop joins
355355
MergePath - merge joins
356356
HashPath - hash joins
357+
NoOpPath - same as its input path (used when a join is removed)
357358

358359
EquivalenceClass - a data structure representing a set of values known equal
359360

src/backend/optimizer/path/allpaths.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.185 2009/09/02 17:52:24 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.186 2009/09/17 20:49:28 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1387,6 +1387,10 @@ print_path(PlannerInfo *root, Path *path, int indent)
13871387
ptype = "Unique";
13881388
subpath = ((UniquePath *) path)->subpath;
13891389
break;
1390+
case T_NoOpPath:
1391+
ptype = "NoOp";
1392+
subpath = ((NoOpPath *) path)->subpath;
1393+
break;
13901394
case T_NestPath:
13911395
ptype = "NestLoop";
13921396
join = true;

src/backend/optimizer/path/indxpath.c

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.242 2009/09/17 20:49:28 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -1918,6 +1918,86 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
19181918
return clause_list;
19191919
}
19201920

1921+
/*
1922+
* relation_has_unique_index_for
1923+
* Determine whether the relation provably has at most one row satisfying
1924+
* a set of equality conditions, because the conditions constrain all
1925+
* columns of some unique index.
1926+
*
1927+
* The conditions are provided as a list of RestrictInfo nodes, where the
1928+
* caller has already determined that each condition is a mergejoinable
1929+
* equality with an expression in this relation on one side, and an
1930+
* expression not involving this relation on the other. The transient
1931+
* outer_is_left flag is used to identify which side we should look at:
1932+
* left side if outer_is_left is false, right side if it is true.
*
* Returns true as soon as one usable unique index has every one of its
* columns matched by some condition in restrictlist.  Returns false when
* restrictlist is empty or when no index of the relation qualifies.
* The root argument is not referenced in the function body.
1933+
*/
1934+
bool
1935+
relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
1936+
List *restrictlist)
1937+
{
1938+
ListCell *ic;
1939+
1940+
/* Short-circuit the easy case */
1941+
if (restrictlist == NIL)
1942+
return false;
1943+
1944+
/* Examine each index of the relation ... */
1945+
foreach(ic, rel->indexlist)
1946+
{
1947+
IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
1948+
int c;
1949+
1950+
/*
1951+
* If the index is not unique or if it's a partial index that doesn't
1952+
* match the query, it's useless here.
1953+
*/
1954+
if (!ind->unique || (ind->indpred != NIL && !ind->predOK))
1955+
continue;
1956+
1957+
/*
1958+
* Try to find each index column in the list of conditions. This is
1959+
* O(n^2) or worse, but we expect all the lists to be short.
1960+
*/
1961+
for (c = 0; c < ind->ncolumns; c++)
1962+
{
1963+
ListCell *lc;
1964+
1965+
foreach(lc, restrictlist)
1966+
{
1967+
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1968+
Node *rexpr;
1969+
1970+
/*
1971+
* The condition's equality operator must be a member of the
1972+
* index opfamily, else it is not asserting the right kind
1973+
* of equality behavior for this index. We check this first
1974+
* since it's probably cheaper than match_index_to_operand().
1975+
*/
1976+
if (!list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c]))
1977+
continue;
1978+
1979+
/* OK, see if the condition operand matches the index key */
1980+
if (rinfo->outer_is_left)
1981+
rexpr = get_rightop(rinfo->clause);
1982+
else
1983+
rexpr = get_leftop(rinfo->clause);
1984+
1985+
if (match_index_to_operand(rexpr, c, ind))
1986+
break; /* found a match; column is unique */
1987+
}
1988+
1989+
/*
 * lc is NULL here only when the inner foreach ran through the whole
 * restrictlist without taking the "found a match" break above.
 */
if (lc == NULL)
1990+
break; /* no match; this index doesn't help us */
1991+
}
1992+
1993+
/* Matched all columns of this index? */
/* (c reaches ncolumns only if the column loop was never broken out of) */
1994+
if (c == ind->ncolumns)
1995+
return true;
1996+
}
1997+
1998+
return false;
1999+
}
2000+
19212001

19222002
/****************************************************************************
19232003
* ---- PATH CREATION UTILITIES ----

src/backend/optimizer/path/joinpath.c

Lines changed: 195 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.123 2009/09/12 22:12:04 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.124 2009/09/17 20:49:28 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -22,6 +22,11 @@
2222
#include "optimizer/paths.h"
2323

2424

25+
static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
26+
RelOptInfo *outerrel, RelOptInfo *innerrel,
27+
List *restrictlist, JoinType jointype);
28+
static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
29+
RelOptInfo *outerrel);
2530
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
2631
RelOptInfo *outerrel, RelOptInfo *innerrel,
2732
List *restrictlist, List *mergeclause_list,
@@ -78,11 +83,26 @@ add_paths_to_joinrel(PlannerInfo *root,
7883
{
7984
List *mergeclause_list = NIL;
8085

86+
/*
87+
* 0. Consider join removal. This is always the most efficient strategy,
88+
* so if it works, there's no need to consider anything further.
89+
*/
90+
if (join_is_removable(root, joinrel, outerrel, innerrel,
91+
restrictlist, jointype))
92+
{
93+
generate_outer_only(root, joinrel, outerrel);
94+
return;
95+
}
96+
8197
/*
8298
* Find potential mergejoin clauses. We can skip this if we are not
8399
* interested in doing a mergejoin. However, mergejoin is currently our
84100
* only way of implementing full outer joins, so override mergejoin
85101
* disable if it's a full join.
102+
*
103+
* Note: do this after join_is_removable(), because this sets the
104+
* outer_is_left flags in the mergejoin clauses, while join_is_removable
105+
* uses those flags for its own purposes.
86106
*/
87107
if (enable_mergejoin || jointype == JOIN_FULL)
88108
mergeclause_list = select_mergejoin_clauses(root,
@@ -133,6 +153,180 @@ add_paths_to_joinrel(PlannerInfo *root,
133153
restrictlist, jointype, sjinfo);
134154
}
135155

156+
/*
157+
* join_is_removable
158+
* Determine whether we need not perform the join at all, because
159+
* it will just duplicate its left input.
160+
*
161+
* This is true for a left join for which the join condition cannot match
162+
* more than one inner-side row. (There are other possibly interesting
163+
* cases, but we don't have the infrastructure to prove them.)
164+
*
165+
* Note: there is no need to consider the symmetrical case of duplicating the
166+
* right input, because add_paths_to_joinrel() will be called with each rel
167+
* on the outer side.
*
* Summary of the checks, in the order performed:
*   1. jointype must be JOIN_LEFT, and innerrel must not be a join rel,
*      must have rtekind RTE_RELATION, and must have a non-empty indexlist.
*   2. every attr_needed set of innerrel must be a subset of
*      joinrel->relids.
*   3. usable equality clauses are gathered from restrictlist and from
*      innerrel->baserestrictinfo, and relation_has_unique_index_for()
*      must accept that list.
*
* Side effect: outer_is_left is overwritten on every RestrictInfo that is
* added to the local clause list, including nodes that belong to
* innerrel->baserestrictinfo.
*
* Returns true if the join can be removed, false otherwise.  root is only
* passed through to relation_has_unique_index_for().
168+
*/
169+
static bool
170+
join_is_removable(PlannerInfo *root,
171+
RelOptInfo *joinrel,
172+
RelOptInfo *outerrel,
173+
RelOptInfo *innerrel,
174+
List *restrictlist,
175+
JoinType jointype)
176+
{
177+
List *clause_list = NIL;
178+
ListCell *l;
179+
int attroff;
180+
181+
/*
182+
* Currently, we only know how to remove left joins to a baserel with
183+
* unique indexes. We can check most of these criteria pretty trivially
184+
* to avoid doing useless extra work. But checking whether any of the
185+
* indexes are unique would require iterating over the indexlist, so for
186+
* now we just make sure there are indexes of some sort or other. If none
187+
* of them are unique, join removal will still fail, just slightly later.
188+
*/
189+
if (jointype != JOIN_LEFT ||
190+
innerrel->reloptkind == RELOPT_JOINREL ||
191+
innerrel->rtekind != RTE_RELATION ||
192+
innerrel->indexlist == NIL)
193+
return false;
194+
195+
/*
196+
* We can't remove the join if any inner-rel attributes are used above
197+
* the join.
198+
*
199+
* As a micro-optimization, it seems better to start with max_attr and
200+
* count down rather than starting with min_attr and counting up, on the
201+
* theory that the system attributes are somewhat less likely to be wanted
202+
* and should be tested last.
203+
*/
204+
for (attroff = innerrel->max_attr - innerrel->min_attr;
205+
attroff >= 0;
206+
attroff--)
207+
{
208+
/* fail if this attribute is needed by anything outside joinrel->relids */
if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
209+
return false;
210+
}
211+
212+
/*
213+
* Search for mergejoinable clauses that constrain the inner rel against
214+
* either the outer rel or a pseudoconstant. If an operator is
215+
* mergejoinable then it behaves like equality for some btree opclass,
216+
* so it's what we want. The mergejoinability test also eliminates
217+
* clauses containing volatile functions, which we couldn't depend on.
218+
*/
219+
foreach(l, restrictlist)
220+
{
221+
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
222+
223+
/*
224+
* We are always considering an outer join here, so ignore pushed-down
225+
* clauses. Also ignore anything that doesn't have a mergejoinable
226+
* operator.
227+
*/
228+
if (restrictinfo->is_pushed_down)
229+
continue;
230+
231+
if (!restrictinfo->can_join ||
232+
restrictinfo->mergeopfamilies == NIL)
233+
continue; /* not mergejoinable */
234+
235+
/*
236+
* Check if clause is usable with these input rels. All the vars
237+
* needed on each side of the clause must be available from one or the
238+
* other of the input rels.
239+
*/
240+
if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
241+
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
242+
{
243+
/* righthand side is inner */
244+
restrictinfo->outer_is_left = true;
245+
}
246+
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
247+
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
248+
{
249+
/* lefthand side is inner */
250+
restrictinfo->outer_is_left = false;
251+
}
252+
else
253+
continue; /* no good for these input relations */
254+
255+
/* OK, add to list */
256+
clause_list = lappend(clause_list, restrictinfo);
257+
}
258+
259+
/* Now examine the rel's restriction clauses for var = const clauses */
260+
foreach(l, innerrel->baserestrictinfo)
261+
{
262+
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
263+
264+
/*
265+
* Note: can_join won't be set for a restriction clause, but
266+
* mergeopfamilies will be if it has a mergejoinable operator
267+
* and doesn't contain volatile functions.
268+
*/
269+
if (restrictinfo->mergeopfamilies == NIL)
270+
continue; /* not mergejoinable */
271+
272+
/*
273+
* The clause certainly doesn't refer to anything but the given
274+
* rel. If either side is pseudoconstant then we can use it.
*
* The side with empty relids plays the role of the "outer" side,
* so outer_is_left is set to say which operand that is.
275+
*/
276+
if (bms_is_empty(restrictinfo->left_relids))
277+
{
278+
/* righthand side is inner */
279+
restrictinfo->outer_is_left = true;
280+
}
281+
else if (bms_is_empty(restrictinfo->right_relids))
282+
{
283+
/* lefthand side is inner */
284+
restrictinfo->outer_is_left = false;
285+
}
286+
else
287+
continue;
288+
289+
/* OK, add to list */
290+
clause_list = lappend(clause_list, restrictinfo);
291+
}
292+
293+
/* Now examine the indexes to see if we have a matching unique index */
294+
if (relation_has_unique_index_for(root, innerrel, clause_list))
295+
return true;
296+
297+
/*
298+
* Some day it would be nice to check for other methods of establishing
299+
* distinctness.
300+
*/
301+
return false;
302+
}
303+
304+
/*
305+
* generate_outer_only
306+
* Generate "join" paths when we have found the join is removable.
*
* Each entry of outerrel->pathlist is wrapped by create_noop_path() and the
* result is handed to add_path() for joinrel.  Nothing is returned; root is
* only forwarded to create_noop_path().
307+
*/
308+
static void
309+
generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
310+
RelOptInfo *outerrel)
311+
{
312+
ListCell *lc;
313+
314+
/*
315+
* For the moment, replicate all of the outerrel's paths as join paths.
316+
* Some of them might not really be interesting above the join, if they
317+
* have sort orderings that have no real use except to do a mergejoin
318+
* for the join we've just found we don't need. But distinguishing that
319+
* case probably isn't worth the extra code it would take.
320+
*/
321+
foreach(lc, outerrel->pathlist)
322+
{
323+
Path *outerpath = (Path *) lfirst(lc);
324+
325+
add_path(joinrel, (Path *)
326+
create_noop_path(root, joinrel, outerpath));
327+
}
328+
}
329+
136330
/*
137331
* sort_inner_and_outer
138332
* Create mergejoin join paths by explicitly sorting both the outer and

0 commit comments

Comments
 (0)