Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e006a24

Browse files
committed
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
the old JOIN_IN code, but antijoins are new functionality.) Teach the planner to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti joins respectively. Also, LEFT JOINs with suitable upper-level IS NULL filters are recognized as being anti joins. Unify the InClauseInfo and OuterJoinInfo infrastructure into "SpecialJoinInfo". With that change, it becomes possible to associate a SpecialJoinInfo with every join attempt, which permits some cleanup of join selectivity estimation. That needs to be taken much further than this patch does, but the next step is to change the API for oprjoin selectivity functions, which seems like material for a separate patch. So for the moment the output size estimates for semi and especially anti joins are quite bogus.
1 parent ef1c807 commit e006a24

40 files changed

+2124
-1199
lines changed

doc/src/sgml/indexam.sgml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.26 2008/04/14 17:05:32 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.27 2008/08/14 18:47:58 tgl Exp $ -->
22

33
<chapter id="indexam">
44
<title>Index Access Method Interface Definition</title>
@@ -879,7 +879,8 @@ amcostestimate (PlannerInfo *root,
879879

880880
<programlisting>
881881
*indexSelectivity = clauselist_selectivity(root, indexQuals,
882-
index-&gt;rel-&gt;relid, JOIN_INNER);
882+
index-&gt;rel-&gt;relid,
883+
JOIN_INNER, NULL);
883884
</programlisting>
884885
</para>
885886
</step>

src/backend/commands/explain.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994-5, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.176 2008/08/07 03:04:03 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.177 2008/08/14 18:47:58 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -450,8 +450,11 @@ explain_outNode(StringInfo str,
450450
case JOIN_RIGHT:
451451
pname = "Nested Loop Right Join";
452452
break;
453-
case JOIN_IN:
454-
pname = "Nested Loop IN Join";
453+
case JOIN_SEMI:
454+
pname = "Nested Loop Semi Join";
455+
break;
456+
case JOIN_ANTI:
457+
pname = "Nested Loop Anti Join";
455458
break;
456459
default:
457460
pname = "Nested Loop ??? Join";
@@ -473,8 +476,11 @@ explain_outNode(StringInfo str,
473476
case JOIN_RIGHT:
474477
pname = "Merge Right Join";
475478
break;
476-
case JOIN_IN:
477-
pname = "Merge IN Join";
479+
case JOIN_SEMI:
480+
pname = "Merge Semi Join";
481+
break;
482+
case JOIN_ANTI:
483+
pname = "Merge Anti Join";
478484
break;
479485
default:
480486
pname = "Merge ??? Join";
@@ -496,8 +502,11 @@ explain_outNode(StringInfo str,
496502
case JOIN_RIGHT:
497503
pname = "Hash Right Join";
498504
break;
499-
case JOIN_IN:
500-
pname = "Hash IN Join";
505+
case JOIN_SEMI:
506+
pname = "Hash Semi Join";
507+
break;
508+
case JOIN_ANTI:
509+
pname = "Hash Anti Join";
501510
break;
502511
default:
503512
pname = "Hash ??? Join";

src/backend/executor/nodeHashjoin.c

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.93 2008/01/01 19:45:49 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.94 2008/08/14 18:47:58 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -22,6 +22,9 @@
2222
#include "utils/memutils.h"
2323

2424

25+
/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
26+
#define HASHJOIN_IS_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
27+
2528
static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
2629
HashJoinState *hjstate,
2730
uint32 *hashvalue);
@@ -89,14 +92,6 @@ ExecHashJoin(HashJoinState *node)
8992
node->js.ps.ps_TupFromTlist = false;
9093
}
9194

92-
/*
93-
* If we're doing an IN join, we want to return at most one row per outer
94-
* tuple; so we can stop scanning the inner scan if we matched on the
95-
* previous try.
96-
*/
97-
if (node->js.jointype == JOIN_IN && node->hj_MatchedOuter)
98-
node->hj_NeedNewOuter = true;
99-
10095
/*
10196
* Reset per-tuple memory context to free any expression evaluation
10297
* storage allocated in the previous tuple cycle. Note this can't happen
@@ -129,7 +124,7 @@ ExecHashJoin(HashJoinState *node)
129124
* outer plan node. If we succeed, we have to stash it away for later
130125
* consumption by ExecHashJoinOuterGetTuple.
131126
*/
132-
if (node->js.jointype == JOIN_LEFT ||
127+
if (HASHJOIN_IS_OUTER(node) ||
133128
(outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
134129
!node->hj_OuterNotEmpty))
135130
{
@@ -162,7 +157,7 @@ ExecHashJoin(HashJoinState *node)
162157
* If the inner relation is completely empty, and we're not doing an
163158
* outer join, we can quit without scanning the outer relation.
164159
*/
165-
if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
160+
if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
166161
return NULL;
167162

168163
/*
@@ -263,27 +258,41 @@ ExecHashJoin(HashJoinState *node)
263258
{
264259
node->hj_MatchedOuter = true;
265260

266-
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
261+
/* In an antijoin, we never return a matched tuple */
262+
if (node->js.jointype == JOIN_ANTI)
263+
{
264+
node->hj_NeedNewOuter = true;
265+
break; /* out of loop over hash bucket */
266+
}
267+
else
267268
{
268-
TupleTableSlot *result;
269+
/*
270+
* In a semijoin, we'll consider returning the first match,
271+
* but after that we're done with this outer tuple.
272+
*/
273+
if (node->js.jointype == JOIN_SEMI)
274+
node->hj_NeedNewOuter = true;
275+
276+
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
277+
{
278+
TupleTableSlot *result;
269279

270-
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
280+
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
271281

272-
if (isDone != ExprEndResult)
273-
{
274-
node->js.ps.ps_TupFromTlist =
275-
(isDone == ExprMultipleResult);
276-
return result;
282+
if (isDone != ExprEndResult)
283+
{
284+
node->js.ps.ps_TupFromTlist =
285+
(isDone == ExprMultipleResult);
286+
return result;
287+
}
277288
}
278-
}
279289

280-
/*
281-
* If we didn't return a tuple, may need to set NeedNewOuter
282-
*/
283-
if (node->js.jointype == JOIN_IN)
284-
{
285-
node->hj_NeedNewOuter = true;
286-
break; /* out of loop over hash bucket */
290+
/*
291+
* If semijoin and we didn't return the tuple, we're still
292+
* done with this outer tuple.
293+
*/
294+
if (node->js.jointype == JOIN_SEMI)
295+
break; /* out of loop over hash bucket */
287296
}
288297
}
289298
}
@@ -296,7 +305,7 @@ ExecHashJoin(HashJoinState *node)
296305
node->hj_NeedNewOuter = true;
297306

298307
if (!node->hj_MatchedOuter &&
299-
node->js.jointype == JOIN_LEFT)
308+
HASHJOIN_IS_OUTER(node))
300309
{
301310
/*
302311
* We are doing an outer join and there were no join matches for
@@ -305,7 +314,7 @@ ExecHashJoin(HashJoinState *node)
305314
*/
306315
econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
307316

308-
if (ExecQual(otherqual, econtext, false))
317+
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
309318
{
310319
/*
311320
* qualification was satisfied so we project and return the
@@ -398,12 +407,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
398407
ExecInitResultTupleSlot(estate, &hjstate->js.ps);
399408
hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
400409

410+
/* note: HASHJOIN_IS_OUTER macro depends on this initialization */
401411
switch (node->join.jointype)
402412
{
403413
case JOIN_INNER:
404-
case JOIN_IN:
414+
case JOIN_SEMI:
405415
break;
406416
case JOIN_LEFT:
417+
case JOIN_ANTI:
407418
hjstate->hj_NullInnerTupleSlot =
408419
ExecInitNullTupleSlot(estate,
409420
ExecGetResultType(innerPlanState(hjstate)));
@@ -570,7 +581,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
570581
if (ExecHashGetHashValue(hashtable, econtext,
571582
hjstate->hj_OuterHashKeys,
572583
true, /* outer tuple */
573-
(hjstate->js.jointype == JOIN_LEFT),
584+
HASHJOIN_IS_OUTER(hjstate),
574585
hashvalue))
575586
{
576587
/* remember outer relation is not empty for possible rescan */
@@ -650,7 +661,7 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
650661
* sides. We can sometimes skip over batches that are empty on only one
651662
* side, but there are exceptions:
652663
*
653-
* 1. In a LEFT JOIN, we have to process outer batches even if the inner
664+
* 1. In an outer join, we have to process outer batches even if the inner
654665
* batch is empty.
655666
*
656667
* 2. If we have increased nbatch since the initial estimate, we have to
@@ -667,7 +678,7 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
667678
hashtable->innerBatchFile[curbatch] == NULL))
668679
{
669680
if (hashtable->outerBatchFile[curbatch] &&
670-
hjstate->js.jointype == JOIN_LEFT)
681+
HASHJOIN_IS_OUTER(hjstate))
671682
break; /* must process due to rule 1 */
672683
if (hashtable->innerBatchFile[curbatch] &&
673684
nbatch != hashtable->nbatch_original)

src/backend/executor/nodeMergejoin.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.91 2008/04/13 20:51:20 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.92 2008/08/14 18:47:58 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -757,7 +757,7 @@ ExecMergeJoin(MergeJoinState *node)
757757
innerTupleSlot = node->mj_InnerTupleSlot;
758758
econtext->ecxt_innertuple = innerTupleSlot;
759759

760-
if (node->js.jointype == JOIN_IN &&
760+
if (node->js.jointype == JOIN_SEMI &&
761761
node->mj_MatchedOuter)
762762
qualResult = false;
763763
else
@@ -772,6 +772,10 @@ ExecMergeJoin(MergeJoinState *node)
772772
node->mj_MatchedOuter = true;
773773
node->mj_MatchedInner = true;
774774

775+
/* In an antijoin, we never return a matched tuple */
776+
if (node->js.jointype == JOIN_ANTI)
777+
break;
778+
775779
qualResult = (otherqual == NIL ||
776780
ExecQual(otherqual, econtext, false));
777781
MJ_DEBUG_QUAL(otherqual, qualResult);
@@ -1472,11 +1476,12 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
14721476
switch (node->join.jointype)
14731477
{
14741478
case JOIN_INNER:
1475-
case JOIN_IN:
1479+
case JOIN_SEMI:
14761480
mergestate->mj_FillOuter = false;
14771481
mergestate->mj_FillInner = false;
14781482
break;
14791483
case JOIN_LEFT:
1484+
case JOIN_ANTI:
14801485
mergestate->mj_FillOuter = true;
14811486
mergestate->mj_FillInner = false;
14821487
mergestate->mj_NullInnerTupleSlot =

src/backend/executor/nodeNestloop.c

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.46 2008/01/01 19:45:49 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.47 2008/08/14 18:47:58 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -101,15 +101,6 @@ ExecNestLoop(NestLoopState *node)
101101
node->js.ps.ps_TupFromTlist = false;
102102
}
103103

104-
/*
105-
* If we're doing an IN join, we want to return at most one row per outer
106-
* tuple; so we can stop scanning the inner scan if we matched on the
107-
* previous try.
108-
*/
109-
if (node->js.jointype == JOIN_IN &&
110-
node->nl_MatchedOuter)
111-
node->nl_NeedNewOuter = true;
112-
113104
/*
114105
* Reset per-tuple memory context to free any expression evaluation
115106
* storage allocated in the previous tuple cycle. Note this can't happen
@@ -177,7 +168,8 @@ ExecNestLoop(NestLoopState *node)
177168
node->nl_NeedNewOuter = true;
178169

179170
if (!node->nl_MatchedOuter &&
180-
node->js.jointype == JOIN_LEFT)
171+
(node->js.jointype == JOIN_LEFT ||
172+
node->js.jointype == JOIN_ANTI))
181173
{
182174
/*
183175
* We are doing an outer join and there were no join matches
@@ -189,7 +181,7 @@ ExecNestLoop(NestLoopState *node)
189181

190182
ENL1_printf("testing qualification for outer-join tuple");
191183

192-
if (ExecQual(otherqual, econtext, false))
184+
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
193185
{
194186
/*
195187
* qualification was satisfied so we project and return
@@ -232,30 +224,39 @@ ExecNestLoop(NestLoopState *node)
232224
{
233225
node->nl_MatchedOuter = true;
234226

235-
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
227+
/* In an antijoin, we never return a matched tuple */
228+
if (node->js.jointype == JOIN_ANTI)
229+
node->nl_NeedNewOuter = true;
230+
else
236231
{
237232
/*
238-
* qualification was satisfied so we project and return the
239-
* slot containing the result tuple using ExecProject().
233+
* In a semijoin, we'll consider returning the first match,
234+
* but after that we're done with this outer tuple.
240235
*/
241-
TupleTableSlot *result;
242-
ExprDoneCond isDone;
236+
if (node->js.jointype == JOIN_SEMI)
237+
node->nl_NeedNewOuter = true;
238+
if (otherqual == NIL || ExecQual(otherqual, econtext, false))
239+
{
240+
/*
241+
* qualification was satisfied so we project and return
242+
* the slot containing the result tuple using
243+
* ExecProject().
244+
*/
245+
TupleTableSlot *result;
246+
ExprDoneCond isDone;
243247

244-
ENL1_printf("qualification succeeded, projecting tuple");
248+
ENL1_printf("qualification succeeded, projecting tuple");
245249

246-
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
250+
result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
247251

248-
if (isDone != ExprEndResult)
249-
{
250-
node->js.ps.ps_TupFromTlist =
251-
(isDone == ExprMultipleResult);
252-
return result;
252+
if (isDone != ExprEndResult)
253+
{
254+
node->js.ps.ps_TupFromTlist =
255+
(isDone == ExprMultipleResult);
256+
return result;
257+
}
253258
}
254259
}
255-
256-
/* If we didn't return a tuple, may need to set NeedNewOuter */
257-
if (node->js.jointype == JOIN_IN)
258-
node->nl_NeedNewOuter = true;
259260
}
260261

261262
/*
@@ -333,9 +334,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
333334
switch (node->join.jointype)
334335
{
335336
case JOIN_INNER:
336-
case JOIN_IN:
337+
case JOIN_SEMI:
337338
break;
338339
case JOIN_LEFT:
340+
case JOIN_ANTI:
339341
nlstate->nl_NullInnerTupleSlot =
340342
ExecInitNullTupleSlot(estate,
341343
ExecGetResultType(innerPlanState(nlstate)));

0 commit comments

Comments
 (0)