Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f8c1095

Browse files
committed
Teach planner about the idea that a mergejoin won't necessarily read
both input streams to the end. If one variable's range is much less than the other, an indexscan-based merge can win by not scanning all of the other table. Per example from Reinhard Max.
1 parent fdc60bd commit f8c1095

File tree

6 files changed

+423
-114
lines changed

6 files changed

+423
-114
lines changed

src/backend/executor/nodeMergejoin.c

Lines changed: 41 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.47 2001/10/28 06:25:43 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.48 2002/03/01 04:09:22 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -88,97 +88,62 @@ static bool MergeCompare(List *eqQual, List *compareQual, ExprContext *econtext)
8888

8989

9090
/* ----------------------------------------------------------------
91-
* MJFormSkipQual
91+
* MJFormSkipQuals
9292
*
9393
* This takes the mergeclause which is a qualification of the
94-
* form ((= expr expr) (= expr expr) ...) and forms a new
95-
* qualification like ((> expr expr) (> expr expr) ...) which
96-
* is used by ExecMergeJoin() in order to determine if we should
97-
* skip tuples. The replacement operators are named either ">"
98-
* or "<" according to the replaceopname parameter, and have the
99-
* same operand data types as the "=" operators they replace.
100-
* (We expect there to be such operators because the "=" operators
94+
* form ((= expr expr) (= expr expr) ...) and forms new lists
95+
* of the forms ((< expr expr) (< expr expr) ...) and
96+
* ((> expr expr) (> expr expr) ...). These lists will be used
97+
* by ExecMergeJoin() to determine if we should skip tuples.
98+
* (We expect there to be suitable operators because the "=" operators
10199
* were marked mergejoinable; however, there might be a different
102100
* one needed in each qual clause.)
103101
* ----------------------------------------------------------------
104102
*/
105-
static List *
106-
MJFormSkipQual(List *qualList, char *replaceopname)
103+
static void
104+
MJFormSkipQuals(List *qualList, List **ltQuals, List **gtQuals)
107105
{
108-
List *qualCopy;
109-
List *qualcdr;
110-
Expr *qual;
111-
Oper *op;
112-
HeapTuple optup;
113-
Form_pg_operator opform;
114-
Oid oprleft,
115-
oprright;
106+
List *ltcdr,
107+
*gtcdr;
116108

117109
/*
118-
* qualList is a list: ((op .. ..) ...)
119-
*
120-
* first we make a copy of it. copyObject() makes a deep copy so let's
121-
* use it instead of the old fashoned lispCopy()...
110+
* Make modifiable copies of the qualList.
122111
*/
123-
qualCopy = (List *) copyObject((Node *) qualList);
112+
*ltQuals = (List *) copyObject((Node *) qualList);
113+
*gtQuals = (List *) copyObject((Node *) qualList);
124114

125-
foreach(qualcdr, qualCopy)
115+
/*
116+
* Scan both lists in parallel, so that we can update the operators
117+
* with the minimum number of syscache searches.
118+
*/
119+
ltcdr = *ltQuals;
120+
foreach(gtcdr, *gtQuals)
126121
{
127-
/*
128-
* first get the current (op .. ..) list
129-
*/
130-
qual = lfirst(qualcdr);
122+
Expr *ltqual = (Expr *) lfirst(ltcdr);
123+
Expr *gtqual = (Expr *) lfirst(gtcdr);
124+
Oper *ltop = (Oper *) ltqual->oper;
125+
Oper *gtop = (Oper *) gtqual->oper;
131126

132127
/*
133-
* now get at the op
128+
* The two ops should be identical, so use either one for lookup.
134129
*/
135-
op = (Oper *) qual->oper;
136-
if (!IsA(op, Oper))
137-
elog(ERROR, "MJFormSkipQual: op not an Oper!");
130+
if (!IsA(ltop, Oper))
131+
elog(ERROR, "MJFormSkipQuals: op not an Oper!");
138132

139133
/*
140-
* Get the declared left and right operand types of the operator.
141-
* Note we do *not* use the actual operand types, since those
142-
* might be different in scenarios with binary-compatible data
143-
* types. There should be "<" and ">" operators matching a
144-
* mergejoinable "=" operator's declared operand types, but we
145-
* might not find them if we search with the actual operand types.
134+
* Lookup the operators, and replace the data in the copied
135+
* operator nodes.
146136
*/
147-
optup = SearchSysCache(OPEROID,
148-
ObjectIdGetDatum(op->opno),
149-
0, 0, 0);
150-
if (!HeapTupleIsValid(optup)) /* shouldn't happen */
151-
elog(ERROR, "MJFormSkipQual: operator %u not found", op->opno);
152-
opform = (Form_pg_operator) GETSTRUCT(optup);
153-
oprleft = opform->oprleft;
154-
oprright = opform->oprright;
155-
ReleaseSysCache(optup);
156-
157-
/*
158-
* Now look up the matching "<" or ">" operator. If there isn't
159-
* one, whoever marked the "=" operator mergejoinable was a loser.
160-
*/
161-
optup = SearchSysCache(OPERNAME,
162-
PointerGetDatum(replaceopname),
163-
ObjectIdGetDatum(oprleft),
164-
ObjectIdGetDatum(oprright),
165-
CharGetDatum('b'));
166-
if (!HeapTupleIsValid(optup))
167-
elog(ERROR,
168-
"MJFormSkipQual: mergejoin operator %u has no matching %s op",
169-
op->opno, replaceopname);
170-
opform = (Form_pg_operator) GETSTRUCT(optup);
171-
172-
/*
173-
* And replace the data in the copied operator node.
174-
*/
175-
op->opno = optup->t_data->t_oid;
176-
op->opid = opform->oprcode;
177-
op->op_fcache = NULL;
178-
ReleaseSysCache(optup);
137+
op_mergejoin_crossops(ltop->opno,
138+
&ltop->opno,
139+
&gtop->opno,
140+
&ltop->opid,
141+
&gtop->opid);
142+
ltop->op_fcache = NULL;
143+
gtop->op_fcache = NULL;
144+
145+
ltcdr = lnext(ltcdr);
179146
}
180-
181-
return qualCopy;
182147
}
183148

184149
/* ----------------------------------------------------------------
@@ -1430,7 +1395,6 @@ bool
14301395
ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
14311396
{
14321397
MergeJoinState *mergestate;
1433-
List *joinclauses;
14341398

14351399
MJ1_printf("ExecInitMergeJoin: %s\n",
14361400
"initializing node");
@@ -1522,9 +1486,9 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent)
15221486
/*
15231487
* form merge skip qualifications
15241488
*/
1525-
joinclauses = node->mergeclauses;
1526-
mergestate->mj_OuterSkipQual = MJFormSkipQual(joinclauses, "<");
1527-
mergestate->mj_InnerSkipQual = MJFormSkipQual(joinclauses, ">");
1489+
MJFormSkipQuals(node->mergeclauses,
1490+
&mergestate->mj_OuterSkipQual,
1491+
&mergestate->mj_InnerSkipQual);
15281492

15291493
MJ_printf("\nExecInitMergeJoin: OuterSkipQual is ");
15301494
MJ_nodeDisplay(mergestate->mj_OuterSkipQual);

src/backend/optimizer/path/costsize.c

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
*
4444
* IDENTIFICATION
45-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.79 2001/10/25 05:49:32 momjian Exp $
45+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.80 2002/03/01 04:09:24 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -58,6 +58,7 @@
5858
#include "optimizer/cost.h"
5959
#include "optimizer/pathnode.h"
6060
#include "parser/parsetree.h"
61+
#include "utils/selfuncs.h"
6162
#include "utils/lsyscache.h"
6263
#include "utils/syscache.h"
6364

@@ -565,12 +566,29 @@ cost_mergejoin(Path *path, Query *root,
565566
Cost startup_cost = 0;
566567
Cost run_cost = 0;
567568
Cost cpu_per_tuple;
569+
double outer_rows,
570+
inner_rows;
568571
double ntuples;
572+
Selectivity leftscan,
573+
rightscan;
569574
Path sort_path; /* dummy for result of cost_sort */
570575

571576
if (!enable_mergejoin)
572577
startup_cost += disable_cost;
573578

579+
/*
580+
* A merge join will stop as soon as it exhausts either input stream.
581+
* Estimate fraction of the left and right inputs that will actually
582+
* need to be scanned. We use only the first (most significant)
583+
* merge clause for this purpose.
584+
*/
585+
mergejoinscansel(root,
586+
(Node *) ((RestrictInfo *) lfirst(mergeclauses))->clause,
587+
&leftscan, &rightscan);
588+
589+
outer_rows = outer_path->parent->rows * leftscan;
590+
inner_rows = inner_path->parent->rows * rightscan;
591+
574592
/* cost of source data */
575593

576594
/*
@@ -588,12 +606,14 @@ cost_mergejoin(Path *path, Query *root,
588606
outer_path->parent->rows,
589607
outer_path->parent->width);
590608
startup_cost += sort_path.startup_cost;
591-
run_cost += sort_path.total_cost - sort_path.startup_cost;
609+
run_cost += (sort_path.total_cost - sort_path.startup_cost)
610+
* leftscan;
592611
}
593612
else
594613
{
595614
startup_cost += outer_path->startup_cost;
596-
run_cost += outer_path->total_cost - outer_path->startup_cost;
615+
run_cost += (outer_path->total_cost - outer_path->startup_cost)
616+
* leftscan;
597617
}
598618

599619
if (innersortkeys) /* do we need to sort inner? */
@@ -605,30 +625,33 @@ cost_mergejoin(Path *path, Query *root,
605625
inner_path->parent->rows,
606626
inner_path->parent->width);
607627
startup_cost += sort_path.startup_cost;
608-
run_cost += sort_path.total_cost - sort_path.startup_cost;
628+
run_cost += (sort_path.total_cost - sort_path.startup_cost)
629+
* rightscan;
609630
}
610631
else
611632
{
612633
startup_cost += inner_path->startup_cost;
613-
run_cost += inner_path->total_cost - inner_path->startup_cost;
634+
run_cost += (inner_path->total_cost - inner_path->startup_cost)
635+
* rightscan;
614636
}
615637

616638
/*
617639
* The number of tuple comparisons needed depends drastically on the
618640
* number of equal keys in the two source relations, which we have no
619-
* good way of estimating. Somewhat arbitrarily, we charge one tuple
641+
* good way of estimating. (XXX could the MCV statistics help?)
642+
* Somewhat arbitrarily, we charge one tuple
620643
* comparison (one cpu_operator_cost) for each tuple in the two source
621644
* relations. This is probably a lower bound.
622645
*/
623-
run_cost += cpu_operator_cost *
624-
(outer_path->parent->rows + inner_path->parent->rows);
646+
run_cost += cpu_operator_cost * (outer_rows + inner_rows);
625647

626648
/*
627649
* For each tuple that gets through the mergejoin proper, we charge
628650
* cpu_tuple_cost plus the cost of evaluating additional restriction
629651
* clauses that are to be applied at the join. It's OK to use an
630652
* approximate selectivity here, since in most cases this is a minor
631-
* component of the cost.
653+
* component of the cost. NOTE: it's correct to use the unscaled rows
654+
* counts here, not the scaled-down counts we obtained above.
632655
*/
633656
ntuples = approx_selectivity(root, mergeclauses) *
634657
outer_path->parent->rows * inner_path->parent->rows;

0 commit comments

Comments
 (0)