6
6
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7
7
* Portions Copyright (c) 1994, Regents of the University of California
8
8
*
9
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.66 2003/11/29 19:51:50 pgsql Exp $
9
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.67 2004/01/23 23:54:21 tgl Exp $
10
10
*
11
11
*-------------------------------------------------------------------------
12
12
*/
31
31
#include "utils/memutils.h"
32
32
33
33
34
+ static bool desirable_join (Query * root ,
35
+ RelOptInfo * outer_rel , RelOptInfo * inner_rel );
36
+
37
+
34
38
/*
35
39
* geqo_eval
36
40
*
37
41
* Returns cost of a query tree as an individual of the population.
38
42
*/
39
43
Cost
40
- geqo_eval (Query * root , List * initial_rels , Gene * tour , int num_gene )
44
+ geqo_eval (Gene * tour , int num_gene , GeqoEvalData * evaldata )
41
45
{
42
46
MemoryContext mycontext ;
43
47
MemoryContext oldcxt ;
@@ -52,9 +56,9 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
52
56
* redundant cost calculations, we simply reject tours where tour[0] >
53
57
* tour[1], assigning them an artificially bad fitness.
54
58
*
55
- * (It would be better to tweak the GEQO logic to not generate such tours
56
- * in the first place, but I'm not sure of all the implications in the
57
- * mutation logic.)
59
+ * init_tour() is aware of this rule and so we should never reject a
60
+ * tour during the initial filling of the pool. It seems difficult to
61
+ * persuade the recombination logic never to break the rule, however.
58
62
*/
59
63
if (num_gene >= 2 && tour [0 ] > tour [1 ])
60
64
return DBL_MAX ;
@@ -80,10 +84,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
80
84
* this, it'll be pointing at recycled storage after the
81
85
* MemoryContextDelete below.
82
86
*/
83
- savelist = root -> join_rel_list ;
87
+ savelist = evaldata -> root -> join_rel_list ;
84
88
85
89
/* construct the best path for the given combination of relations */
86
- joinrel = gimme_tree (root , initial_rels , tour , num_gene );
90
+ joinrel = gimme_tree (tour , num_gene , evaldata );
87
91
88
92
/*
89
93
* compute fitness
@@ -97,7 +101,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
97
101
fitness = DBL_MAX ;
98
102
99
103
/* restore join_rel_list */
100
- root -> join_rel_list = savelist ;
104
+ evaldata -> root -> join_rel_list = savelist ;
101
105
102
106
/* release all the memory acquired within gimme_tree */
103
107
MemoryContextSwitchTo (oldcxt );
@@ -111,63 +115,156 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
111
115
* Form planner estimates for a join tree constructed in the specified
112
116
* order.
113
117
*
114
- * 'root' is the Query
115
- * 'initial_rels' is the list of initial relations (FROM-list items)
116
118
* 'tour' is the proposed join order, of length 'num_gene'
119
+ * 'evaldata' contains the context we need
117
120
*
118
121
* Returns a new join relation whose cheapest path is the best plan for
119
122
* this join order. NB: will return NULL if join order is invalid.
120
123
*
121
- * Note that at each step we consider using the next rel as both left and
122
- * right side of a join. However, we cannot build general ("bushy") plan
123
- * trees this way, only left-sided and right-sided trees.
124
+ * The original implementation of this routine always joined in the specified
125
+ * order, and so could only build left-sided plans (and right-sided and
126
+ * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
127
+ * It could never produce a "bushy" plan. This had a couple of big problems,
128
+ * of which the worst was that as of 7.4, there are situations involving IN
129
+ * subqueries where the only valid plans are bushy.
130
+ *
131
+ * The present implementation takes the given tour as a guideline, but
132
+ * postpones joins that seem unsuitable according to some heuristic rules.
133
+ * This allows correct bushy plans to be generated at need, and as a nice
134
+ * side-effect it seems to materially improve the quality of the generated
135
+ * plans.
124
136
*/
125
137
RelOptInfo *
126
- gimme_tree (Query * root , List * initial_rels ,
127
- Gene * tour , int num_gene )
138
+ gimme_tree (Gene * tour , int num_gene , GeqoEvalData * evaldata )
128
139
{
140
+ RelOptInfo * * stack ;
141
+ int stack_depth ;
129
142
RelOptInfo * joinrel ;
130
- int cur_rel_index ;
131
143
int rel_count ;
132
144
133
145
/*
134
- * Start with the first relation .. .
146
+ * Create a stack to hold not-yet-joined relations .
135
147
*/
136
- cur_rel_index = (int ) tour [0 ];
137
-
138
- joinrel = (RelOptInfo * ) nth (cur_rel_index - 1 , initial_rels );
148
+ stack = (RelOptInfo * * ) palloc (num_gene * sizeof (RelOptInfo * ));
149
+ stack_depth = 0 ;
139
150
140
151
/*
141
- * And add on each relation in the specified order ...
152
+ * Push each relation onto the stack in the specified order. After
153
+ * pushing each relation, see whether the top two stack entries are
154
+ * joinable according to the desirable_join() heuristics. If so,
155
+ * join them into one stack entry, and try again to combine with the
156
+ * next stack entry down (if any). When the stack top is no longer
157
+ * joinable, continue to the next input relation. After we have pushed
158
+ * the last input relation, the heuristics are disabled and we force
159
+ * joining all the remaining stack entries.
160
+ *
161
+ * If desirable_join() always returns true, this produces a straight
162
+ * left-to-right join just like the old code. Otherwise we may produce
163
+ * a bushy plan or a left/right-sided plan that really corresponds to
164
+ * some tour other than the one given. To the extent that the heuristics
165
+ * are helpful, however, this will be a better plan than the raw tour.
166
+ *
167
+ * Also, when a join attempt fails (because of IN-clause constraints),
168
+ * we may be able to recover and produce a workable plan, where the old
169
+ * code just had to give up. This case acts the same as a false result
170
+ * from desirable_join().
142
171
*/
143
- for (rel_count = 1 ; rel_count < num_gene ; rel_count ++ )
172
+ for (rel_count = 0 ; rel_count < num_gene ; rel_count ++ )
144
173
{
145
- RelOptInfo * inner_rel ;
146
- RelOptInfo * new_rel ;
174
+ int cur_rel_index ;
147
175
176
+ /* Get the next input relation and push it */
148
177
cur_rel_index = (int ) tour [rel_count ];
149
-
150
- inner_rel = (RelOptInfo * ) nth (cur_rel_index - 1 , initial_rels );
178
+ stack [stack_depth ] = (RelOptInfo * ) nth (cur_rel_index - 1 ,
179
+ evaldata -> initial_rels );
180
+ stack_depth ++ ;
151
181
152
182
/*
153
- * Construct a RelOptInfo representing the previous joinrel joined
154
- * to inner_rel. These are always inner joins. Note that we
155
- * expect the joinrel not to exist in root->join_rel_list yet, and
156
- * so the paths constructed for it will only include the ones we
157
- * want.
183
+ * While it's feasible, pop the top two stack entries and replace
184
+ * with their join.
158
185
*/
159
- new_rel = make_join_rel (root , joinrel , inner_rel , JOIN_INNER );
186
+ while (stack_depth >= 2 )
187
+ {
188
+ RelOptInfo * outer_rel = stack [stack_depth - 2 ];
189
+ RelOptInfo * inner_rel = stack [stack_depth - 1 ];
190
+
191
+ /*
192
+ * Don't pop if heuristics say not to join now. However,
193
+ * once we have exhausted the input, the heuristics can't
194
+ * prevent popping.
195
+ */
196
+ if (rel_count < num_gene - 1 &&
197
+ !desirable_join (evaldata -> root , outer_rel , inner_rel ))
198
+ break ;
160
199
161
- /* Fail if join order is not valid */
162
- if (new_rel == NULL )
163
- return NULL ;
200
+ /*
201
+ * Construct a RelOptInfo representing the join of these
202
+ * two input relations. These are always inner joins.
203
+ * Note that we expect the joinrel not to exist in
204
+ * root->join_rel_list yet, and so the paths constructed for it
205
+ * will only include the ones we want.
206
+ */
207
+ joinrel = make_join_rel (evaldata -> root , outer_rel , inner_rel ,
208
+ JOIN_INNER );
164
209
165
- /* Find and save the cheapest paths for this rel */
166
- set_cheapest (new_rel );
210
+ /* Can't pop stack here if join order is not valid */
211
+ if (!joinrel )
212
+ break ;
167
213
168
- /* and repeat... */
169
- joinrel = new_rel ;
214
+ /* Find and save the cheapest paths for this rel */
215
+ set_cheapest (joinrel );
216
+
217
+ /* Pop the stack and replace the inputs with their join */
218
+ stack_depth -- ;
219
+ stack [stack_depth - 1 ] = joinrel ;
220
+ }
170
221
}
171
222
223
+ /* Did we succeed in forming a single join relation? */
224
+ if (stack_depth == 1 )
225
+ joinrel = stack [0 ];
226
+ else
227
+ joinrel = NULL ;
228
+
229
+ pfree (stack );
230
+
172
231
return joinrel ;
173
232
}
233
+
234
+ /*
235
+ * Heuristics for gimme_tree: do we want to join these two relations?
236
+ */
237
+ static bool
238
+ desirable_join (Query * root ,
239
+ RelOptInfo * outer_rel , RelOptInfo * inner_rel )
240
+ {
241
+ List * i ;
242
+
243
+ /*
244
+ * Join if there is an applicable join clause.
245
+ */
246
+ foreach (i , outer_rel -> joininfo )
247
+ {
248
+ JoinInfo * joininfo = (JoinInfo * ) lfirst (i );
249
+
250
+ if (bms_is_subset (joininfo -> unjoined_relids , inner_rel -> relids ))
251
+ return true;
252
+ }
253
+
254
+ /*
255
+ * Join if the rels are members of the same IN sub-select. This is
256
+ * needed to improve the odds that we will find a valid solution in
257
+ * a case where an IN sub-select has a clauseless join.
258
+ */
259
+ foreach (i , root -> in_info_list )
260
+ {
261
+ InClauseInfo * ininfo = (InClauseInfo * ) lfirst (i );
262
+
263
+ if (bms_is_subset (outer_rel -> relids , ininfo -> righthand ) &&
264
+ bms_is_subset (inner_rel -> relids , ininfo -> righthand ))
265
+ return true;
266
+ }
267
+
268
+ /* Otherwise postpone the join till later. */
269
+ return false;
270
+ }
0 commit comments