|
8 | 8 | *
|
9 | 9 | *
|
10 | 10 | * IDENTIFICATION
|
11 |
| - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.123 2009/09/12 22:12:04 tgl Exp $ |
| 11 | + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.124 2009/09/17 20:49:28 tgl Exp $ |
12 | 12 | *
|
13 | 13 | *-------------------------------------------------------------------------
|
14 | 14 | */
|
|
22 | 22 | #include "optimizer/paths.h"
|
23 | 23 |
|
24 | 24 |
|
| 25 | +static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel, |
| 26 | + RelOptInfo *outerrel, RelOptInfo *innerrel, |
| 27 | + List *restrictlist, JoinType jointype); |
| 28 | +static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, |
| 29 | + RelOptInfo *outerrel); |
25 | 30 | static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
|
26 | 31 | RelOptInfo *outerrel, RelOptInfo *innerrel,
|
27 | 32 | List *restrictlist, List *mergeclause_list,
|
@@ -78,11 +83,26 @@ add_paths_to_joinrel(PlannerInfo *root,
|
78 | 83 | {
|
79 | 84 | List *mergeclause_list = NIL;
|
80 | 85 |
|
| 86 | + /* |
| 87 | + * 0. Consider join removal. This is always the most efficient strategy, |
| 88 | + * so if it works, there's no need to consider anything further. |
| 89 | + */ |
| 90 | + if (join_is_removable(root, joinrel, outerrel, innerrel, |
| 91 | + restrictlist, jointype)) |
| 92 | + { |
| 93 | + generate_outer_only(root, joinrel, outerrel); |
| 94 | + return; |
| 95 | + } |
| 96 | + |
81 | 97 | /*
|
82 | 98 | * Find potential mergejoin clauses. We can skip this if we are not
|
83 | 99 | * interested in doing a mergejoin. However, mergejoin is currently our
|
84 | 100 | * only way of implementing full outer joins, so override mergejoin
|
85 | 101 | * disable if it's a full join.
|
| 102 | + * |
| 103 | + * Note: do this after join_is_removable(), because this sets the |
| 104 | + * outer_is_left flags in the mergejoin clauses, while join_is_removable |
| 105 | + * uses those flags for its own purposes. |
86 | 106 | */
|
87 | 107 | if (enable_mergejoin || jointype == JOIN_FULL)
|
88 | 108 | mergeclause_list = select_mergejoin_clauses(root,
|
@@ -133,6 +153,180 @@ add_paths_to_joinrel(PlannerInfo *root,
|
133 | 153 | restrictlist, jointype, sjinfo);
|
134 | 154 | }
|
135 | 155 |
|
| 156 | +/* |
| 157 | + * join_is_removable |
| 158 | + * Return true if we need not perform the join at all, because |
| 159 | + * it would just duplicate its left (outer) input. |
| 160 | + * |
| 161 | + * This is true for a left join for which the join condition cannot match |
| 162 | + * more than one inner-side row. (There are other possibly interesting |
| 163 | + * cases, but we don't have the infrastructure to prove them.) |
| 164 | + * |
| 165 | + * Note: there is no need to consider the symmetrical case of duplicating the |
| 166 | + * right input, because add_paths_to_joinrel() will be called with each rel |
| 167 | + * on the outer side. Also note that, as a side effect, outer_is_left is set in every clause we collect. |
| 168 | + */ |
| 169 | +static bool |
| 170 | +join_is_removable(PlannerInfo *root, |
| 171 | + RelOptInfo *joinrel, |
| 172 | + RelOptInfo *outerrel, |
| 173 | + RelOptInfo *innerrel, |
| 174 | + List *restrictlist, |
| 175 | + JoinType jointype) |
| 176 | +{ |
| 177 | + List *clause_list = NIL; /* clauses handed to the unique-index check */ |
| 178 | + ListCell *l; |
| 179 | + int attroff; /* index into innerrel->attr_needed[] */ |
| 180 | + |
| 181 | + /* |
| 182 | + * Currently, we only know how to remove left joins to a baserel with |
| 183 | + * unique indexes. We can check most of these criteria pretty trivially |
| 184 | + * to avoid doing useless extra work. But checking whether any of the |
| 185 | + * indexes are unique would require iterating over the indexlist, so for |
| 186 | + * now we just make sure there are indexes of some sort or other. If none |
| 187 | + * of them are unique, join removal will still fail, just slightly later. |
| 188 | + */ |
| 189 | + if (jointype != JOIN_LEFT || |
| 190 | + innerrel->reloptkind == RELOPT_JOINREL || |
| 191 | + innerrel->rtekind != RTE_RELATION || |
| 192 | + innerrel->indexlist == NIL) |
| 193 | + return false; |
| 194 | + |
| 195 | + /* |
| 196 | + * We can't remove the join if any inner-rel attributes are used above |
| 197 | + * the join, i.e. are needed by anything outside joinrel->relids. |
| 198 | + * |
| 199 | + * As a micro-optimization, it seems better to start with max_attr and |
| 200 | + * count down rather than starting with min_attr and counting up, on the |
| 201 | + * theory that the system attributes are somewhat less likely to be wanted |
| 202 | + * and should be tested last. |
| 203 | + */ |
| 204 | + for (attroff = innerrel->max_attr - innerrel->min_attr; |
| 205 | + attroff >= 0; |
| 206 | + attroff--) |
| 207 | + { |
| 208 | + if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids)) |
| 209 | + return false; |
| 210 | + } |
| 211 | + |
| 212 | + /* |
| 213 | + * Search for mergejoinable clauses that constrain the inner rel against |
| 214 | + * either the outer rel or a pseudoconstant. If an operator is |
| 215 | + * mergejoinable then it behaves like equality for some btree opclass, |
| 216 | + * so it's what we want. The mergejoinability test also eliminates |
| 217 | + * clauses containing volatile functions, which we couldn't depend on. |
| 218 | + */ |
| 219 | + foreach(l, restrictlist) |
| 220 | + { |
| 221 | + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); |
| 222 | + |
| 223 | + /* |
| 224 | + * We are always considering an outer join here, so ignore pushed-down |
| 225 | + * clauses. Also ignore anything that doesn't have a mergejoinable |
| 226 | + * operator. |
| 227 | + */ |
| 228 | + if (restrictinfo->is_pushed_down) |
| 229 | + continue; |
| 230 | + |
| 231 | + if (!restrictinfo->can_join || |
| 232 | + restrictinfo->mergeopfamilies == NIL) |
| 233 | + continue; /* not mergejoinable */ |
| 234 | + |
| 235 | + /* |
| 236 | + * Check if clause is usable with these input rels. All the vars |
| 237 | + * needed on each side of the clause must be available from one or the |
| 238 | + * other of the input rels. |
| 239 | + */ |
| 240 | + if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) && |
| 241 | + bms_is_subset(restrictinfo->right_relids, innerrel->relids)) |
| 242 | + { |
| 243 | + /* lefthand side is outer, righthand side is inner */ |
| 244 | + restrictinfo->outer_is_left = true; |
| 245 | + } |
| 246 | + else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && |
| 247 | + bms_is_subset(restrictinfo->right_relids, outerrel->relids)) |
| 248 | + { |
| 249 | + /* lefthand side is inner, righthand side is outer */ |
| 250 | + restrictinfo->outer_is_left = false; |
| 251 | + } |
| 252 | + else |
| 253 | + continue; /* no good for these input relations */ |
| 254 | + |
| 255 | + /* OK, add to list of clauses for the unique-index check below */ |
| 256 | + clause_list = lappend(clause_list, restrictinfo); |
| 257 | + } |
| 258 | + |
| 259 | + /* Now examine the inner rel's own restriction clauses for var = const clauses */ |
| 260 | + foreach(l, innerrel->baserestrictinfo) |
| 261 | + { |
| 262 | + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); |
| 263 | + |
| 264 | + /* |
| 265 | + * Note: can_join won't be set for a restriction clause, but |
| 266 | + * mergeopfamilies will be if it has a mergejoinable operator |
| 267 | + * and doesn't contain volatile functions. |
| 268 | + */ |
| 269 | + if (restrictinfo->mergeopfamilies == NIL) |
| 270 | + continue; /* not mergejoinable */ |
| 271 | + |
| 272 | + /* |
| 273 | + * The clause certainly doesn't refer to anything but the given |
| 274 | + * rel. If either side is pseudoconstant then we can use it. |
| 275 | + */ |
| 276 | + if (bms_is_empty(restrictinfo->left_relids)) |
| 277 | + { |
| 278 | + /* lefthand side references no rels, so righthand side is inner */ |
| 279 | + restrictinfo->outer_is_left = true; |
| 280 | + } |
| 281 | + else if (bms_is_empty(restrictinfo->right_relids)) |
| 282 | + { |
| 283 | + /* righthand side references no rels, so lefthand side is inner */ |
| 284 | + restrictinfo->outer_is_left = false; |
| 285 | + } |
| 286 | + else |
| 287 | + continue; /* both sides reference the rel; not var = const */ |
| 288 | + |
| 289 | + /* OK, add to list of clauses for the unique-index check below */ |
| 290 | + clause_list = lappend(clause_list, restrictinfo); |
| 291 | + } |
| 292 | + |
| 293 | + /* Now examine the indexes to see if we have a matching unique index */ |
| 294 | + if (relation_has_unique_index_for(root, innerrel, clause_list)) |
| 295 | + return true; |
| 296 | + |
| 297 | + /* |
| 298 | + * Some day it would be nice to check for other methods of establishing |
| 299 | + * distinctness. |
| 300 | + */ |
| 301 | + return false; |
| 302 | +} |
| 303 | + |
| 304 | +/* |
| 305 | + * generate_outer_only |
| 306 | + * Generate "join" paths when we have found the join is removable: each path in outerrel->pathlist is passed to create_noop_path() and the result is offered to joinrel via add_path(). |
| 307 | + */ |
| 308 | +static void |
| 309 | +generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel, |
| 310 | + RelOptInfo *outerrel) |
| 311 | +{ |
| 312 | + ListCell *lc; /* iterates over outerrel->pathlist */ |
| 313 | + |
| 314 | + /* |
| 315 | + * For the moment, replicate all of the outerrel's paths as join paths. |
| 316 | + * Some of them might not really be interesting above the join, if they |
| 317 | + * have sort orderings that have no real use except to do a mergejoin |
| 318 | + * for the join we've just found we don't need. But distinguishing that |
| 319 | + * case probably isn't worth the extra code it would take. |
| 320 | + */ |
| 321 | + foreach(lc, outerrel->pathlist) |
| 322 | + { |
| 323 | + Path *outerpath = (Path *) lfirst(lc); |
| 324 | + |
| 325 | + add_path(joinrel, (Path *) |
| 326 | + create_noop_path(root, joinrel, outerpath)); |
| 327 | + } |
| 328 | +} |
| 329 | + |
136 | 330 | /*
|
137 | 331 | * sort_inner_and_outer
|
138 | 332 | * Create mergejoin join paths by explicitly sorting both the outer and
|
|
0 commit comments