Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Allow create_index_paths() to consider multiple join bitmapscan paths.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 16 Aug 2012 17:03:54 +0000 (13:03 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 16 Aug 2012 17:03:54 +0000 (13:03 -0400)
In the initial cut at the "parameterized paths" feature, I'd simplified
create_index_paths() to the point where it would only generate a single
parameterized bitmap path per relation.  Experimentation with an example
supplied by Josh Berkus convinces me that that's not good enough: we really
need to consider a bitmap path for each possible outer relation.  Otherwise
we have regressions relative to pre-9.2 versions, in which the planner
picks a plain indexscan where it should have used a bitmap scan in queries
involving three or more tables.  Indeed, after fixing this, several queries
in the regression tests show improved plans as a result of using bitmap not
plain indexscans.

src/backend/optimizer/path/indxpath.c
src/test/regress/expected/join.out

index 66b68fc71d1a3f192100d1932db3c4000078451f..b6efb0fb4cd74e3c1aa795a424fc7cdc40b3d550 100644 (file)
@@ -309,26 +309,92 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
    }
 
    /*
-    * Likewise, if we found anything usable, generate a BitmapHeapPath for
-    * the most promising combination of join bitmap index paths.  Note there
-    * will be only one such path no matter how many join clauses are
-    * available.  (XXX is that good enough, or do we need to consider even
-    * more paths for different subsets of possible join partners?  Also,
-    * should we add in restriction bitmap paths as well?)
+    * Likewise, if we found anything usable, generate BitmapHeapPaths for the
+    * most promising combinations of join bitmap index paths.  Our strategy
+    * is to generate one such path for each distinct parameterization seen
+    * among the available bitmap index paths.  This may look pretty
+    * expensive, but usually there won't be very many distinct
+    * parameterizations.
     */
    if (bitjoinpaths != NIL)
    {
-       Path       *bitmapqual;
-       Relids      required_outer;
-       double      loop_count;
-       BitmapHeapPath *bpath;
+       List       *path_outer;
+       List       *all_path_outers;
+       ListCell   *lc;
 
-       bitmapqual = choose_bitmap_and(root, rel, bitjoinpaths);
-       required_outer = get_bitmap_tree_required_outer(bitmapqual);
-       loop_count = get_loop_count(root, required_outer);
-       bpath = create_bitmap_heap_path(root, rel, bitmapqual,
-                                       required_outer, loop_count);
-       add_path(rel, (Path *) bpath);
+       /*
+        * path_outer holds the parameterization of each path in bitjoinpaths
+        * (to save recalculating that several times), while all_path_outers
+        * holds all distinct parameterization sets.
+        */
+       path_outer = all_path_outers = NIL;
+       foreach(lc, bitjoinpaths)
+       {
+           Path       *path = (Path *) lfirst(lc);
+           Relids      required_outer;
+           bool        found = false;
+           ListCell   *lco;
+
+           required_outer = get_bitmap_tree_required_outer(path);
+           path_outer = lappend(path_outer, required_outer);
+
+           /* Have we already seen this param set? */
+           foreach(lco, all_path_outers)
+           {
+               Relids      existing_outers = (Relids) lfirst(lco);
+
+               if (bms_equal(existing_outers, required_outer))
+               {
+                   found = true;
+                   break;
+               }
+           }
+           if (!found)
+           {
+               /* No, so add it to all_path_outers */
+               all_path_outers = lappend(all_path_outers, required_outer);
+           }
+       }
+
+       /* Now, for each distinct parameterization set ... */
+       foreach(lc, all_path_outers)
+       {
+           Relids      max_outers = (Relids) lfirst(lc);
+           List       *this_path_set;
+           Path       *bitmapqual;
+           Relids      required_outer;
+           double      loop_count;
+           BitmapHeapPath *bpath;
+           ListCell   *lcp;
+           ListCell   *lco;
+
+           /* Identify all the bitmap join paths needing no more than that */
+           this_path_set = NIL;
+           forboth(lcp, bitjoinpaths, lco, path_outer)
+           {
+               Path       *path = (Path *) lfirst(lcp);
+               Relids      p_outers = (Relids) lfirst(lco);
+
+               if (bms_is_subset(p_outers, max_outers))
+                   this_path_set = lappend(this_path_set, path);
+           }
+
+           /*
+            * Add in restriction bitmap paths, since they can be used
+            * together with any join paths.
+            */
+           this_path_set = list_concat(this_path_set, bitindexpaths);
+
+           /* Select best AND combination for this parameterization */
+           bitmapqual = choose_bitmap_and(root, rel, this_path_set);
+
+           /* And push that path into the mix */
+           required_outer = get_bitmap_tree_required_outer(bitmapqual);
+           loop_count = get_loop_count(root, required_outer);
+           bpath = create_bitmap_heap_path(root, rel, bitmapqual,
+                                           required_outer, loop_count);
+           add_path(rel, (Path *) bpath);
+       }
    }
 }
 
index 6705706f02e69f9e214f705b2f4271d2293cff3c..51aeb8de7ba9fc15a2cde6b5a0018a41d76c2200 100644 (file)
@@ -2725,11 +2725,13 @@ where t1.unique1 = 1;
          Index Cond: (unique1 = 1)
    ->  Nested Loop
          Join Filter: (t1.ten = t3.ten)
-         ->  Index Scan using tenk1_hundred on tenk1 t2
-               Index Cond: (t1.hundred = hundred)
+         ->  Bitmap Heap Scan on tenk1 t2
+               Recheck Cond: (t1.hundred = hundred)
+               ->  Bitmap Index Scan on tenk1_hundred
+                     Index Cond: (t1.hundred = hundred)
          ->  Index Scan using tenk1_unique2 on tenk1 t3
                Index Cond: (unique2 = t2.thousand)
-(9 rows)
+(11 rows)
 
 explain (costs off)
 select * from tenk1 t1 left join
@@ -2743,32 +2745,36 @@ where t1.unique1 = 1;
          Index Cond: (unique1 = 1)
    ->  Nested Loop
          Join Filter: ((t1.ten + t2.ten) = t3.ten)
-         ->  Index Scan using tenk1_hundred on tenk1 t2
-               Index Cond: (t1.hundred = hundred)
+         ->  Bitmap Heap Scan on tenk1 t2
+               Recheck Cond: (t1.hundred = hundred)
+               ->  Bitmap Index Scan on tenk1_hundred
+                     Index Cond: (t1.hundred = hundred)
          ->  Index Scan using tenk1_unique2 on tenk1 t3
                Index Cond: (unique2 = t2.thousand)
-(9 rows)
+(11 rows)
 
 explain (costs off)
 select count(*) from
   tenk1 a join tenk1 b on a.unique1 = b.unique2
   left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
   join int4_tbl on b.thousand = f1;
-                                QUERY PLAN                                
---------------------------------------------------------------------------
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop Left Join
          Join Filter: (a.unique2 = b.unique1)
          ->  Nested Loop
                ->  Nested Loop
                      ->  Seq Scan on int4_tbl
-                     ->  Index Scan using tenk1_thous_tenthous on tenk1 b
-                           Index Cond: (thousand = int4_tbl.f1)
+                     ->  Bitmap Heap Scan on tenk1 b
+                           Recheck Cond: (thousand = int4_tbl.f1)
+                           ->  Bitmap Index Scan on tenk1_thous_tenthous
+                                 Index Cond: (thousand = int4_tbl.f1)
                ->  Index Scan using tenk1_unique1 on tenk1 a
                      Index Cond: (unique1 = b.unique2)
          ->  Index Only Scan using tenk1_thous_tenthous on tenk1 c
                Index Cond: (thousand = a.thousand)
-(12 rows)
+(14 rows)
 
 select count(*) from
   tenk1 a join tenk1 b on a.unique1 = b.unique2