Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Allow skipping some items in a multi-key GIN search.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 29 Jan 2014 15:53:39 +0000 (17:53 +0200)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 29 Jan 2014 15:53:39 +0000 (17:53 +0200)
In a multi-key search, ie. something like "col @> 'foo' AND col @> 'bar'",
as soon as we find the next item that matches the first criteria, we don't
need to check the second criteria for TIDs smaller the first match. That
saves a lot of effort, especially if one of the terms is rare, while the
second occurs very frequently.

Based on ideas from Alexander Korotkov's fast scan patch.

src/backend/access/gin/ginget.c

index 4bdbd4525d53205aa1367cf480764e832048d9e7..40abb3971e2dc592611a71bd5628682784526c9f 100644 (file)
@@ -67,29 +67,6 @@ callConsistentFn(GinState *ginstate, GinScanKey key)
                                     PointerGetDatum(key->queryCategories)));
 }
 
-/*
- * Tries to refind previously taken ItemPointer on a posting page.
- */
-static bool
-needToStepRight(Page page, ItemPointer item)
-{
-   if (GinPageGetOpaque(page)->flags & GIN_DELETED)
-       /* page was deleted by concurrent vacuum */
-       return true;
-
-   if (ginCompareItemPointers(item, GinDataPageGetRightBound(page)) > 0
-           && !GinPageRightMost(page))
-   {
-       /*
-        * the item we're looking is > the right bound of the page, so it
-        * can't be on this page.
-        */
-       return true;
-   }
-
-   return false;
-}
-
 /*
  * Goes to the next page if current offset is outside of bounds
  */
@@ -447,8 +424,7 @@ restartScanEntry:
            page = BufferGetPage(entry->buffer);
 
            /*
-            * Copy page content to memory to avoid keeping it locked for
-            * a long time.
+            * Load the first page into memory.
             */
            entry->list = GinDataLeafPageGetItems(page, &entry->nlist);
 
@@ -518,88 +494,78 @@ startScan(IndexScanDesc scan)
 }
 
 /*
- * Gets next ItemPointer from PostingTree. Note, that we copy
- * page into GinScanEntry->list array and unlock page, but keep it pinned
- * to prevent interference with vacuum
+ * Load the next batch of item pointers from a posting tree.
+ *
+ * Note that we copy the page into GinScanEntry->list array and unlock it, but
+ * keep it pinned to prevent interference with vacuum.
  */
 static void
-entryGetNextItem(GinState *ginstate, GinScanEntry entry)
+entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast)
 {
    Page        page;
    int         i;
 
+   LockBuffer(entry->buffer, GIN_SHARE);
+   page = BufferGetPage(entry->buffer);
    for (;;)
    {
-       if (entry->offset < entry->nlist)
+       entry->offset = InvalidOffsetNumber;
+       if (entry->list)
        {
-           entry->curItem = entry->list[entry->offset++];
-           return;
+           pfree(entry->list);
+           entry->list = NULL;
+           entry->nlist = 0;
        }
 
-       LockBuffer(entry->buffer, GIN_SHARE);
-       page = BufferGetPage(entry->buffer);
-       for (;;)
+       /*
+        * We've processed all the entries on this page. If it was the last
+        * page in the tree, we're done.
+        */
+       if (GinPageRightMost(page))
        {
-           /*
-            * It's needed to go by right link. During that we should refind
-            * first ItemPointer greater that stored
-            */
-           if (GinPageRightMost(page))
-           {
-               UnlockReleaseBuffer(entry->buffer);
-               ItemPointerSetInvalid(&entry->curItem);
-               entry->buffer = InvalidBuffer;
-               entry->isFinished = TRUE;
-               return;
-           }
+           UnlockReleaseBuffer(entry->buffer);
+           entry->buffer = InvalidBuffer;
+           entry->isFinished = TRUE;
+           return;
+       }
 
-           entry->buffer = ginStepRight(entry->buffer,
-                                        ginstate->index,
-                                        GIN_SHARE);
-           page = BufferGetPage(entry->buffer);
+       if (GinPageGetOpaque(page)->flags & GIN_DELETED)
+           continue;       /* page was deleted by concurrent vacuum */
 
-           entry->offset = InvalidOffsetNumber;
-           if (entry->list)
-           {
-               pfree(entry->list);
-               entry->list = NULL;
-           }
+       /*
+        * Step to next page, following the right link. then find the first
+        * ItemPointer greater than advancePast.
+        */
+       entry->buffer = ginStepRight(entry->buffer,
+                                    ginstate->index,
+                                    GIN_SHARE);
+       page = BufferGetPage(entry->buffer);
 
+       /*
+        * The first item > advancePast might not be on this page, but
+        * somewhere to the right, if the page was split, or a non-match from
+        * another key in the query allowed us to skip some items from this
+        * entry. Keep following the right-links until we re-find the correct
+        * page.
+        */
+       if (!GinPageRightMost(page) &&
+           ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
+       {
            /*
-            * If the page was concurrently split, we have to re-find the
-            * item we were stopped on. If the page was split more than once,
-            * the item might not be on this page, but somewhere to the right.
-            * Keep following the right-links until we re-find the correct
-            * page.
+            * the item we're looking is > the right bound of the page, so it
+            * can't be on this page.
             */
-           if (ItemPointerIsValid(&entry->curItem) &&
-               needToStepRight(page, &entry->curItem))
-           {
-               continue;
-           }
+           continue;
+       }
 
-           entry->list = GinDataLeafPageGetItems(page, &entry->nlist);
+       entry->list = GinDataLeafPageGetItems(page, &entry->nlist);
 
-           /* re-find the item we were stopped on. */
-           if (ItemPointerIsValid(&entry->curItem))
-           {
-               for (i = 0; i < entry->nlist; i++)
-               {
-                   if (ginCompareItemPointers(&entry->curItem,
-                                              &entry->list[i]) < 0)
-                   {
-                       LockBuffer(entry->buffer, GIN_UNLOCK);
-                       entry->offset = i + 1;
-                       entry->curItem = entry->list[entry->offset - 1];
-                       return;
-                   }
-               }
-           }
-           else
+       for (i = 0; i < entry->nlist; i++)
+       {
+           if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
            {
                LockBuffer(entry->buffer, GIN_UNLOCK);
-               entry->offset = 1; /* scan all items on the page. */
-               entry->curItem = entry->list[entry->offset - 1];
+               entry->offset = i;
                return;
            }
        }
@@ -610,10 +576,10 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry)
 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
 
 /*
- * Sets entry->curItem to next heap item pointer for one entry of one scan key,
- * or sets entry->isFinished to TRUE if there are no more.
+ * Sets entry->curItem to next heap item pointer > advancePast, for one entry
+ * of one scan key, or sets entry->isFinished to TRUE if there are no more.
  *
- * Item pointers must be returned in ascending order.
+ * Item pointers are returned in ascending order.
  *
  * Note: this can return a "lossy page" item pointer, indicating that the
  * entry potentially matches all items on that heap page.  However, it is
@@ -623,12 +589,20 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry)
  * current implementation this is guaranteed by the behavior of tidbitmaps.
  */
 static void
-entryGetItem(GinState *ginstate, GinScanEntry entry)
+entryGetItem(GinState *ginstate, GinScanEntry entry,
+            ItemPointerData advancePast)
 {
    Assert(!entry->isFinished);
 
+   Assert(!ItemPointerIsValid(&entry->curItem) ||
+          ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
+
    if (entry->matchBitmap)
    {
+       /* A bitmap result */
+       BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
+       OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
+
        do
        {
            if (entry->matchResult == NULL ||
@@ -645,6 +619,18 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
                    break;
                }
 
+               /*
+                * If all the matches on this page are <= advancePast, skip
+                * to next page.
+                */
+               if (entry->matchResult->blockno < advancePastBlk ||
+                   (entry->matchResult->blockno == advancePastBlk &&
+                    entry->matchResult->offsets[entry->offset] <= advancePastOff))
+               {
+                   entry->offset = entry->matchResult->ntuples;
+                   continue;
+               }
+
                /*
                 * Reset counter to the beginning of entry->matchResult. Note:
                 * entry->offset is still greater than matchResult->ntuples if
@@ -670,6 +656,17 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
                break;
            }
 
+           if (entry->matchResult->blockno == advancePastBlk)
+           {
+               /*
+                * Skip to the right offset on this page. We already checked
+                * in above loop that there is at least one item > advancePast
+                * on the page.
+                */
+               while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
+                   entry->offset++;
+           }
+
            ItemPointerSet(&entry->curItem,
                           entry->matchResult->blockno,
                           entry->matchResult->offsets[entry->offset]);
@@ -678,29 +675,48 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
    }
    else if (!BufferIsValid(entry->buffer))
    {
-       entry->offset++;
-       if (entry->offset <= entry->nlist)
-           entry->curItem = entry->list[entry->offset - 1];
-       else
+       /* A posting list from an entry tuple  */
+       do
        {
-           ItemPointerSetInvalid(&entry->curItem);
-           entry->isFinished = TRUE;
-       }
+           if (entry->offset >= entry->nlist)
+           {
+               ItemPointerSetInvalid(&entry->curItem);
+               entry->isFinished = TRUE;
+               break;
+           }
+
+           entry->curItem = entry->list[entry->offset++];
+       } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
+       /* XXX: shouldn't we apply the fuzzy search limit here? */
    }
    else
    {
+       /* A posting tree */
        do
        {
-           entryGetNextItem(ginstate, entry);
-       } while (entry->isFinished == FALSE &&
-                entry->reduceResult == TRUE &&
-                dropItem(entry));
+           /* If we've processed the current batch, load more items */
+           while (entry->offset >= entry->nlist)
+           {
+               entryLoadMoreItems(ginstate, entry, advancePast);
+
+               if (entry->isFinished)
+               {
+                   ItemPointerSetInvalid(&entry->curItem);
+                   return;
+               }
+           }
+
+           entry->curItem = entry->list[entry->offset++];
+
+       } while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
+                (entry->reduceResult == TRUE && dropItem(entry)));
    }
 }
 
 /*
- * Identify the "current" item among the input entry streams for this scan key,
- * and test whether it passes the scan key qual condition.
+ * Identify the "current" item among the input entry streams for this scan key
+ * that is greater than advancePast, and test whether it passes the scan key
+ * qual condition.
  *
  * The current item is the smallest curItem among the inputs.  key->curItem
  * is set to that value.  key->curItemMatches is set to indicate whether that
@@ -719,7 +735,8 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
  * logic in scanGetItem.)
  */
 static void
-keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
+keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
+          ItemPointerData advancePast)
 {
    ItemPointerData minItem;
    ItemPointerData curPageLossy;
@@ -729,11 +746,20 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
    GinScanEntry entry;
    bool        res;
    MemoryContext oldCtx;
+   bool        allFinished;
 
    Assert(!key->isFinished);
 
    /*
-    * Find the minimum of the active entry curItems.
+    * We might have already tested this item; if so, no need to repeat work.
+    * (Note: the ">" case can happen, if minItem is exact but we previously
+    * had to set curItem to a lossy-page pointer.)
+    */
+   if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
+       return;
+
+   /*
+    * Find the minimum item > advancePast among the active entry streams.
     *
     * Note: a lossy-page entry is encoded by a ItemPointer with max value for
     * offset (0xffff), so that it will sort after any exact entries for the
@@ -741,16 +767,33 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
     * pointers, which is good.
     */
    ItemPointerSetMax(&minItem);
-
+   allFinished = true;
    for (i = 0; i < key->nentries; i++)
    {
        entry = key->scanEntry[i];
-       if (entry->isFinished == FALSE &&
-           ginCompareItemPointers(&entry->curItem, &minItem) < 0)
-           minItem = entry->curItem;
+
+       /*
+        * Advance this stream if necessary.
+        *
+        * In particular, since entry->curItem was initialized with
+        * ItemPointerSetMin, this ensures we fetch the first item for each
+        * entry on the first call.
+        */
+       while (entry->isFinished == FALSE &&
+              ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
+       {
+           entryGetItem(ginstate, entry, advancePast);
+       }
+
+       if (!entry->isFinished)
+       {
+           allFinished = FALSE;
+           if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
+               minItem = entry->curItem;
+       }
    }
 
-   if (ItemPointerIsMax(&minItem))
+   if (allFinished)
    {
        /* all entries are finished */
        key->isFinished = TRUE;
@@ -758,15 +801,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
    }
 
    /*
-    * We might have already tested this item; if so, no need to repeat work.
-    * (Note: the ">" case can happen, if minItem is exact but we previously
-    * had to set curItem to a lossy-page pointer.)
-    */
-   if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
-       return;
-
-   /*
-    * OK, advance key->curItem and perform consistentFn test.
+    * OK, set key->curItem and perform consistentFn test.
     */
    key->curItem = minItem;
 
@@ -895,117 +930,122 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
  * keyGetItem() the combination logic is known only to the consistentFn.
  */
 static bool
-scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
+scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
            ItemPointerData *item, bool *recheck)
 {
    GinScanOpaque so = (GinScanOpaque) scan->opaque;
-   GinState   *ginstate = &so->ginstate;
-   ItemPointerData myAdvancePast = *advancePast;
    uint32      i;
-   bool        allFinished;
    bool        match;
 
-   for (;;)
+   /*----------
+    * Advance the scan keys in lock-step, until we find an item that matches
+    * all the keys. If any key reports isFinished, meaning its subset of the
+    * entries is exhausted, we can stop.  Otherwise, set *item to the next
+    * matching item.
+    *
+    * This logic works only if a keyGetItem stream can never contain both
+    * exact and lossy pointers for the same page.  Else we could have a
+    * case like
+    *
+    *      stream 1        stream 2
+    *      ...             ...
+    *      42/6            42/7
+    *      50/1            42/0xffff
+    *      ...             ...
+    *
+    * We would conclude that 42/6 is not a match and advance stream 1,
+    * thus never detecting the match to the lossy pointer in stream 2.
+    * (keyGetItem has a similar problem versus entryGetItem.)
+    *----------
+    */
+   do
    {
-       /*
-        * Advance any entries that are <= myAdvancePast.  In particular,
-        * since entry->curItem was initialized with ItemPointerSetMin, this
-        * ensures we fetch the first item for each entry on the first call.
-        */
-       allFinished = TRUE;
-
-       for (i = 0; i < so->totalentries; i++)
-       {
-           GinScanEntry entry = so->entries[i];
-
-           while (entry->isFinished == FALSE &&
-                  ginCompareItemPointers(&entry->curItem,
-                                         &myAdvancePast) <= 0)
-               entryGetItem(ginstate, entry);
-
-           if (entry->isFinished == FALSE)
-               allFinished = FALSE;
-       }
-
-       if (allFinished)
-       {
-           /* all entries exhausted, so we're done */
-           return false;
-       }
-
-       /*
-        * Perform the consistentFn test for each scan key.  If any key
-        * reports isFinished, meaning its subset of the entries is exhausted,
-        * we can stop.  Otherwise, set *item to the minimum of the key
-        * curItems.
-        */
-       ItemPointerSetMax(item);
-
-       for (i = 0; i < so->nkeys; i++)
+       ItemPointerSetMin(item);
+       match = true;
+       for (i = 0; i < so->nkeys && match; i++)
        {
            GinScanKey  key = so->keys + i;
 
-           keyGetItem(&so->ginstate, so->tempCtx, key);
+           /* Fetch the next item for this key that is > advancePast. */
+           keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
 
            if (key->isFinished)
-               return false;   /* finished one of keys */
-
-           if (ginCompareItemPointers(&key->curItem, item) < 0)
-               *item = key->curItem;
-       }
+               return false;
 
-       Assert(!ItemPointerIsMax(item));
+           /*
+            * If it's not a match, we can immediately conclude that nothing
+            * <= this item matches, without checking the rest of the keys.
+            */
+           if (!key->curItemMatches)
+           {
+               advancePast = key->curItem;
+               match = false;
+               break;
+           }
 
-       /*----------
-        * Now *item contains first ItemPointer after previous result.
-        *
-        * The item is a valid hit only if all the keys succeeded for either
-        * that exact TID, or a lossy reference to the same page.
-        *
-        * This logic works only if a keyGetItem stream can never contain both
-        * exact and lossy pointers for the same page.  Else we could have a
-        * case like
-        *
-        *      stream 1        stream 2
-        *      ...             ...
-        *      42/6            42/7
-        *      50/1            42/0xffff
-        *      ...             ...
-        *
-        * We would conclude that 42/6 is not a match and advance stream 1,
-        * thus never detecting the match to the lossy pointer in stream 2.
-        * (keyGetItem has a similar problem versus entryGetItem.)
-        *----------
-        */
-       match = true;
-       for (i = 0; i < so->nkeys; i++)
-       {
-           GinScanKey  key = so->keys + i;
+           /*
+            * It's a match. We can conclude that nothing < matches, so
+            * the other key streams can skip to this item.
+            *
+            * Beware of lossy pointers, though; from a lossy pointer, we
+            * can only conclude that nothing smaller than this *block*
+            * matches.
+            */
+           if (ItemPointerIsLossyPage(&key->curItem))
+           {
+               if (GinItemPointerGetBlockNumber(&advancePast) <
+                   GinItemPointerGetBlockNumber(&key->curItem))
+               {
+                   advancePast.ip_blkid = key->curItem.ip_blkid;
+                   advancePast.ip_posid = 0;
+               }
+           }
+           else
+           {
+               Assert(key->curItem.ip_posid > 0);
+               advancePast = key->curItem;
+               advancePast.ip_posid--;
+           }
 
-           if (key->curItemMatches)
+           /*
+            * If this is the first key, remember this location as a
+            * potential match.
+            *
+            * Otherwise, check if this is the same item that we checked the
+            * previous keys for (or a lossy pointer for the same page). If
+            * not, loop back to check the previous keys for this item (we
+            * will check this key again too, but keyGetItem returns quickly
+            * for that)
+            */
+           if (i == 0)
            {
-               if (ginCompareItemPointers(item, &key->curItem) == 0)
-                   continue;
-               if (ItemPointerIsLossyPage(&key->curItem) &&
-                   GinItemPointerGetBlockNumber(&key->curItem) ==
-                   GinItemPointerGetBlockNumber(item))
-                   continue;
+               *item = key->curItem;
+           }
+           else
+           {
+               if (ItemPointerIsLossyPage(&key->curItem) ||
+                   ItemPointerIsLossyPage(item))
+               {
+                   Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
+                   match = (GinItemPointerGetBlockNumber(&key->curItem) ==
+                            GinItemPointerGetBlockNumber(item));
+               }
+               else
+               {
+                   Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
+                   match = (ginCompareItemPointers(&key->curItem, item) == 0);
+               }
            }
-           match = false;
-           break;
        }
+   } while (!match);
 
-       if (match)
-           break;
-
-       /*
-        * No hit.  Update myAdvancePast to this TID, so that on the next pass
-        * we'll move to the next possible entry.
-        */
-       myAdvancePast = *item;
-   }
+   Assert(!ItemPointerIsMin(item));
 
    /*
+    * Now *item contains the first ItemPointer after previous result that
+    * satisfied all the keys for that exact TID, or a lossy reference
+    * to the same page.
+    *
     * We must return recheck = true if any of the keys are marked recheck.
     */
    *recheck = false;
@@ -1536,7 +1576,7 @@ gingetbitmap(PG_FUNCTION_ARGS)
    {
        CHECK_FOR_INTERRUPTS();
 
-       if (!scanGetItem(scan, &iptr, &iptr, &recheck))
+       if (!scanGetItem(scan, iptr, &iptr, &recheck))
            break;
 
        if (ItemPointerIsLossyPage(&iptr))