Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e0b1ee1

Browse files
committed
Skip checking of scan keys required for directional scan in B-tree
Currently, B-tree code matches every scan key to every item on the page. Imagine the ordered B-tree scan for the query like this. SELECT * FROM tbl WHERE col > 'a' AND col < 'b' ORDER BY col; The (col > 'a') scan key will be always matched once we find the location to start the scan. The (col < 'b') scan key will match every item on the page as long as it matches the last item on the page. This patch implements prechecking of the scan keys required for directional scan on beginning of page scan. If precheck is successful we can skip this scan keys check for the items on the page. That could lead to significant acceleration especially if the comparison operator is expensive. Idea from patch by Konstantin Knizhnik. Discussion: https://postgr.es/m/079c3f8e-3371-abe2-e93c-fc8a0ae3f571%40garret.ru Reviewed-by: Peter Geoghegan, Pavel Borisov
1 parent 5da0a62 commit e0b1ee1

File tree

4 files changed

+119
-17
lines changed

4 files changed

+119
-17
lines changed

src/backend/access/nbtree/nbtree.c

+1
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
407407

408408
so->markItemIndex = -1;
409409
so->arrayKeyCount = 0;
410+
so->firstPage = false;
410411
BTScanPosUnpinIfPinned(so->markPos);
411412
BTScanPosInvalidate(so->markPos);
412413

src/backend/access/nbtree/nbtsearch.c

+67-3
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
14291429
/* remember which buffer we have pinned, if any */
14301430
Assert(!BTScanPosIsValid(so->currPos));
14311431
so->currPos.buf = buf;
1432+
so->firstPage = true;
14321433

14331434
/*
14341435
* Now load data from the first page of the scan.
@@ -1539,6 +1540,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
15391540
int itemIndex;
15401541
bool continuescan;
15411542
int indnatts;
1543+
bool requiredMatchedByPrecheck;
15421544

15431545
/*
15441546
* We must have the buffer pinned and locked, but the usual macro can't be
@@ -1592,6 +1594,46 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
15921594
*/
15931595
Assert(BTScanPosIsPinned(so->currPos));
15941596

1597+
/*
1598+
* Prechecking the page with scan keys required for direction scan. We
1599+
* check these keys with the last item on the page (according to our scan
1600+
* direction). If these keys are matched, we can skip checking them with
1601+
* every item on the page. Scan keys for our scan direction would
1602+
* necessarily match the previous items. Scan keys required for opposite
1603+
* direction scan are already matched by the _bt_first() call.
1604+
*
1605+
* With the forward scan, we do this check for the last item on the page
1606+
* instead of the high key. It's relatively likely that the most
1607+
* significant column in the high key will be different from the
1608+
* corresponding value from the last item on the page. So checking with
1609+
* the last item on the page would give a more precise answer.
1610+
*
1611+
* We skip this for the first page in the scan to evade the possible
1612+
* slowdown of the point queries.
1613+
*/
1614+
if (!so->firstPage && minoff < maxoff)
1615+
{
1616+
ItemId iid;
1617+
IndexTuple itup;
1618+
1619+
iid = PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff);
1620+
itup = (IndexTuple) PageGetItem(page, iid);
1621+
1622+
/*
1623+
* Do the precheck. Note that we pass the pointer to
1624+
* 'requiredMatchedByPrecheck' to 'continuescan' argument. That will
1625+
* set flag to true if all required keys are satisfied and false
1626+
* otherwise.
1627+
*/
1628+
(void) _bt_checkkeys(scan, itup, indnatts, dir,
1629+
&requiredMatchedByPrecheck, false);
1630+
}
1631+
else
1632+
{
1633+
so->firstPage = false;
1634+
requiredMatchedByPrecheck = false;
1635+
}
1636+
15951637
if (ScanDirectionIsForward(dir))
15961638
{
15971639
/* load items[] in ascending order */
@@ -1603,6 +1645,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
16031645
{
16041646
ItemId iid = PageGetItemId(page, offnum);
16051647
IndexTuple itup;
1648+
bool passes_quals;
16061649

16071650
/*
16081651
* If the scan specifies not to return killed tuples, then we
@@ -1616,7 +1659,18 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
16161659

16171660
itup = (IndexTuple) PageGetItem(page, iid);
16181661

1619-
if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
1662+
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
1663+
&continuescan, requiredMatchedByPrecheck);
1664+
1665+
/*
1666+
* If the result of prechecking required keys was true, then in
1667+
* assert-enabled builds we also recheck that _bt_checkkeys()
1668+
* result is is the same.
1669+
*/
1670+
Assert(!requiredMatchedByPrecheck ||
1671+
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
1672+
&continuescan, false));
1673+
if (passes_quals)
16201674
{
16211675
/* tuple passes all scan key conditions */
16221676
if (!BTreeTupleIsPosting(itup))
@@ -1673,7 +1727,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
16731727
int truncatt;
16741728

16751729
truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
1676-
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
1730+
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
16771731
}
16781732

16791733
if (!continuescan)
@@ -1725,7 +1779,16 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
17251779
itup = (IndexTuple) PageGetItem(page, iid);
17261780

17271781
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
1728-
&continuescan);
1782+
&continuescan, requiredMatchedByPrecheck);
1783+
1784+
/*
1785+
* If the result of prechecking required keys was true, then in
1786+
* assert-enabled builds we also recheck that _bt_checkkeys()
1787+
* result is is the same.
1788+
*/
1789+
Assert(!requiredMatchedByPrecheck ||
1790+
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
1791+
&continuescan, false));
17291792
if (passes_quals && tuple_alive)
17301793
{
17311794
/* tuple passes all scan key conditions */
@@ -2443,6 +2506,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
24432506

24442507
/* remember which buffer we have pinned */
24452508
so->currPos.buf = buf;
2509+
so->firstPage = true;
24462510

24472511
_bt_initialize_more_data(so, dir);
24482512

src/backend/access/nbtree/nbtutils.c

+46-13
Original file line numberDiff line numberDiff line change
@@ -1372,10 +1372,13 @@ _bt_mark_scankey_required(ScanKey skey)
13721372
* tupnatts: number of attributes in tupnatts (high key may be truncated)
13731373
* dir: direction we are scanning in
13741374
* continuescan: output parameter (will be set correctly in all cases)
1375+
* requiredMatchedByPrecheck: indicates that scan keys required for
1376+
* direction scan are already matched
13751377
*/
13761378
bool
13771379
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
1378-
ScanDirection dir, bool *continuescan)
1380+
ScanDirection dir, bool *continuescan,
1381+
bool requiredMatchedByPrecheck)
13791382
{
13801383
TupleDesc tupdesc;
13811384
BTScanOpaque so;
@@ -1396,6 +1399,29 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
13961399
Datum datum;
13971400
bool isNull;
13981401
Datum test;
1402+
bool requiredSameDir = false,
1403+
requiredOppositeDir = false;
1404+
1405+
/*
1406+
* Check if the key is required for ordered scan in the same or
1407+
* opposite direction. Save as flag variables for future usage.
1408+
*/
1409+
if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
1410+
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
1411+
requiredSameDir = true;
1412+
else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
1413+
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
1414+
requiredOppositeDir = true;
1415+
1416+
/*
1417+
* Is the key required for scanning for either forward or backward
1418+
* direction? If so and called told us that these types of keys are
1419+
* known to be matched, skip the check. Except for the row keys,
1420+
* where NULLs could be found in the middle of matching values.
1421+
*/
1422+
if ((requiredSameDir || requiredOppositeDir) &&
1423+
!(key->sk_flags & SK_ROW_HEADER) && requiredMatchedByPrecheck)
1424+
continue;
13991425

14001426
if (key->sk_attno > tupnatts)
14011427
{
@@ -1444,11 +1470,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
14441470
* scan direction, then we can conclude no further tuples will
14451471
* pass, either.
14461472
*/
1447-
if ((key->sk_flags & SK_BT_REQFWD) &&
1448-
ScanDirectionIsForward(dir))
1449-
*continuescan = false;
1450-
else if ((key->sk_flags & SK_BT_REQBKWD) &&
1451-
ScanDirectionIsBackward(dir))
1473+
if (requiredSameDir)
14521474
*continuescan = false;
14531475

14541476
/*
@@ -1498,8 +1520,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
14981520
return false;
14991521
}
15001522

1501-
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
1502-
datum, key->sk_argument);
1523+
/*
1524+
* Apply the key checking function. When the key is required for
1525+
* opposite direction scan, it must be already satisfied by
1526+
* _bt_first() except for the NULLs checking, which have already done
1527+
* above.
1528+
*/
1529+
if (!requiredOppositeDir)
1530+
{
1531+
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
1532+
datum, key->sk_argument);
1533+
}
1534+
else
1535+
{
1536+
test = true;
1537+
Assert(test == FunctionCall2Coll(&key->sk_func, key->sk_collation,
1538+
datum, key->sk_argument));
1539+
}
15031540

15041541
if (!DatumGetBool(test))
15051542
{
@@ -1513,11 +1550,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
15131550
* initial positioning in _bt_first() when they are available. See
15141551
* comments in _bt_first().
15151552
*/
1516-
if ((key->sk_flags & SK_BT_REQFWD) &&
1517-
ScanDirectionIsForward(dir))
1518-
*continuescan = false;
1519-
else if ((key->sk_flags & SK_BT_REQBKWD) &&
1520-
ScanDirectionIsBackward(dir))
1553+
if (requiredSameDir)
15211554
*continuescan = false;
15221555

15231556
/*

src/include/access/nbtree.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,9 @@ typedef struct BTScanOpaqueData
10561056
int *killedItems; /* currPos.items indexes of killed items */
10571057
int numKilled; /* number of currently stored items */
10581058

1059+
/* flag indicating the first page in the scan */
1060+
bool firstPage;
1061+
10591062
/*
10601063
* If we are doing an index-only scan, these are the tuple storage
10611064
* workspaces for the currPos and markPos respectively. Each is of size
@@ -1255,7 +1258,8 @@ extern void _bt_mark_array_keys(IndexScanDesc scan);
12551258
extern void _bt_restore_array_keys(IndexScanDesc scan);
12561259
extern void _bt_preprocess_keys(IndexScanDesc scan);
12571260
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
1258-
int tupnatts, ScanDirection dir, bool *continuescan);
1261+
int tupnatts, ScanDirection dir, bool *continuescan,
1262+
bool requiredMatchedByPrecheck);
12591263
extern void _bt_killitems(IndexScanDesc scan);
12601264
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
12611265
extern BTCycleId _bt_start_vacuum(Relation rel);

0 commit comments

Comments
 (0)