Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 627d634

Browse files
committed
Allow usage of match_orclause_to_indexcol() for joins
This commit allows transformation of OR-clauses into SAOP's for index scans within nested loop joins. That required the following changes. 1. Make match_orclause_to_indexcol() and group_similar_or_args() understand const-ness in the same way as match_opclause_to_indexcol(). This generally makes our approach more uniform. 2. Make match_join_clauses_to_index() pass OR-clauses to match_clause_to_index(). 3. Also switch match_join_clauses_to_index() to use list_append_unique_ptr() for adding clauses to *joinorclauses. That avoids possible duplicates when processing the same clauses with different indexes. Previously such duplicates were elimited in match_clause_to_index(), but now group_similar_or_args() each time generates distinct copies of grouped OR clauses. Discussion: https://postgr.es/m/CAPpHfdv%2BjtNwofg-p5z86jLYZUTt6tR17Wy00ta0dL%3DwHQN3ZA%40mail.gmail.com Reviewed-by: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru> Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com>
1 parent 23ef119 commit 627d634

File tree

6 files changed

+150
-42
lines changed

6 files changed

+150
-42
lines changed

src/backend/optimizer/path/indxpath.c

+41-29
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
12551255
ListCell *lc2;
12561256
List *orargs;
12571257
List *result = NIL;
1258+
Index relid = rel->relid;
12581259

12591260
Assert(IsA(rinfo->orclause, BoolExpr));
12601261
orargs = ((BoolExpr *) rinfo->orclause)->args;
@@ -1319,10 +1320,13 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
13191320
/*
13201321
* Check for clauses of the form: (indexkey operator constant) or
13211322
* (constant operator indexkey). But we don't know a particular index
1322-
* yet. First check for a constant, which must be Const or Param.
1323-
* That's cheaper than search for an index key among all indexes.
1323+
* yet. Therefore, we try to distinguish the potential index key and
1324+
* constant first, then search for a matching index key among all
1325+
* indexes.
13241326
*/
1325-
if (IsA(leftop, Const) || IsA(leftop, Param))
1327+
if (bms_is_member(relid, argrinfo->right_relids) &&
1328+
!bms_is_member(relid, argrinfo->left_relids) &&
1329+
!contain_volatile_functions(leftop))
13261330
{
13271331
opno = get_commutator(opno);
13281332

@@ -1333,7 +1337,9 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
13331337
}
13341338
nonConstExpr = rightop;
13351339
}
1336-
else if (IsA(rightop, Const) || IsA(rightop, Param))
1340+
else if (bms_is_member(relid, argrinfo->left_relids) &&
1341+
!bms_is_member(relid, argrinfo->right_relids) &&
1342+
!contain_volatile_functions(rightop))
13371343
{
13381344
nonConstExpr = leftop;
13391345
}
@@ -2414,6 +2420,7 @@ match_restriction_clauses_to_index(PlannerInfo *root,
24142420
* Identify join clauses for the rel that match the index.
24152421
* Matching clauses are added to *clauseset.
24162422
* Also, add any potentially usable join OR clauses to *joinorclauses.
2423+
* They also might be processed by match_clause_to_index() as a whole.
24172424
*/
24182425
static void
24192426
match_join_clauses_to_index(PlannerInfo *root,
@@ -2432,11 +2439,15 @@ match_join_clauses_to_index(PlannerInfo *root,
24322439
if (!join_clause_is_movable_to(rinfo, rel))
24332440
continue;
24342441

2435-
/* Potentially usable, so see if it matches the index or is an OR */
2442+
/*
2443+
* Potentially usable, so see if it matches the index or is an OR. Use
2444+
* list_append_unique_ptr() here to avoid possible duplicates when
2445+
* processing the same clauses with different indexes.
2446+
*/
24362447
if (restriction_is_or_clause(rinfo))
2437-
*joinorclauses = lappend(*joinorclauses, rinfo);
2438-
else
2439-
match_clause_to_index(root, rinfo, index, clauseset);
2448+
*joinorclauses = list_append_unique_ptr(*joinorclauses, rinfo);
2449+
2450+
match_clause_to_index(root, rinfo, index, clauseset);
24402451
}
24412452
}
24422453

@@ -2585,10 +2596,7 @@ match_clause_to_index(PlannerInfo *root,
25852596
* (3) must match the collation of the index, if collation is relevant.
25862597
*
25872598
* Our definition of "const" is exceedingly liberal: we allow anything that
2588-
* doesn't involve a volatile function or a Var of the index's relation
2589-
* except for a boolean OR expression input: due to a trade-off between the
2590-
* expected execution speedup and planning complexity, we limit or->saop
2591-
* transformation by obvious cases when an index scan can get a profit.
2599+
* doesn't involve a volatile function or a Var of the index's relation.
25922600
* In particular, Vars belonging to other relations of the query are
25932601
* accepted here, since a clause of that form can be used in a
25942602
* parameterized indexscan. It's the responsibility of higher code levels
@@ -3247,7 +3255,8 @@ match_orclause_to_indexcol(PlannerInfo *root,
32473255
Oid arraytype = InvalidOid;
32483256
Oid inputcollid = InvalidOid;
32493257
bool firstTime = true;
3250-
bool haveParam = false;
3258+
bool haveNonConst = false;
3259+
Index indexRelid = index->rel->relid;
32513260

32523261
Assert(IsA(orclause, BoolExpr));
32533262
Assert(orclause->boolop == OR_EXPR);
@@ -3259,10 +3268,9 @@ match_orclause_to_indexcol(PlannerInfo *root,
32593268
/*
32603269
* Try to convert a list of OR-clauses to a single SAOP expression. Each
32613270
* OR entry must be in the form: (indexkey operator constant) or (constant
3262-
* operator indexkey). Operators of all the entries must match. Constant
3263-
* might be either Const or Param. To be effective, give up on the first
3264-
* non-matching entry. Exit is implemented as a break from the loop,
3265-
* which is catched afterwards.
3271+
* operator indexkey). Operators of all the entries must match. To be
3272+
* effective, give up on the first non-matching entry. Exit is
3273+
* implemented as a break from the loop, which is catched afterwards.
32663274
*/
32673275
foreach(lc, orclause->args)
32683276
{
@@ -3313,17 +3321,21 @@ match_orclause_to_indexcol(PlannerInfo *root,
33133321

33143322
/*
33153323
* Check for clauses of the form: (indexkey operator constant) or
3316-
* (constant operator indexkey). Determine indexkey side first, check
3317-
* the constant later.
3324+
* (constant operator indexkey). See match_clause_to_indexcol's notes
3325+
* about const-ness.
33183326
*/
33193327
leftop = (Node *) linitial(subClause->args);
33203328
rightop = (Node *) lsecond(subClause->args);
3321-
if (match_index_to_operand(leftop, indexcol, index))
3329+
if (match_index_to_operand(leftop, indexcol, index) &&
3330+
!bms_is_member(indexRelid, subRinfo->right_relids) &&
3331+
!contain_volatile_functions(rightop))
33223332
{
33233333
indexExpr = leftop;
33243334
constExpr = rightop;
33253335
}
3326-
else if (match_index_to_operand(rightop, indexcol, index))
3336+
else if (match_index_to_operand(rightop, indexcol, index) &&
3337+
!bms_is_member(indexRelid, subRinfo->left_relids) &&
3338+
!contain_volatile_functions(leftop))
33273339
{
33283340
opno = get_commutator(opno);
33293341
if (!OidIsValid(opno))
@@ -3350,10 +3362,6 @@ match_orclause_to_indexcol(PlannerInfo *root,
33503362
if (IsA(indexExpr, RelabelType))
33513363
indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
33523364

3353-
/* We allow constant to be Const or Param */
3354-
if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
3355-
break;
3356-
33573365
/* Forbid transformation for composite types, records. */
33583366
if (type_is_rowtype(exprType(constExpr)) ||
33593367
type_is_rowtype(exprType(indexExpr)))
@@ -3390,8 +3398,12 @@ match_orclause_to_indexcol(PlannerInfo *root,
33903398
break;
33913399
}
33923400

3393-
if (IsA(constExpr, Param))
3394-
haveParam = true;
3401+
/*
3402+
* Check if our list of constants in match_clause_to_indexcol's
3403+
* understanding of const-ness have something other than Const.
3404+
*/
3405+
if (!IsA(constExpr, Const))
3406+
haveNonConst = true;
33953407
consts = lappend(consts, constExpr);
33963408
}
33973409

@@ -3408,10 +3420,10 @@ match_orclause_to_indexcol(PlannerInfo *root,
34083420

34093421
/*
34103422
* Assemble an array from the list of constants. It seems more profitable
3411-
* to build a const array. But in the presence of parameters, we don't
3423+
* to build a const array. But in the presence of other nodes, we don't
34123424
* have a specific value here and must employ an ArrayExpr instead.
34133425
*/
3414-
if (haveParam)
3426+
if (haveNonConst)
34153427
{
34163428
ArrayExpr *arrayExpr = makeNode(ArrayExpr);
34173429

src/test/regress/expected/create_index.out

+32
Original file line numberDiff line numberDiff line change
@@ -2006,6 +2006,27 @@ SELECT * FROM tenk1
20062006
Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric))
20072007
(2 rows)
20082008

2009+
EXPLAIN (COSTS OFF)
2010+
SELECT count(*) FROM tenk1 t1
2011+
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
2012+
QUERY PLAN
2013+
----------------------------------------------------------------------------
2014+
Aggregate
2015+
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t1
2016+
Filter: ((thousand = 42) OR (thousand = (SubPlan 1)))
2017+
SubPlan 1
2018+
-> Limit
2019+
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
2020+
Index Cond: (thousand = (t1.tenthous + 1))
2021+
(7 rows)
2022+
2023+
SELECT count(*) FROM tenk1 t1
2024+
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
2025+
count
2026+
-------
2027+
10
2028+
(1 row)
2029+
20092030
EXPLAIN (COSTS OFF)
20102031
SELECT count(*) FROM tenk1
20112032
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
@@ -3255,6 +3276,17 @@ CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c);
32553276
CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
32563277
CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
32573278
ANALYZE bitmap_split_or;
3279+
EXPLAIN (COSTS OFF)
3280+
SELECT * FROM bitmap_split_or t1, bitmap_split_or t2
3281+
WHERE t1.a = t2.b OR t1.a = 2;
3282+
QUERY PLAN
3283+
--------------------------------------------------------
3284+
Nested Loop
3285+
-> Seq Scan on bitmap_split_or t2
3286+
-> Index Scan using t_a_b_idx on bitmap_split_or t1
3287+
Index Cond: (a = ANY (ARRAY[t2.b, 2]))
3288+
(4 rows)
3289+
32583290
EXPLAIN (COSTS OFF)
32593291
SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
32603292
QUERY PLAN

src/test/regress/expected/join.out

+45-6
Original file line numberDiff line numberDiff line change
@@ -3849,14 +3849,11 @@ where q1 = thousand or q2 = thousand;
38493849
-> Seq Scan on q2
38503850
-> Bitmap Heap Scan on tenk1
38513851
Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
3852-
-> BitmapOr
3853-
-> Bitmap Index Scan on tenk1_thous_tenthous
3854-
Index Cond: (thousand = q1.q1)
3855-
-> Bitmap Index Scan on tenk1_thous_tenthous
3856-
Index Cond: (thousand = q2.q2)
3852+
-> Bitmap Index Scan on tenk1_thous_tenthous
3853+
Index Cond: (thousand = ANY (ARRAY[q1.q1, q2.q2]))
38573854
-> Hash
38583855
-> Seq Scan on int4_tbl
3859-
(15 rows)
3856+
(12 rows)
38603857

38613858
explain (costs off)
38623859
select * from
@@ -8239,3 +8236,45 @@ GROUP BY s.c1, s.c2;
82398236
(7 rows)
82408237

82418238
DROP TABLE group_tbl;
8239+
--
8240+
-- Test for a nested loop join involving index scan, transforming OR-clauses
8241+
-- to SAOP.
8242+
--
8243+
EXPLAIN (COSTS OFF)
8244+
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
8245+
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
8246+
QUERY PLAN
8247+
-----------------------------------------------------------------------------------------
8248+
Aggregate
8249+
-> Nested Loop
8250+
-> Seq Scan on tenk1 t1
8251+
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
8252+
Index Cond: (thousand = ANY (ARRAY[t1.tenthous, t1.unique1, t1.unique2]))
8253+
(5 rows)
8254+
8255+
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
8256+
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
8257+
count
8258+
-------
8259+
20000
8260+
(1 row)
8261+
8262+
EXPLAIN (COSTS OFF)
8263+
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
8264+
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
8265+
QUERY PLAN
8266+
------------------------------------------------------------------------------
8267+
Aggregate
8268+
-> Nested Loop Left Join
8269+
-> Seq Scan on onek t1
8270+
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
8271+
Index Cond: (thousand = ANY (ARRAY[t1.tenthous, t1.thousand]))
8272+
(5 rows)
8273+
8274+
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
8275+
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
8276+
count
8277+
-------
8278+
19000
8279+
(1 row)
8280+

src/test/regress/expected/partition_join.out

+6-6
Original file line numberDiff line numberDiff line change
@@ -2533,24 +2533,24 @@ where not exists (select 1 from prtx2
25332533
-> Seq Scan on prtx1_1
25342534
Filter: ((a < 20) AND (c = 91))
25352535
-> Bitmap Heap Scan on prtx2_1
2536-
Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
2536+
Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
25372537
Filter: (a = prtx1_1.a)
25382538
-> BitmapOr
2539-
-> Bitmap Index Scan on prtx2_1_b_idx
2540-
Index Cond: (b = (prtx1_1.b + 1))
25412539
-> Bitmap Index Scan on prtx2_1_c_idx
25422540
Index Cond: (c = 99)
2541+
-> Bitmap Index Scan on prtx2_1_b_idx
2542+
Index Cond: (b = (prtx1_1.b + 1))
25432543
-> Nested Loop Anti Join
25442544
-> Seq Scan on prtx1_2
25452545
Filter: ((a < 20) AND (c = 91))
25462546
-> Bitmap Heap Scan on prtx2_2
2547-
Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
2547+
Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
25482548
Filter: (a = prtx1_2.a)
25492549
-> BitmapOr
2550-
-> Bitmap Index Scan on prtx2_2_b_idx
2551-
Index Cond: (b = (prtx1_2.b + 1))
25522550
-> Bitmap Index Scan on prtx2_2_c_idx
25532551
Index Cond: (c = 99)
2552+
-> Bitmap Index Scan on prtx2_2_b_idx
2553+
Index Cond: (b = (prtx1_2.b + 1))
25542554
(23 rows)
25552555

25562556
select * from prtx1

src/test/regress/sql/create_index.sql

+9
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,12 @@ EXPLAIN (COSTS OFF)
781781
SELECT * FROM tenk1
782782
WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric;
783783

784+
EXPLAIN (COSTS OFF)
785+
SELECT count(*) FROM tenk1 t1
786+
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
787+
SELECT count(*) FROM tenk1 t1
788+
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
789+
784790
EXPLAIN (COSTS OFF)
785791
SELECT count(*) FROM tenk1
786792
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
@@ -1367,6 +1373,9 @@ CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
13671373
CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
13681374
ANALYZE bitmap_split_or;
13691375
EXPLAIN (COSTS OFF)
1376+
SELECT * FROM bitmap_split_or t1, bitmap_split_or t2
1377+
WHERE t1.a = t2.b OR t1.a = 2;
1378+
EXPLAIN (COSTS OFF)
13701379
SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
13711380
DROP TABLE bitmap_split_or;
13721381

src/test/regress/sql/join.sql

+17-1
Original file line numberDiff line numberDiff line change
@@ -3016,7 +3016,6 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
30163016

30173017
RESET enable_indexonlyscan;
30183018
RESET enable_seqscan;
3019-
30203019
-- Test BitmapHeapScan with a rescan releases resources correctly
30213020
SET enable_seqscan = off;
30223021
SET enable_indexscan = off;
@@ -3046,3 +3045,20 @@ SELECT 1 FROM group_tbl t1
30463045
GROUP BY s.c1, s.c2;
30473046

30483047
DROP TABLE group_tbl;
3048+
3049+
--
3050+
-- Test for a nested loop join involving index scan, transforming OR-clauses
3051+
-- to SAOP.
3052+
--
3053+
3054+
EXPLAIN (COSTS OFF)
3055+
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
3056+
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
3057+
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
3058+
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
3059+
3060+
EXPLAIN (COSTS OFF)
3061+
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
3062+
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
3063+
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
3064+
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);

0 commit comments

Comments
 (0)