Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9f13376

Browse files
committed
Pull up ANY-SUBLINK with the necessary lateral support.
For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle different types of scenarios. In the first stage, the sublink is pulled up as a subquery. Because of this, when writing this code, we did not have the ability to perform lateral joins, and therefore, we were unable to pull up Var with varlevelsup=1. Now that we have the ability to use lateral joins, we can eliminate this limitation. Author: Andy Fan <zhihui.fan1213@gmail.com> Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Richard Guo <guofenglinux@gmail.com> Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru> Reviewed-by: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
1 parent 995d400 commit 9f13376

File tree

7 files changed

+192
-20
lines changed

7 files changed

+192
-20
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11894,23 +11894,23 @@ CREATE FOREIGN TABLE foreign_tbl (b int)
1189411894
CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
1189511895
SERVER loopback OPTIONS (table_name 'base_tbl');
1189611896
EXPLAIN (VERBOSE, COSTS OFF)
11897-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
11897+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
1189811898
QUERY PLAN
1189911899
-----------------------------------------------------------------------------
1190011900
Seq Scan on public.base_tbl
1190111901
Output: base_tbl.a
1190211902
Filter: (SubPlan 1)
1190311903
SubPlan 1
1190411904
-> Result
11905-
Output: base_tbl.a
11905+
Output: base_tbl.a, (random() > '0'::double precision)
1190611906
-> Append
1190711907
-> Async Foreign Scan on public.foreign_tbl foreign_tbl_1
1190811908
Remote SQL: SELECT NULL FROM public.base_tbl
1190911909
-> Async Foreign Scan on public.foreign_tbl2 foreign_tbl_2
1191011910
Remote SQL: SELECT NULL FROM public.base_tbl
1191111911
(11 rows)
1191211912

11913-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
11913+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
1191411914
a
1191511915
---
1191611916
1

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3988,8 +3988,8 @@ CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
39883988
SERVER loopback OPTIONS (table_name 'base_tbl');
39893989

39903990
EXPLAIN (VERBOSE, COSTS OFF)
3991-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
3992-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
3991+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
3992+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
39933993

39943994
-- Clean up
39953995
DROP FOREIGN TABLE foreign_tbl CASCADE;

src/backend/optimizer/plan/subselect.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,14 +1278,23 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
12781278
List *subquery_vars;
12791279
Node *quals;
12801280
ParseState *pstate;
1281+
Relids sub_ref_outer_relids;
1282+
bool use_lateral;
12811283

12821284
Assert(sublink->subLinkType == ANY_SUBLINK);
12831285

12841286
/*
1285-
* The sub-select must not refer to any Vars of the parent query. (Vars of
1286-
* higher levels should be okay, though.)
1287+
* If the sub-select refers to any Vars of the parent query, we so let's
1288+
* considering it as LATERAL. (Vars of higher levels don't matter here.)
12871289
*/
1288-
if (contain_vars_of_level((Node *) subselect, 1))
1290+
sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
1291+
use_lateral = !bms_is_empty(sub_ref_outer_relids);
1292+
1293+
/*
1294+
* Check that sub-select refers nothing outside of available_rels of the
1295+
* parent query.
1296+
*/
1297+
if (!bms_is_subset(sub_ref_outer_relids, available_rels))
12891298
return NULL;
12901299

12911300
/*
@@ -1323,7 +1332,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
13231332
nsitem = addRangeTableEntryForSubquery(pstate,
13241333
subselect,
13251334
makeAlias("ANY_subquery", NIL),
1326-
false,
1335+
use_lateral,
13271336
false);
13281337
rte = nsitem->p_rte;
13291338
parse->rtable = lappend(parse->rtable, rte);

src/test/regress/expected/join.out

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5277,7 +5277,7 @@ reset enable_nestloop;
52775277
explain (costs off)
52785278
select a.unique1, b.unique2
52795279
from onek a left join onek b on a.unique1 = b.unique2
5280-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
5280+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
52815281
QUERY PLAN
52825282
----------------------------------------------------------
52835283
Hash Join
@@ -5293,7 +5293,7 @@ select a.unique1, b.unique2
52935293

52945294
select a.unique1, b.unique2
52955295
from onek a left join onek b on a.unique1 = b.unique2
5296-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
5296+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
52975297
unique1 | unique2
52985298
---------+---------
52995299
123 | 123
@@ -8210,12 +8210,12 @@ select * from (values (0), (1)) v(id),
82108210
lateral (select * from int8_tbl t1,
82118211
lateral (select * from
82128212
(select * from int8_tbl t2
8213-
where q1 = any (select q2 from int8_tbl t3
8213+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
82148214
where q2 = (select greatest(t1.q1,t2.q2))
82158215
and (select v.id=0)) offset 0) ss2) ss
82168216
where t1.q1 = ss.q2) ss0;
8217-
QUERY PLAN
8218-
----------------------------------------------------------------------
8217+
QUERY PLAN
8218+
-------------------------------------------------------------------------------
82198219
Nested Loop
82208220
Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
82218221
-> Seq Scan on public.int8_tbl t1
@@ -8232,7 +8232,7 @@ lateral (select * from int8_tbl t1,
82328232
Filter: (SubPlan 3)
82338233
SubPlan 3
82348234
-> Result
8235-
Output: t3.q2
8235+
Output: t3.q2, (random() > '0'::double precision)
82368236
One-Time Filter: $4
82378237
InitPlan 1 (returns $2)
82388238
-> Result
@@ -8249,7 +8249,7 @@ select * from (values (0), (1)) v(id),
82498249
lateral (select * from int8_tbl t1,
82508250
lateral (select * from
82518251
(select * from int8_tbl t2
8252-
where q1 = any (select q2 from int8_tbl t3
8252+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
82538253
where q2 = (select greatest(t1.q1,t2.q2))
82548254
and (select v.id=0)) offset 0) ss2) ss
82558255
where t1.q1 = ss.q2) ss0;

src/test/regress/expected/subselect.out

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,3 +1926,129 @@ select * from x for update;
19261926
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
19271927
(2 rows)
19281928

1929+
-- Pull-up the direct-correlated ANY_SUBLINK
1930+
explain (costs off)
1931+
select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
1932+
QUERY PLAN
1933+
------------------------------------------------------------
1934+
Hash Join
1935+
Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred))
1936+
-> Seq Scan on tenk1 a
1937+
-> Hash
1938+
-> HashAggregate
1939+
Group Key: b.odd, b.hundred
1940+
-> Seq Scan on tenk2 b
1941+
(7 rows)
1942+
1943+
explain (costs off)
1944+
select * from tenk1 A where exists
1945+
(select 1 from tenk2 B
1946+
where A.hundred in (select C.hundred FROM tenk2 C
1947+
WHERE c.odd = b.odd));
1948+
QUERY PLAN
1949+
---------------------------------
1950+
Nested Loop Semi Join
1951+
Join Filter: (SubPlan 1)
1952+
-> Seq Scan on tenk1 a
1953+
-> Materialize
1954+
-> Seq Scan on tenk2 b
1955+
SubPlan 1
1956+
-> Seq Scan on tenk2 c
1957+
Filter: (odd = b.odd)
1958+
(8 rows)
1959+
1960+
-- we should only try to pull up the sublink into RHS of a left join
1961+
-- but a.hundred is not avaiable.
1962+
explain (costs off)
1963+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1964+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1965+
QUERY PLAN
1966+
---------------------------------
1967+
Nested Loop Left Join
1968+
Join Filter: (SubPlan 1)
1969+
-> Seq Scan on tenk1 a
1970+
-> Materialize
1971+
-> Seq Scan on tenk2 b
1972+
SubPlan 1
1973+
-> Seq Scan on tenk2 c
1974+
Filter: (odd = b.odd)
1975+
(8 rows)
1976+
1977+
-- we should only try to pull up the sublink into RHS of a left join
1978+
-- but a.odd is not avaiable for this.
1979+
explain (costs off)
1980+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1981+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
1982+
QUERY PLAN
1983+
---------------------------------
1984+
Nested Loop Left Join
1985+
Join Filter: (SubPlan 1)
1986+
-> Seq Scan on tenk1 a
1987+
-> Materialize
1988+
-> Seq Scan on tenk2 b
1989+
SubPlan 1
1990+
-> Seq Scan on tenk2 c
1991+
Filter: (odd = a.odd)
1992+
(8 rows)
1993+
1994+
-- should be able to pull up since all the references is available
1995+
explain (costs off)
1996+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1997+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1998+
QUERY PLAN
1999+
------------------------------------------------------------------------
2000+
Nested Loop Left Join
2001+
-> Seq Scan on tenk1 a
2002+
-> Materialize
2003+
-> Hash Join
2004+
Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred))
2005+
-> Seq Scan on tenk2 b
2006+
-> Hash
2007+
-> HashAggregate
2008+
Group Key: c.odd, c.hundred
2009+
-> Seq Scan on tenk2 c
2010+
(10 rows)
2011+
2012+
-- we can pull up the sublink into the inner JoinExpr.
2013+
explain (costs off)
2014+
SELECT * FROM tenk1 A INNER JOIN tenk2 B
2015+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
2016+
QUERY PLAN
2017+
-------------------------------------------------
2018+
Hash Join
2019+
Hash Cond: (c.odd = b.odd)
2020+
-> Hash Join
2021+
Hash Cond: (a.hundred = c.hundred)
2022+
-> Seq Scan on tenk1 a
2023+
-> Hash
2024+
-> HashAggregate
2025+
Group Key: c.odd, c.hundred
2026+
-> Seq Scan on tenk2 c
2027+
-> Hash
2028+
-> Seq Scan on tenk2 b
2029+
(11 rows)
2030+
2031+
-- we can pull up the aggregate sublink into RHS of a left join.
2032+
explain (costs off)
2033+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
2034+
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
2035+
QUERY PLAN
2036+
---------------------------------------------------------------------------------------
2037+
Nested Loop Left Join
2038+
-> Seq Scan on tenk1 a
2039+
-> Materialize
2040+
-> Nested Loop
2041+
-> Seq Scan on tenk2 b
2042+
-> Memoize
2043+
Cache Key: b.hundred, b.odd
2044+
Cache Mode: binary
2045+
-> Subquery Scan on "ANY_subquery"
2046+
Filter: (b.hundred = "ANY_subquery".min)
2047+
-> Result
2048+
InitPlan 1 (returns $1)
2049+
-> Limit
2050+
-> Index Scan using tenk2_hundred on tenk2 c
2051+
Index Cond: (hundred IS NOT NULL)
2052+
Filter: (odd = b.odd)
2053+
(16 rows)
2054+

src/test/regress/sql/join.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,11 +1864,11 @@ reset enable_nestloop;
18641864
explain (costs off)
18651865
select a.unique1, b.unique2
18661866
from onek a left join onek b on a.unique1 = b.unique2
1867-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
1867+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
18681868

18691869
select a.unique1, b.unique2
18701870
from onek a left join onek b on a.unique1 = b.unique2
1871-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
1871+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
18721872

18731873
--
18741874
-- test full-join strength reduction
@@ -3038,7 +3038,7 @@ select * from (values (0), (1)) v(id),
30383038
lateral (select * from int8_tbl t1,
30393039
lateral (select * from
30403040
(select * from int8_tbl t2
3041-
where q1 = any (select q2 from int8_tbl t3
3041+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
30423042
where q2 = (select greatest(t1.q1,t2.q2))
30433043
and (select v.id=0)) offset 0) ss2) ss
30443044
where t1.q1 = ss.q2) ss0;
@@ -3047,7 +3047,7 @@ select * from (values (0), (1)) v(id),
30473047
lateral (select * from int8_tbl t1,
30483048
lateral (select * from
30493049
(select * from int8_tbl t2
3050-
where q1 = any (select q2 from int8_tbl t3
3050+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
30513051
where q2 = (select greatest(t1.q1,t2.q2))
30523052
and (select v.id=0)) offset 0) ss2) ss
30533053
where t1.q1 = ss.q2) ss0;

src/test/regress/sql/subselect.sql

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,3 +968,40 @@ select * from (with x as (select 2 as y) select * from x) ss;
968968
explain (verbose, costs off)
969969
with x as (select * from subselect_tbl)
970970
select * from x for update;
971+
972+
-- Pull-up the direct-correlated ANY_SUBLINK
973+
explain (costs off)
974+
select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
975+
976+
explain (costs off)
977+
select * from tenk1 A where exists
978+
(select 1 from tenk2 B
979+
where A.hundred in (select C.hundred FROM tenk2 C
980+
WHERE c.odd = b.odd));
981+
982+
-- we should only try to pull up the sublink into RHS of a left join
983+
-- but a.hundred is not avaiable.
984+
explain (costs off)
985+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
986+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
987+
988+
-- we should only try to pull up the sublink into RHS of a left join
989+
-- but a.odd is not avaiable for this.
990+
explain (costs off)
991+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
992+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
993+
994+
-- should be able to pull up since all the references is available
995+
explain (costs off)
996+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
997+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
998+
999+
-- we can pull up the sublink into the inner JoinExpr.
1000+
explain (costs off)
1001+
SELECT * FROM tenk1 A INNER JOIN tenk2 B
1002+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1003+
1004+
-- we can pull up the aggregate sublink into RHS of a left join.
1005+
explain (costs off)
1006+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1007+
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);

0 commit comments

Comments
 (0)