Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 5668a85

Browse files
author
Richard Guo
committed
Fix right-semi-joins in HashJoin rescans
When resetting a HashJoin node for rescans, if it is a single-batch join and there are no parameter changes for the inner subnode, we can just reuse the existing hash table without rebuilding it. However, for join types that depend on the inner-tuple match flags in the hash table, we need to reset these match flags to avoid incorrect results. This applies to right, right-anti, right-semi, and full joins. When I introduced "Right Semi Join" plan shapes in aa86129, I failed to reset the match flags in the hash table for right-semi joins in rescans. This oversight has been shown to produce incorrect results. This patch fixes it. Author: Richard Guo Discussion: https://postgr.es/m/CAMbWs4-nQF9io2WL2SkD0eXvfPdyBc9Q=hRwfQHCGV2usa0jyA@mail.gmail.com
1 parent f0c569d commit 5668a85

File tree

3 files changed

+97
-3
lines changed

3 files changed

+97
-3
lines changed

src/backend/executor/nodeHashjoin.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,10 +1511,11 @@ ExecReScanHashJoin(HashJoinState *node)
15111511
/*
15121512
* Okay to reuse the hash table; needn't rescan inner, either.
15131513
*
1514-
* However, if it's a right/right-anti/full join, we'd better
1515-
* reset the inner-tuple match flags contained in the table.
1514+
* However, if it's a right/right-anti/right-semi/full join, we'd
1515+
* better reset the inner-tuple match flags contained in the
1516+
* table.
15161517
*/
1517-
if (HJ_FILL_INNER(node))
1518+
if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
15181519
ExecHashTableResetMatchFlags(node->hj_HashTable);
15191520

15201521
/*

src/test/regress/expected/join.out

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,6 +3036,69 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
30363036
reset enable_hashjoin;
30373037
reset enable_nestloop;
30383038
--
3039+
-- regression test for bug with hash-right-semi join
3040+
--
3041+
create temp table tbl_rs(a int, b int);
3042+
insert into tbl_rs select i, i from generate_series(1,10)i;
3043+
analyze tbl_rs;
3044+
set enable_nestloop to off;
3045+
set enable_hashagg to off;
3046+
-- ensure we get a hash right semi join with SubPlan in hash clauses
3047+
explain (costs off)
3048+
select * from tbl_rs t1
3049+
where (select a from tbl_rs t2
3050+
where exists (select 1 from
3051+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3052+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3053+
order by a limit 1) >= 0;
3054+
QUERY PLAN
3055+
--------------------------------------------------------------------------------------
3056+
Seq Scan on tbl_rs t1
3057+
Filter: ((SubPlan 3) >= 0)
3058+
SubPlan 3
3059+
-> Limit
3060+
InitPlan 2
3061+
-> Hash Right Semi Join
3062+
Hash Cond: (((t1.a = 1)) = (ANY (t4.b = (hashed SubPlan 1).col1)))
3063+
-> Append
3064+
-> Seq Scan on tbl_rs t5
3065+
-> Result
3066+
-> Hash
3067+
-> Seq Scan on tbl_rs t4
3068+
Filter: (a = 1)
3069+
SubPlan 1
3070+
-> Seq Scan on tbl_rs t3
3071+
-> Sort
3072+
Sort Key: t2.a
3073+
-> Result
3074+
One-Time Filter: (InitPlan 2).col1
3075+
-> Seq Scan on tbl_rs t2
3076+
(20 rows)
3077+
3078+
-- and check we get the expected results
3079+
select * from tbl_rs t1
3080+
where (select a from tbl_rs t2
3081+
where exists (select 1 from
3082+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3083+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3084+
order by a limit 1) >= 0;
3085+
a | b
3086+
----+----
3087+
1 | 1
3088+
2 | 2
3089+
3 | 3
3090+
4 | 4
3091+
5 | 5
3092+
6 | 6
3093+
7 | 7
3094+
8 | 8
3095+
9 | 9
3096+
10 | 10
3097+
(10 rows)
3098+
3099+
reset enable_nestloop;
3100+
reset enable_hashagg;
3101+
--
30393102
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
30403103
--
30413104
set work_mem to '64kB';

src/test/regress/sql/join.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,36 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
738738
reset enable_hashjoin;
739739
reset enable_nestloop;
740740

741+
--
742+
-- regression test for bug with hash-right-semi join
743+
--
744+
create temp table tbl_rs(a int, b int);
745+
insert into tbl_rs select i, i from generate_series(1,10)i;
746+
analyze tbl_rs;
747+
748+
set enable_nestloop to off;
749+
set enable_hashagg to off;
750+
751+
-- ensure we get a hash right semi join with SubPlan in hash clauses
752+
explain (costs off)
753+
select * from tbl_rs t1
754+
where (select a from tbl_rs t2
755+
where exists (select 1 from
756+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
757+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
758+
order by a limit 1) >= 0;
759+
760+
-- and check we get the expected results
761+
select * from tbl_rs t1
762+
where (select a from tbl_rs t2
763+
where exists (select 1 from
764+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
765+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
766+
order by a limit 1) >= 0;
767+
768+
reset enable_nestloop;
769+
reset enable_hashagg;
770+
741771
--
742772
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
743773
--

0 commit comments

Comments
 (0)