Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2d26c4a

Browse files
committed
Fix get_useful_pathkeys_for_relation for volatile expressions
When considering Incremental Sort below a Gather Merge, we need to be a bit more careful when matching pathkeys to EC members. It's not enough to find a member whose Vars are all in the current relation's target; volatile expressions in particular need to be contained in the target, otherwise it's too early to use the pathkey. Reported-by: Jaime Casanova Author: James Coleman Reviewed-by: Tomas Vondra Backpatch-through: 13, where the incremental sort code was added Discussion: https://postgr.es/m/CAJGNTeNaxpXgBVcRhJX%2B2vSbq%2BF2kJqGBcvompmpvXb7pq%2BoFA%40mail.gmail.com
1 parent 936043c commit 2d26c4a

File tree

5 files changed

+207
-6
lines changed

5 files changed

+207
-6
lines changed

src/backend/optimizer/path/allpaths.c

+7-6
Original file line numberDiff line numberDiff line change
@@ -2756,7 +2756,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
27562756
/*
27572757
* Considering query_pathkeys is always worth it, because it might allow
27582758
* us to avoid a total sort when we have a partially presorted path
2759-
* available.
2759+
* available or to push the total sort into the parallel portion of the
2760+
* query.
27602761
*/
27612762
if (root->query_pathkeys)
27622763
{
@@ -2769,17 +2770,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
27692770
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
27702771

27712772
/*
2772-
* We can only build an Incremental Sort for pathkeys which
2773-
* contain an EC member in the current relation, so ignore any
2774-
* suffix of the list as soon as we find a pathkey without an EC
2775-
* member the relation.
2773+
* We can only build a sort for pathkeys which contain an EC
2774+
* member in the current relation's target, so ignore any suffix
2775+
* of the list as soon as we find a pathkey without an EC member
2776+
* in the relation.
27762777
*
27772778
* By still returning the prefix of the pathkeys list that does
27782779
* meet criteria of EC membership in the current relation, we
27792780
* enable not just an incremental sort on the entirety of
27802781
* query_pathkeys but also incremental sort below a JOIN.
27812782
*/
2782-
if (!find_em_expr_for_rel(pathkey_ec, rel))
2783+
if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
27832784
break;
27842785

27852786
npathkeys++;

src/backend/optimizer/path/equivclass.c

+70
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,76 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
794794
return NULL;
795795
}
796796

797+
/*
798+
* Find an equivalence class member expression that can be safely used by a
799+
* sort node on top of the provided relation. The rules here must match those
800+
* applied in prepare_sort_from_pathkeys.
801+
*/
802+
Expr *
803+
find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
804+
{
805+
ListCell *lc_em;
806+
807+
/*
808+
* If there is more than one equivalence member matching these
809+
* requirements we'll be content to choose any one of them.
810+
*/
811+
foreach(lc_em, ec->ec_members)
812+
{
813+
EquivalenceMember *em = lfirst(lc_em);
814+
Expr *em_expr = em->em_expr;
815+
PathTarget *target = rel->reltarget;
816+
ListCell *lc_target_expr;
817+
818+
/*
819+
* We shouldn't be trying to sort by an equivalence class that
820+
* contains a constant, so no need to consider such cases any further.
821+
*/
822+
if (em->em_is_const)
823+
continue;
824+
825+
/*
826+
* Any Vars in the equivalence class member need to come from this
827+
* relation. This is a superset of prepare_sort_from_pathkeys ignoring
828+
* child members unless they belong to the rel being sorted.
829+
*/
830+
if (!bms_is_subset(em->em_relids, rel->relids))
831+
continue;
832+
833+
/*
834+
* As long as the expression isn't volatile then
835+
* prepare_sort_from_pathkeys is able to generate a new target entry,
836+
* so there's no need to verify that one already exists.
837+
*/
838+
if (!ec->ec_has_volatile)
839+
return em->em_expr;
840+
841+
/*
842+
* If, however, it's volatile, we have to verify that the
843+
* equivalence member's expr is already generated in the
844+
* relation's target (we do strip relabels first from both
845+
* expressions, which is cheap and might allow us to match
846+
* more expressions).
847+
*/
848+
while (em_expr && IsA(em_expr, RelabelType))
849+
em_expr = ((RelabelType *) em_expr)->arg;
850+
851+
foreach(lc_target_expr, target->exprs)
852+
{
853+
Expr *target_expr = lfirst(lc_target_expr);
854+
855+
while (target_expr && IsA(target_expr, RelabelType))
856+
target_expr = ((RelabelType *) target_expr)->arg;
857+
858+
if (equal(target_expr, em_expr))
859+
return em->em_expr;
860+
}
861+
}
862+
863+
/* We didn't find any suitable equivalence class expression */
864+
return NULL;
865+
}
866+
797867
/*
798868
* generate_base_implied_equalities
799869
* Generate any restriction clauses that we can deduce from equivalence

src/include/optimizer/paths.h

+1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
135135
Relids rel,
136136
bool create_it);
137137
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
138+
extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
138139
extern void generate_base_implied_equalities(PlannerInfo *root);
139140
extern List *generate_join_implied_equalities(PlannerInfo *root,
140141
Relids join_relids,

src/test/regress/expected/incremental_sort.out

+98
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3;
14691469
(13 rows)
14701470

14711471
drop table t;
1472+
-- Sort pushdown can't go below where expressions are part of the rel target.
1473+
-- In particular this is interesting for volatile expressions which have to
1474+
-- go above joins since otherwise we'll incorrectly use expression evaluations
1475+
-- across multiple rows.
1476+
set enable_hashagg=off;
1477+
set enable_seqscan=off;
1478+
set enable_incremental_sort = off;
1479+
set parallel_tuple_cost=0;
1480+
set parallel_setup_cost=0;
1481+
set min_parallel_table_scan_size = 0;
1482+
set min_parallel_index_scan_size = 0;
1483+
-- Parallel sort below join.
1484+
explain (costs off) select distinct sub.unique1, stringu1
1485+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1486+
QUERY PLAN
1487+
--------------------------------------------------------------------------
1488+
Unique
1489+
-> Nested Loop
1490+
-> Gather Merge
1491+
Workers Planned: 2
1492+
-> Sort
1493+
Sort Key: tenk1.unique1, tenk1.stringu1
1494+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1495+
-> Function Scan on generate_series
1496+
(8 rows)
1497+
1498+
explain (costs off) select sub.unique1, stringu1
1499+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1500+
order by 1, 2;
1501+
QUERY PLAN
1502+
--------------------------------------------------------------------
1503+
Nested Loop
1504+
-> Gather Merge
1505+
Workers Planned: 2
1506+
-> Sort
1507+
Sort Key: tenk1.unique1, tenk1.stringu1
1508+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1509+
-> Function Scan on generate_series
1510+
(7 rows)
1511+
1512+
-- Parallel sort but with expression that can be safely generated at the base rel.
1513+
explain (costs off) select distinct sub.unique1, md5(stringu1)
1514+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1515+
QUERY PLAN
1516+
----------------------------------------------------------------------------------------
1517+
Unique
1518+
-> Nested Loop
1519+
-> Gather Merge
1520+
Workers Planned: 2
1521+
-> Sort
1522+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1523+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1524+
-> Function Scan on generate_series
1525+
(8 rows)
1526+
1527+
explain (costs off) select sub.unique1, md5(stringu1)
1528+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1529+
order by 1, 2;
1530+
QUERY PLAN
1531+
----------------------------------------------------------------------------------
1532+
Nested Loop
1533+
-> Gather Merge
1534+
Workers Planned: 2
1535+
-> Sort
1536+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1537+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1538+
-> Function Scan on generate_series
1539+
(7 rows)
1540+
1541+
-- Parallel sort but with expression not available until the upper rel.
1542+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
1543+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1544+
QUERY PLAN
1545+
---------------------------------------------------------------------------------------------
1546+
Unique
1547+
-> Sort
1548+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1549+
-> Gather
1550+
Workers Planned: 2
1551+
-> Nested Loop
1552+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1553+
-> Function Scan on generate_series
1554+
(8 rows)
1555+
1556+
explain (costs off) select sub.unique1, stringu1 || random()::text
1557+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1558+
order by 1, 2;
1559+
QUERY PLAN
1560+
---------------------------------------------------------------------------------------
1561+
Sort
1562+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1563+
-> Gather
1564+
Workers Planned: 2
1565+
-> Nested Loop
1566+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1567+
-> Function Scan on generate_series
1568+
(7 rows)
1569+

src/test/regress/sql/incremental_sort.sql

+31
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,34 @@ set enable_hashagg to off;
221221
explain (costs off) select * from t union select * from t order by 1,3;
222222

223223
drop table t;
224+
225+
-- Sort pushdown can't go below where expressions are part of the rel target.
226+
-- In particular this is interesting for volatile expressions which have to
227+
-- go above joins since otherwise we'll incorrectly use expression evaluations
228+
-- across multiple rows.
229+
set enable_hashagg=off;
230+
set enable_seqscan=off;
231+
set enable_incremental_sort = off;
232+
set parallel_tuple_cost=0;
233+
set parallel_setup_cost=0;
234+
set min_parallel_table_scan_size = 0;
235+
set min_parallel_index_scan_size = 0;
236+
237+
-- Parallel sort below join.
238+
explain (costs off) select distinct sub.unique1, stringu1
239+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
240+
explain (costs off) select sub.unique1, stringu1
241+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
242+
order by 1, 2;
243+
-- Parallel sort but with expression that can be safely generated at the base rel.
244+
explain (costs off) select distinct sub.unique1, md5(stringu1)
245+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
246+
explain (costs off) select sub.unique1, md5(stringu1)
247+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
248+
order by 1, 2;
249+
-- Parallel sort but with expression not available until the upper rel.
250+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
251+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
252+
explain (costs off) select sub.unique1, stringu1 || random()::text
253+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
254+
order by 1, 2;

0 commit comments

Comments
 (0)