Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ebb7ae8

Browse files
committed
Fix get_useful_pathkeys_for_relation for volatile expressions
When considering Incremental Sort below a Gather Merge, we need to be a bit more careful when matching pathkeys to EC members. It's not enough to find a member whose Vars are all in the current relation's target; volatile expressions in particular need to be contained in the target, otherwise it's too early to use the pathkey. Reported-by: Jaime Casanova Author: James Coleman Reviewed-by: Tomas Vondra Backpatch-through: 13, where the incremental sort code was added Discussion: https://postgr.es/m/CAJGNTeNaxpXgBVcRhJX%2B2vSbq%2BF2kJqGBcvompmpvXb7pq%2BoFA%40mail.gmail.com
1 parent 92f8718 commit ebb7ae8

File tree

5 files changed

+207
-6
lines changed

5 files changed

+207
-6
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2816,7 +2816,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
28162816
/*
28172817
* Considering query_pathkeys is always worth it, because it might allow
28182818
* us to avoid a total sort when we have a partially presorted path
2819-
* available.
2819+
* available or to push the total sort into the parallel portion of the
2820+
* query.
28202821
*/
28212822
if (root->query_pathkeys)
28222823
{
@@ -2829,17 +2830,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
28292830
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
28302831

28312832
/*
2832-
* We can only build an Incremental Sort for pathkeys which
2833-
* contain an EC member in the current relation, so ignore any
2834-
* suffix of the list as soon as we find a pathkey without an EC
2835-
* member the relation.
2833+
* We can only build a sort for pathkeys which contain an EC
2834+
* member in the current relation's target, so ignore any suffix
2835+
* of the list as soon as we find a pathkey without an EC member
2836+
* in the relation.
28362837
*
28372838
* By still returning the prefix of the pathkeys list that does
28382839
* meet criteria of EC membership in the current relation, we
28392840
* enable not just an incremental sort on the entirety of
28402841
* query_pathkeys but also incremental sort below a JOIN.
28412842
*/
2842-
if (!find_em_expr_for_rel(pathkey_ec, rel))
2843+
if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
28432844
break;
28442845

28452846
npathkeys++;

src/backend/optimizer/path/equivclass.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,76 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
797797
return NULL;
798798
}
799799

800+
/*
801+
* Find an equivalence class member expression that can be safely used by a
802+
* sort node on top of the provided relation. The rules here must match those
803+
* applied in prepare_sort_from_pathkeys.
804+
*/
805+
Expr *
806+
find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
807+
{
808+
ListCell *lc_em;
809+
810+
/*
811+
* If there is more than one equivalence member matching these
812+
* requirements we'll be content to choose any one of them.
813+
*/
814+
foreach(lc_em, ec->ec_members)
815+
{
816+
EquivalenceMember *em = lfirst(lc_em);
817+
Expr *em_expr = em->em_expr;
818+
PathTarget *target = rel->reltarget;
819+
ListCell *lc_target_expr;
820+
821+
/*
822+
* We shouldn't be trying to sort by an equivalence class that
823+
* contains a constant, so no need to consider such cases any further.
824+
*/
825+
if (em->em_is_const)
826+
continue;
827+
828+
/*
829+
* Any Vars in the equivalence class member need to come from this
830+
* relation. This is a superset of prepare_sort_from_pathkeys ignoring
831+
* child members unless they belong to the rel being sorted.
832+
*/
833+
if (!bms_is_subset(em->em_relids, rel->relids))
834+
continue;
835+
836+
/*
837+
* As long as the expression isn't volatile then
838+
* prepare_sort_from_pathkeys is able to generate a new target entry,
839+
* so there's no need to verify that one already exists.
840+
*/
841+
if (!ec->ec_has_volatile)
842+
return em->em_expr;
843+
844+
/*
845+
* If, however, it's volatile, we have to verify that the
846+
* equivalence member's expr is already generated in the
847+
* relation's target (we do strip relabels first from both
848+
* expressions, which is cheap and might allow us to match
849+
* more expressions).
850+
*/
851+
while (em_expr && IsA(em_expr, RelabelType))
852+
em_expr = ((RelabelType *) em_expr)->arg;
853+
854+
foreach(lc_target_expr, target->exprs)
855+
{
856+
Expr *target_expr = lfirst(lc_target_expr);
857+
858+
while (target_expr && IsA(target_expr, RelabelType))
859+
target_expr = ((RelabelType *) target_expr)->arg;
860+
861+
if (equal(target_expr, em_expr))
862+
return em->em_expr;
863+
}
864+
}
865+
866+
/* We didn't find any suitable equivalence class expression */
867+
return NULL;
868+
}
869+
800870
/*
801871
* generate_base_implied_equalities
802872
* Generate any restriction clauses that we can deduce from equivalence

src/include/optimizer/paths.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
135135
Relids rel,
136136
bool create_it);
137137
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
138+
extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
138139
extern void generate_base_implied_equalities(PlannerInfo *root);
139140
extern List *generate_join_implied_equalities(PlannerInfo *root,
140141
Relids join_relids,

src/test/regress/expected/incremental_sort.out

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3;
14691469
(13 rows)
14701470

14711471
drop table t;
1472+
-- Sort pushdown can't go below where expressions are part of the rel target.
1473+
-- In particular this is interesting for volatile expressions which have to
1474+
-- go above joins since otherwise we'll incorrectly use expression evaluations
1475+
-- across multiple rows.
1476+
set enable_hashagg=off;
1477+
set enable_seqscan=off;
1478+
set enable_incremental_sort = off;
1479+
set parallel_tuple_cost=0;
1480+
set parallel_setup_cost=0;
1481+
set min_parallel_table_scan_size = 0;
1482+
set min_parallel_index_scan_size = 0;
1483+
-- Parallel sort below join.
1484+
explain (costs off) select distinct sub.unique1, stringu1
1485+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1486+
QUERY PLAN
1487+
--------------------------------------------------------------------------
1488+
Unique
1489+
-> Nested Loop
1490+
-> Gather Merge
1491+
Workers Planned: 2
1492+
-> Sort
1493+
Sort Key: tenk1.unique1, tenk1.stringu1
1494+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1495+
-> Function Scan on generate_series
1496+
(8 rows)
1497+
1498+
explain (costs off) select sub.unique1, stringu1
1499+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1500+
order by 1, 2;
1501+
QUERY PLAN
1502+
--------------------------------------------------------------------
1503+
Nested Loop
1504+
-> Gather Merge
1505+
Workers Planned: 2
1506+
-> Sort
1507+
Sort Key: tenk1.unique1, tenk1.stringu1
1508+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1509+
-> Function Scan on generate_series
1510+
(7 rows)
1511+
1512+
-- Parallel sort but with expression that can be safely generated at the base rel.
1513+
explain (costs off) select distinct sub.unique1, md5(stringu1)
1514+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1515+
QUERY PLAN
1516+
----------------------------------------------------------------------------------------
1517+
Unique
1518+
-> Nested Loop
1519+
-> Gather Merge
1520+
Workers Planned: 2
1521+
-> Sort
1522+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1523+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1524+
-> Function Scan on generate_series
1525+
(8 rows)
1526+
1527+
explain (costs off) select sub.unique1, md5(stringu1)
1528+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1529+
order by 1, 2;
1530+
QUERY PLAN
1531+
----------------------------------------------------------------------------------
1532+
Nested Loop
1533+
-> Gather Merge
1534+
Workers Planned: 2
1535+
-> Sort
1536+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1537+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1538+
-> Function Scan on generate_series
1539+
(7 rows)
1540+
1541+
-- Parallel sort but with expression not available until the upper rel.
1542+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
1543+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1544+
QUERY PLAN
1545+
---------------------------------------------------------------------------------------------
1546+
Unique
1547+
-> Sort
1548+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1549+
-> Gather
1550+
Workers Planned: 2
1551+
-> Nested Loop
1552+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1553+
-> Function Scan on generate_series
1554+
(8 rows)
1555+
1556+
explain (costs off) select sub.unique1, stringu1 || random()::text
1557+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1558+
order by 1, 2;
1559+
QUERY PLAN
1560+
---------------------------------------------------------------------------------------
1561+
Sort
1562+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1563+
-> Gather
1564+
Workers Planned: 2
1565+
-> Nested Loop
1566+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1567+
-> Function Scan on generate_series
1568+
(7 rows)
1569+

src/test/regress/sql/incremental_sort.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,34 @@ set enable_hashagg to off;
221221
explain (costs off) select * from t union select * from t order by 1,3;
222222

223223
drop table t;
224+
225+
-- Sort pushdown can't go below where expressions are part of the rel target.
226+
-- In particular this is interesting for volatile expressions which have to
227+
-- go above joins since otherwise we'll incorrectly use expression evaluations
228+
-- across multiple rows.
229+
set enable_hashagg=off;
230+
set enable_seqscan=off;
231+
set enable_incremental_sort = off;
232+
set parallel_tuple_cost=0;
233+
set parallel_setup_cost=0;
234+
set min_parallel_table_scan_size = 0;
235+
set min_parallel_index_scan_size = 0;
236+
237+
-- Parallel sort below join.
238+
explain (costs off) select distinct sub.unique1, stringu1
239+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
240+
explain (costs off) select sub.unique1, stringu1
241+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
242+
order by 1, 2;
243+
-- Parallel sort but with expression that can be safely generated at the base rel.
244+
explain (costs off) select distinct sub.unique1, md5(stringu1)
245+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
246+
explain (costs off) select sub.unique1, md5(stringu1)
247+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
248+
order by 1, 2;
249+
-- Parallel sort but with expression not available until the upper rel.
250+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
251+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
252+
explain (costs off) select sub.unique1, stringu1 || random()::text
253+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
254+
order by 1, 2;

0 commit comments

Comments
 (0)