Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6327f25

Browse files
committed
Disallow pushing volatile qual expressions down into DISTINCT subqueries.
A WHERE clause applied to the output of a subquery with DISTINCT should theoretically be applied only once per distinct row; but if we push it into the subquery then it will be evaluated at each row before duplicate elimination occurs. If the qual is volatile this can give rise to observably wrong results, so don't do that. While at it, refactor a little bit to allow subquery_is_pushdown_safe to report more than one kind of restrictive condition without indefinitely expanding its argument list. Although this is a bug fix, it seems unwise to back-patch it into released branches, since it might de-optimize plans for queries that aren't giving any trouble in practice. So apply to 9.4 but not further back.
1 parent c3096d5 commit 6327f25

File tree

3 files changed

+148
-54
lines changed

3 files changed

+148
-54
lines changed

src/backend/optimizer/path/allpaths.c

+106-54
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@
4040
#include "utils/lsyscache.h"
4141

4242

43+
/* results of subquery_is_pushdown_safe */
44+
typedef struct pushdown_safety_info
45+
{
46+
bool *unsafeColumns; /* which output columns are unsafe to use */
47+
bool unsafeVolatile; /* don't push down volatile quals */
48+
bool unsafeLeaky; /* don't push down leaky quals */
49+
} pushdown_safety_info;
50+
4351
/* These parameters are set by GUC */
4452
bool enable_geqo = false; /* just in case GUC doesn't set it */
4553
int geqo_threshold;
@@ -86,14 +94,15 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
8694
RangeTblEntry *rte);
8795
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
8896
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
89-
bool *unsafeColumns);
97+
pushdown_safety_info *safetyInfo);
9098
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
91-
bool *unsafeColumns);
92-
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
99+
pushdown_safety_info *safetyInfo);
100+
static void check_output_expressions(Query *subquery,
101+
pushdown_safety_info *safetyInfo);
93102
static void compare_tlist_datatypes(List *tlist, List *colTypes,
94-
bool *unsafeColumns);
103+
pushdown_safety_info *safetyInfo);
95104
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
96-
bool *unsafeColumns);
105+
pushdown_safety_info *safetyInfo);
97106
static void subquery_push_qual(Query *subquery,
98107
RangeTblEntry *rte, Index rti, Node *qual);
99108
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -1116,7 +1125,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11161125
Query *parse = root->parse;
11171126
Query *subquery = rte->subquery;
11181127
Relids required_outer;
1119-
bool *unsafeColumns;
1128+
pushdown_safety_info safetyInfo;
11201129
double tuple_fraction;
11211130
PlannerInfo *subroot;
11221131
List *pathkeys;
@@ -1136,13 +1145,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11361145
required_outer = rel->lateral_relids;
11371146

11381147
/*
1139-
* We need a workspace for keeping track of unsafe-to-reference columns.
1140-
* unsafeColumns[i] is set TRUE if we've found that output column i of the
1141-
* subquery is unsafe to use in a pushed-down qual.
1148+
* Zero out result area for subquery_is_pushdown_safe, so that it can set
1149+
* flags as needed while recursing. In particular, we need a workspace
1150+
* for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1151+
* will be set TRUE if we find that output column i of the subquery is
1152+
* unsafe to use in a pushed-down qual.
11421153
*/
1143-
unsafeColumns = (bool *)
1154+
memset(&safetyInfo, 0, sizeof(safetyInfo));
1155+
safetyInfo.unsafeColumns = (bool *)
11441156
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
11451157

1158+
/*
1159+
* If the subquery has the "security_barrier" flag, it means the subquery
1160+
* originated from a view that must enforce row-level security. Then we
1161+
* must not push down quals that contain leaky functions. (Ideally this
1162+
* would be checked inside subquery_is_pushdown_safe, but since we don't
1163+
* currently pass the RTE to that function, we must do it here.)
1164+
*/
1165+
safetyInfo.unsafeLeaky = rte->security_barrier;
1166+
11461167
/*
11471168
* If there are any restriction clauses that have been attached to the
11481169
* subquery relation, consider pushing them down to become WHERE or HAVING
@@ -1157,18 +1178,14 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11571178
* pseudoconstant clauses; better to have the gating node above the
11581179
* subquery.
11591180
*
1160-
* Also, if the sub-query has the "security_barrier" flag, it means the
1161-
* sub-query originated from a view that must enforce row-level security.
1162-
* Then we must not push down quals that contain leaky functions.
1163-
*
11641181
* Non-pushed-down clauses will get evaluated as qpquals of the
11651182
* SubqueryScan node.
11661183
*
11671184
* XXX Are there any cases where we want to make a policy decision not to
11681185
* push down a pushable qual, because it'd result in a worse plan?
11691186
*/
11701187
if (rel->baserestrictinfo != NIL &&
1171-
subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
1188+
subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
11721189
{
11731190
/* OK to consider pushing down individual quals */
11741191
List *upperrestrictlist = NIL;
@@ -1180,9 +1197,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11801197
Node *clause = (Node *) rinfo->clause;
11811198

11821199
if (!rinfo->pseudoconstant &&
1183-
(!rte->security_barrier ||
1184-
!contain_leaky_functions(clause)) &&
1185-
qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
1200+
qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
11861201
{
11871202
/* Push it down */
11881203
subquery_push_qual(subquery, rte, rti, clause);
@@ -1196,7 +1211,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11961211
rel->baserestrictinfo = upperrestrictlist;
11971212
}
11981213

1199-
pfree(unsafeColumns);
1214+
pfree(safetyInfo.unsafeColumns);
12001215

12011216
/*
12021217
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1670,19 +1685,39 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
16701685
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
16711686
* quals into it, because that could change the results.
16721687
*
1673-
* In addition, we make several checks on the subquery's output columns
1674-
* to see if it is safe to reference them in pushed-down quals. If output
1675-
* column k is found to be unsafe to reference, we set unsafeColumns[k] to
1676-
* TRUE, but we don't reject the subquery overall since column k might
1677-
* not be referenced by some/all quals. The unsafeColumns[] array will be
1678-
* consulted later by qual_is_pushdown_safe(). It's better to do it this
1679-
* way than to make the checks directly in qual_is_pushdown_safe(), because
1680-
* when the subquery involves set operations we have to check the output
1688+
* 4. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1689+
* This is because upper-level quals should semantically be evaluated only
1690+
* once per distinct row, not once per original row, and if the qual is
1691+
* volatile then extra evaluations could change the results. (This issue
1692+
* does not apply to other forms of aggregation such as GROUP BY, because
1693+
* when those are present we push into HAVING not WHERE, so that the quals
1694+
* are still applied after aggregation.)
1695+
*
1696+
* In addition, we make several checks on the subquery's output columns to see
1697+
* if it is safe to reference them in pushed-down quals. If output column k
1698+
* is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1699+
* to TRUE, but we don't reject the subquery overall since column k might not
1700+
* be referenced by some/all quals. The unsafeColumns[] array will be
1701+
* consulted later by qual_is_pushdown_safe(). It's better to do it this way
1702+
* than to make the checks directly in qual_is_pushdown_safe(), because when
1703+
* the subquery involves set operations we have to check the output
16811704
* expressions in each arm of the set op.
1705+
*
1706+
* Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1707+
* we're effectively assuming that the quals cannot distinguish values that
1708+
* the DISTINCT's equality operator sees as equal, yet there are many
1709+
* counterexamples to that assumption. However use of such a qual with a
1710+
* DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1711+
* "equal" value will be chosen as the output value by the DISTINCT operation.
1712+
* So we don't worry too much about that. Another objection is that if the
1713+
* qual is expensive to evaluate, running it for each original row might cost
1714+
* more than we save by eliminating rows before the DISTINCT step. But it
1715+
* would be very hard to estimate that at this stage, and in practice pushdown
1716+
* seldom seems to make things worse, so we ignore that problem too.
16821717
*/
16831718
static bool
16841719
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1685-
bool *unsafeColumns)
1720+
pushdown_safety_info *safetyInfo)
16861721
{
16871722
SetOperationStmt *topop;
16881723

@@ -1694,22 +1729,26 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
16941729
if (subquery->hasWindowFuncs)
16951730
return false;
16961731

1732+
/* Check point 4 */
1733+
if (subquery->distinctClause)
1734+
safetyInfo->unsafeVolatile = true;
1735+
16971736
/*
16981737
* If we're at a leaf query, check for unsafe expressions in its target
16991738
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
17001739
* setop trees have only simple Vars in their tlists, so no need to check
17011740
* them.)
17021741
*/
17031742
if (subquery->setOperations == NULL)
1704-
check_output_expressions(subquery, unsafeColumns);
1743+
check_output_expressions(subquery, safetyInfo);
17051744

17061745
/* Are we at top level, or looking at a setop component? */
17071746
if (subquery == topquery)
17081747
{
17091748
/* Top level, so check any component queries */
17101749
if (subquery->setOperations != NULL)
17111750
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1712-
unsafeColumns))
1751+
safetyInfo))
17131752
return false;
17141753
}
17151754
else
@@ -1722,7 +1761,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
17221761
Assert(topop && IsA(topop, SetOperationStmt));
17231762
compare_tlist_datatypes(subquery->targetList,
17241763
topop->colTypes,
1725-
unsafeColumns);
1764+
safetyInfo);
17261765
}
17271766
return true;
17281767
}
@@ -1732,7 +1771,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
17321771
*/
17331772
static bool
17341773
recurse_pushdown_safe(Node *setOp, Query *topquery,
1735-
bool *unsafeColumns)
1774+
pushdown_safety_info *safetyInfo)
17361775
{
17371776
if (IsA(setOp, RangeTblRef))
17381777
{
@@ -1741,7 +1780,7 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17411780
Query *subquery = rte->subquery;
17421781

17431782
Assert(subquery != NULL);
1744-
return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
1783+
return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
17451784
}
17461785
else if (IsA(setOp, SetOperationStmt))
17471786
{
@@ -1751,9 +1790,9 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17511790
if (op->op == SETOP_EXCEPT)
17521791
return false;
17531792
/* Else recurse */
1754-
if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
1793+
if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
17551794
return false;
1756-
if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
1795+
if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
17571796
return false;
17581797
}
17591798
else
@@ -1784,14 +1823,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17841823
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
17851824
* refer to non-DISTINCT output columns, because that could change the set
17861825
* of rows returned. (This condition is vacuous for DISTINCT, because then
1787-
* there are no non-DISTINCT output columns, so we needn't check. But note
1788-
* we are assuming that the qual can't distinguish values that the DISTINCT
1789-
* operator sees as equal. This is a bit shaky but we have no way to test
1790-
* for the case, and it's unlikely enough that we shouldn't refuse the
1791-
* optimization just because it could theoretically happen.)
1826+
* there are no non-DISTINCT output columns, so we needn't check. Note that
1827+
* subquery_is_pushdown_safe already reported that we can't use volatile
1828+
* quals if there's DISTINCT or DISTINCT ON.)
17921829
*/
17931830
static void
1794-
check_output_expressions(Query *subquery, bool *unsafeColumns)
1831+
check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
17951832
{
17961833
ListCell *lc;
17971834

@@ -1803,20 +1840,20 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18031840
continue; /* ignore resjunk columns */
18041841

18051842
/* We need not check further if output col is already known unsafe */
1806-
if (unsafeColumns[tle->resno])
1843+
if (safetyInfo->unsafeColumns[tle->resno])
18071844
continue;
18081845

18091846
/* Functions returning sets are unsafe (point 1) */
18101847
if (expression_returns_set((Node *) tle->expr))
18111848
{
1812-
unsafeColumns[tle->resno] = true;
1849+
safetyInfo->unsafeColumns[tle->resno] = true;
18131850
continue;
18141851
}
18151852

18161853
/* Volatile functions are unsafe (point 2) */
18171854
if (contain_volatile_functions((Node *) tle->expr))
18181855
{
1819-
unsafeColumns[tle->resno] = true;
1856+
safetyInfo->unsafeColumns[tle->resno] = true;
18201857
continue;
18211858
}
18221859

@@ -1825,7 +1862,7 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18251862
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
18261863
{
18271864
/* non-DISTINCT column, so mark it unsafe */
1828-
unsafeColumns[tle->resno] = true;
1865+
safetyInfo->unsafeColumns[tle->resno] = true;
18291866
continue;
18301867
}
18311868
}
@@ -1846,11 +1883,11 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18461883
*
18471884
* tlist is a subquery tlist.
18481885
* colTypes is an OID list of the top-level setop's output column types.
1849-
* unsafeColumns[] is the result array.
1886+
* safetyInfo->unsafeColumns[] is the result array.
18501887
*/
18511888
static void
18521889
compare_tlist_datatypes(List *tlist, List *colTypes,
1853-
bool *unsafeColumns)
1890+
pushdown_safety_info *safetyInfo)
18541891
{
18551892
ListCell *l;
18561893
ListCell *colType = list_head(colTypes);
@@ -1864,7 +1901,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
18641901
if (colType == NULL)
18651902
elog(ERROR, "wrong number of tlist entries");
18661903
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1867-
unsafeColumns[tle->resno] = true;
1904+
safetyInfo->unsafeColumns[tle->resno] = true;
18681905
colType = lnext(colType);
18691906
}
18701907
if (colType != NULL)
@@ -1883,15 +1920,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
18831920
* it will work correctly: sublinks will already have been transformed into
18841921
* subplans in the qual, but not in the subquery).
18851922
*
1886-
* 2. The qual must not refer to the whole-row output of the subquery
1923+
* 2. If unsafeVolatile is set, the qual must not contain any volatile
1924+
* functions.
1925+
*
1926+
* 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
1927+
*
1928+
* 4. The qual must not refer to the whole-row output of the subquery
18871929
* (since there is no easy way to name that within the subquery itself).
18881930
*
1889-
* 3. The qual must not refer to any subquery output columns that were
1931+
* 5. The qual must not refer to any subquery output columns that were
18901932
* found to be unsafe to reference by subquery_is_pushdown_safe().
18911933
*/
18921934
static bool
18931935
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1894-
bool *unsafeColumns)
1936+
pushdown_safety_info *safetyInfo)
18951937
{
18961938
bool safe = true;
18971939
List *vars;
@@ -1901,6 +1943,16 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
19011943
if (contain_subplans(qual))
19021944
return false;
19031945

1946+
/* Refuse volatile quals if we found they'd be unsafe (point 2) */
1947+
if (safetyInfo->unsafeVolatile &&
1948+
contain_volatile_functions(qual))
1949+
return false;
1950+
1951+
/* Refuse leaky quals if told to (point 3) */
1952+
if (safetyInfo->unsafeLeaky &&
1953+
contain_leaky_functions(qual))
1954+
return false;
1955+
19041956
/*
19051957
* It would be unsafe to push down window function calls, but at least for
19061958
* the moment we could never see any in a qual anyhow. (The same applies
@@ -1935,15 +1987,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
19351987
Assert(var->varno == rti);
19361988
Assert(var->varattno >= 0);
19371989

1938-
/* Check point 2 */
1990+
/* Check point 4 */
19391991
if (var->varattno == 0)
19401992
{
19411993
safe = false;
19421994
break;
19431995
}
19441996

1945-
/* Check point 3 */
1946-
if (unsafeColumns[var->varattno])
1997+
/* Check point 5 */
1998+
if (safetyInfo->unsafeColumns[var->varattno])
19471999
{
19482000
safe = false;
19492001
break;

src/test/regress/expected/subselect.out

+29
Original file line numberDiff line numberDiff line change
@@ -739,3 +739,32 @@ select * from int4_tbl where
739739
0
740740
(1 row)
741741

742+
--
743+
-- Check that volatile quals aren't pushed down past a DISTINCT:
744+
-- nextval() should not be called more than the nominal number of times
745+
--
746+
create temp sequence ts1;
747+
select * from
748+
(select distinct ten from tenk1) ss
749+
where ten < 10 + nextval('ts1')
750+
order by 1;
751+
ten
752+
-----
753+
0
754+
1
755+
2
756+
3
757+
4
758+
5
759+
6
760+
7
761+
8
762+
9
763+
(10 rows)
764+
765+
select nextval('ts1');
766+
nextval
767+
---------
768+
11
769+
(1 row)
770+

0 commit comments

Comments
 (0)