Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1147035

Browse files
committed
Disallow pushing volatile qual expressions down into DISTINCT subqueries.
A WHERE clause applied to the output of a subquery with DISTINCT should theoretically be applied only once per distinct row; but if we push it into the subquery then it will be evaluated at each row before duplicate elimination occurs. If the qual is volatile this can give rise to observably wrong results, so don't do that. While at it, refactor a little bit to allow subquery_is_pushdown_safe to report more than one kind of restrictive condition without indefinitely expanding its argument list. Although this is a bug fix, it seems unwise to back-patch it into released branches, since it might de-optimize plans for queries that aren't giving any trouble in practice. So apply to 9.4 but not further back.
1 parent f71136e commit 1147035

File tree

3 files changed

+148
-54
lines changed

3 files changed

+148
-54
lines changed

src/backend/optimizer/path/allpaths.c

+106-54
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@
4242
#include "utils/lsyscache.h"
4343

4444

45+
/* results of subquery_is_pushdown_safe */
46+
typedef struct pushdown_safety_info
47+
{
48+
bool *unsafeColumns; /* which output columns are unsafe to use */
49+
bool unsafeVolatile; /* don't push down volatile quals */
50+
bool unsafeLeaky; /* don't push down leaky quals */
51+
} pushdown_safety_info;
52+
4553
/* These parameters are set by GUC */
4654
bool enable_geqo = false; /* just in case GUC doesn't set it */
4755
int geqo_threshold;
@@ -88,14 +96,15 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
8896
RangeTblEntry *rte);
8997
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
9098
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
91-
bool *unsafeColumns);
99+
pushdown_safety_info *safetyInfo);
92100
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
93-
bool *unsafeColumns);
94-
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
101+
pushdown_safety_info *safetyInfo);
102+
static void check_output_expressions(Query *subquery,
103+
pushdown_safety_info *safetyInfo);
95104
static void compare_tlist_datatypes(List *tlist, List *colTypes,
96-
bool *unsafeColumns);
105+
pushdown_safety_info *safetyInfo);
97106
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
98-
bool *unsafeColumns);
107+
pushdown_safety_info *safetyInfo);
99108
static void subquery_push_qual(Query *subquery,
100109
RangeTblEntry *rte, Index rti, Node *qual);
101110
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -1119,7 +1128,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11191128
Query *parse = root->parse;
11201129
Query *subquery = rte->subquery;
11211130
Relids required_outer;
1122-
bool *unsafeColumns;
1131+
pushdown_safety_info safetyInfo;
11231132
double tuple_fraction;
11241133
PlannerInfo *subroot;
11251134
List *pathkeys;
@@ -1139,13 +1148,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11391148
required_outer = rel->lateral_relids;
11401149

11411150
/*
1142-
* We need a workspace for keeping track of unsafe-to-reference columns.
1143-
* unsafeColumns[i] is set TRUE if we've found that output column i of the
1144-
* subquery is unsafe to use in a pushed-down qual.
1151+
* Zero out result area for subquery_is_pushdown_safe, so that it can set
1152+
* flags as needed while recursing. In particular, we need a workspace
1153+
* for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1154+
* will be set TRUE if we find that output column i of the subquery is
1155+
* unsafe to use in a pushed-down qual.
11451156
*/
1146-
unsafeColumns = (bool *)
1157+
memset(&safetyInfo, 0, sizeof(safetyInfo));
1158+
safetyInfo.unsafeColumns = (bool *)
11471159
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
11481160

1161+
/*
1162+
* If the subquery has the "security_barrier" flag, it means the subquery
1163+
* originated from a view that must enforce row-level security. Then we
1164+
* must not push down quals that contain leaky functions. (Ideally this
1165+
* would be checked inside subquery_is_pushdown_safe, but since we don't
1166+
* currently pass the RTE to that function, we must do it here.)
1167+
*/
1168+
safetyInfo.unsafeLeaky = rte->security_barrier;
1169+
11491170
/*
11501171
* If there are any restriction clauses that have been attached to the
11511172
* subquery relation, consider pushing them down to become WHERE or HAVING
@@ -1160,18 +1181,14 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11601181
* pseudoconstant clauses; better to have the gating node above the
11611182
* subquery.
11621183
*
1163-
* Also, if the sub-query has the "security_barrier" flag, it means the
1164-
* sub-query originated from a view that must enforce row-level security.
1165-
* Then we must not push down quals that contain leaky functions.
1166-
*
11671184
* Non-pushed-down clauses will get evaluated as qpquals of the
11681185
* SubqueryScan node.
11691186
*
11701187
* XXX Are there any cases where we want to make a policy decision not to
11711188
* push down a pushable qual, because it'd result in a worse plan?
11721189
*/
11731190
if (rel->baserestrictinfo != NIL &&
1174-
subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
1191+
subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
11751192
{
11761193
/* OK to consider pushing down individual quals */
11771194
List *upperrestrictlist = NIL;
@@ -1183,9 +1200,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11831200
Node *clause = (Node *) rinfo->clause;
11841201

11851202
if (!rinfo->pseudoconstant &&
1186-
(!rte->security_barrier ||
1187-
!contain_leaky_functions(clause)) &&
1188-
qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
1203+
qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
11891204
{
11901205
/* Push it down */
11911206
subquery_push_qual(subquery, rte, rti, clause);
@@ -1199,7 +1214,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11991214
rel->baserestrictinfo = upperrestrictlist;
12001215
}
12011216

1202-
pfree(unsafeColumns);
1217+
pfree(safetyInfo.unsafeColumns);
12031218

12041219
/*
12051220
* The upper query might not use all the subquery's output columns; if
@@ -1679,19 +1694,39 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
16791694
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
16801695
* quals into it, because that could change the results.
16811696
*
1682-
* In addition, we make several checks on the subquery's output columns
1683-
* to see if it is safe to reference them in pushed-down quals. If output
1684-
* column k is found to be unsafe to reference, we set unsafeColumns[k] to
1685-
* TRUE, but we don't reject the subquery overall since column k might
1686-
* not be referenced by some/all quals. The unsafeColumns[] array will be
1687-
* consulted later by qual_is_pushdown_safe(). It's better to do it this
1688-
* way than to make the checks directly in qual_is_pushdown_safe(), because
1689-
* when the subquery involves set operations we have to check the output
1697+
* 4. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1698+
* This is because upper-level quals should semantically be evaluated only
1699+
* once per distinct row, not once per original row, and if the qual is
1700+
* volatile then extra evaluations could change the results. (This issue
1701+
* does not apply to other forms of aggregation such as GROUP BY, because
1702+
* when those are present we push into HAVING not WHERE, so that the quals
1703+
* are still applied after aggregation.)
1704+
*
1705+
* In addition, we make several checks on the subquery's output columns to see
1706+
* if it is safe to reference them in pushed-down quals. If output column k
1707+
* is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1708+
* to TRUE, but we don't reject the subquery overall since column k might not
1709+
* be referenced by some/all quals. The unsafeColumns[] array will be
1710+
* consulted later by qual_is_pushdown_safe(). It's better to do it this way
1711+
* than to make the checks directly in qual_is_pushdown_safe(), because when
1712+
* the subquery involves set operations we have to check the output
16901713
* expressions in each arm of the set op.
1714+
*
1715+
* Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1716+
* we're effectively assuming that the quals cannot distinguish values that
1717+
* the DISTINCT's equality operator sees as equal, yet there are many
1718+
* counterexamples to that assumption. However use of such a qual with a
1719+
* DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1720+
* "equal" value will be chosen as the output value by the DISTINCT operation.
1721+
* So we don't worry too much about that. Another objection is that if the
1722+
* qual is expensive to evaluate, running it for each original row might cost
1723+
* more than we save by eliminating rows before the DISTINCT step. But it
1724+
* would be very hard to estimate that at this stage, and in practice pushdown
1725+
* seldom seems to make things worse, so we ignore that problem too.
16911726
*/
16921727
static bool
16931728
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1694-
bool *unsafeColumns)
1729+
pushdown_safety_info *safetyInfo)
16951730
{
16961731
SetOperationStmt *topop;
16971732

@@ -1703,22 +1738,26 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
17031738
if (subquery->hasWindowFuncs)
17041739
return false;
17051740

1741+
/* Check point 4 */
1742+
if (subquery->distinctClause)
1743+
safetyInfo->unsafeVolatile = true;
1744+
17061745
/*
17071746
* If we're at a leaf query, check for unsafe expressions in its target
17081747
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
17091748
* setop trees have only simple Vars in their tlists, so no need to check
17101749
* them.)
17111750
*/
17121751
if (subquery->setOperations == NULL)
1713-
check_output_expressions(subquery, unsafeColumns);
1752+
check_output_expressions(subquery, safetyInfo);
17141753

17151754
/* Are we at top level, or looking at a setop component? */
17161755
if (subquery == topquery)
17171756
{
17181757
/* Top level, so check any component queries */
17191758
if (subquery->setOperations != NULL)
17201759
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1721-
unsafeColumns))
1760+
safetyInfo))
17221761
return false;
17231762
}
17241763
else
@@ -1731,7 +1770,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
17311770
Assert(topop && IsA(topop, SetOperationStmt));
17321771
compare_tlist_datatypes(subquery->targetList,
17331772
topop->colTypes,
1734-
unsafeColumns);
1773+
safetyInfo);
17351774
}
17361775
return true;
17371776
}
@@ -1741,7 +1780,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
17411780
*/
17421781
static bool
17431782
recurse_pushdown_safe(Node *setOp, Query *topquery,
1744-
bool *unsafeColumns)
1783+
pushdown_safety_info *safetyInfo)
17451784
{
17461785
if (IsA(setOp, RangeTblRef))
17471786
{
@@ -1750,7 +1789,7 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17501789
Query *subquery = rte->subquery;
17511790

17521791
Assert(subquery != NULL);
1753-
return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
1792+
return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
17541793
}
17551794
else if (IsA(setOp, SetOperationStmt))
17561795
{
@@ -1760,9 +1799,9 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17601799
if (op->op == SETOP_EXCEPT)
17611800
return false;
17621801
/* Else recurse */
1763-
if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
1802+
if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
17641803
return false;
1765-
if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
1804+
if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
17661805
return false;
17671806
}
17681807
else
@@ -1793,14 +1832,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
17931832
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
17941833
* refer to non-DISTINCT output columns, because that could change the set
17951834
* of rows returned. (This condition is vacuous for DISTINCT, because then
1796-
* there are no non-DISTINCT output columns, so we needn't check. But note
1797-
* we are assuming that the qual can't distinguish values that the DISTINCT
1798-
* operator sees as equal. This is a bit shaky but we have no way to test
1799-
* for the case, and it's unlikely enough that we shouldn't refuse the
1800-
* optimization just because it could theoretically happen.)
1835+
* there are no non-DISTINCT output columns, so we needn't check. Note that
1836+
* subquery_is_pushdown_safe already reported that we can't use volatile
1837+
* quals if there's DISTINCT or DISTINCT ON.)
18011838
*/
18021839
static void
1803-
check_output_expressions(Query *subquery, bool *unsafeColumns)
1840+
check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
18041841
{
18051842
ListCell *lc;
18061843

@@ -1812,20 +1849,20 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18121849
continue; /* ignore resjunk columns */
18131850

18141851
/* We need not check further if output col is already known unsafe */
1815-
if (unsafeColumns[tle->resno])
1852+
if (safetyInfo->unsafeColumns[tle->resno])
18161853
continue;
18171854

18181855
/* Functions returning sets are unsafe (point 1) */
18191856
if (expression_returns_set((Node *) tle->expr))
18201857
{
1821-
unsafeColumns[tle->resno] = true;
1858+
safetyInfo->unsafeColumns[tle->resno] = true;
18221859
continue;
18231860
}
18241861

18251862
/* Volatile functions are unsafe (point 2) */
18261863
if (contain_volatile_functions((Node *) tle->expr))
18271864
{
1828-
unsafeColumns[tle->resno] = true;
1865+
safetyInfo->unsafeColumns[tle->resno] = true;
18291866
continue;
18301867
}
18311868

@@ -1834,7 +1871,7 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18341871
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
18351872
{
18361873
/* non-DISTINCT column, so mark it unsafe */
1837-
unsafeColumns[tle->resno] = true;
1874+
safetyInfo->unsafeColumns[tle->resno] = true;
18381875
continue;
18391876
}
18401877
}
@@ -1855,11 +1892,11 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
18551892
*
18561893
* tlist is a subquery tlist.
18571894
* colTypes is an OID list of the top-level setop's output column types.
1858-
* unsafeColumns[] is the result array.
1895+
* safetyInfo->unsafeColumns[] is the result array.
18591896
*/
18601897
static void
18611898
compare_tlist_datatypes(List *tlist, List *colTypes,
1862-
bool *unsafeColumns)
1899+
pushdown_safety_info *safetyInfo)
18631900
{
18641901
ListCell *l;
18651902
ListCell *colType = list_head(colTypes);
@@ -1873,7 +1910,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
18731910
if (colType == NULL)
18741911
elog(ERROR, "wrong number of tlist entries");
18751912
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1876-
unsafeColumns[tle->resno] = true;
1913+
safetyInfo->unsafeColumns[tle->resno] = true;
18771914
colType = lnext(colType);
18781915
}
18791916
if (colType != NULL)
@@ -1892,15 +1929,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
18921929
* it will work correctly: sublinks will already have been transformed into
18931930
* subplans in the qual, but not in the subquery).
18941931
*
1895-
* 2. The qual must not refer to the whole-row output of the subquery
1932+
* 2. If unsafeVolatile is set, the qual must not contain any volatile
1933+
* functions.
1934+
*
1935+
* 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
1936+
*
1937+
* 4. The qual must not refer to the whole-row output of the subquery
18961938
* (since there is no easy way to name that within the subquery itself).
18971939
*
1898-
* 3. The qual must not refer to any subquery output columns that were
1940+
* 5. The qual must not refer to any subquery output columns that were
18991941
* found to be unsafe to reference by subquery_is_pushdown_safe().
19001942
*/
19011943
static bool
19021944
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1903-
bool *unsafeColumns)
1945+
pushdown_safety_info *safetyInfo)
19041946
{
19051947
bool safe = true;
19061948
List *vars;
@@ -1910,6 +1952,16 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
19101952
if (contain_subplans(qual))
19111953
return false;
19121954

1955+
/* Refuse volatile quals if we found they'd be unsafe (point 2) */
1956+
if (safetyInfo->unsafeVolatile &&
1957+
contain_volatile_functions(qual))
1958+
return false;
1959+
1960+
/* Refuse leaky quals if told to (point 3) */
1961+
if (safetyInfo->unsafeLeaky &&
1962+
contain_leaky_functions(qual))
1963+
return false;
1964+
19131965
/*
19141966
* It would be unsafe to push down window function calls, but at least for
19151967
* the moment we could never see any in a qual anyhow. (The same applies
@@ -1944,15 +1996,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
19441996
Assert(var->varno == rti);
19451997
Assert(var->varattno >= 0);
19461998

1947-
/* Check point 2 */
1999+
/* Check point 4 */
19482000
if (var->varattno == 0)
19492001
{
19502002
safe = false;
19512003
break;
19522004
}
19532005

1954-
/* Check point 3 */
1955-
if (unsafeColumns[var->varattno])
2006+
/* Check point 5 */
2007+
if (safetyInfo->unsafeColumns[var->varattno])
19562008
{
19572009
safe = false;
19582010
break;

src/test/regress/expected/subselect.out

+29
Original file line numberDiff line numberDiff line change
@@ -739,3 +739,32 @@ select * from int4_tbl where
739739
0
740740
(1 row)
741741

742+
--
743+
-- Check that volatile quals aren't pushed down past a DISTINCT:
744+
-- nextval() should not be called more than the nominal number of times
745+
--
746+
create temp sequence ts1;
747+
select * from
748+
(select distinct ten from tenk1) ss
749+
where ten < 10 + nextval('ts1')
750+
order by 1;
751+
ten
752+
-----
753+
0
754+
1
755+
2
756+
3
757+
4
758+
5
759+
6
760+
7
761+
8
762+
9
763+
(10 rows)
764+
765+
select nextval('ts1');
766+
nextval
767+
---------
768+
11
769+
(1 row)
770+

0 commit comments

Comments
 (0)