40
40
#include "utils/lsyscache.h"
41
41
42
42
43
+ /* results of subquery_is_pushdown_safe (plus the caller-set unsafeLeaky flag); consulted by qual_is_pushdown_safe */
44
+ typedef struct pushdown_safety_info
45
+ {
46
+ bool * unsafeColumns ; /* unsafeColumns[k] is true if subquery output column k (indexed by resno) is unsafe to reference in a pushed-down qual */
47
+ bool unsafeVolatile ; /* don't push down volatile quals; set when the subquery uses DISTINCT */
48
+ bool unsafeLeaky ; /* don't push down leaky quals; set from the RTE's security_barrier flag */
49
+ } pushdown_safety_info ;
50
+
43
51
/* These parameters are set by GUC */
44
52
bool enable_geqo = false; /* just in case GUC doesn't set it */
45
53
int geqo_threshold ;
@@ -86,14 +94,15 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
86
94
RangeTblEntry * rte );
87
95
static RelOptInfo * make_rel_from_joinlist (PlannerInfo * root , List * joinlist );
88
96
static bool subquery_is_pushdown_safe (Query * subquery , Query * topquery ,
89
- bool * unsafeColumns );
97
+ pushdown_safety_info * safetyInfo );
90
98
static bool recurse_pushdown_safe (Node * setOp , Query * topquery ,
91
- bool * unsafeColumns );
92
- static void check_output_expressions (Query * subquery , bool * unsafeColumns );
99
+ pushdown_safety_info * safetyInfo );
100
+ static void check_output_expressions (Query * subquery ,
101
+ pushdown_safety_info * safetyInfo );
93
102
static void compare_tlist_datatypes (List * tlist , List * colTypes ,
94
- bool * unsafeColumns );
103
+ pushdown_safety_info * safetyInfo );
95
104
static bool qual_is_pushdown_safe (Query * subquery , Index rti , Node * qual ,
96
- bool * unsafeColumns );
105
+ pushdown_safety_info * safetyInfo );
97
106
static void subquery_push_qual (Query * subquery ,
98
107
RangeTblEntry * rte , Index rti , Node * qual );
99
108
static void recurse_push_qual (Node * setOp , Query * topquery ,
@@ -1116,7 +1125,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1116
1125
Query * parse = root -> parse ;
1117
1126
Query * subquery = rte -> subquery ;
1118
1127
Relids required_outer ;
1119
- bool * unsafeColumns ;
1128
+ pushdown_safety_info safetyInfo ;
1120
1129
double tuple_fraction ;
1121
1130
PlannerInfo * subroot ;
1122
1131
List * pathkeys ;
@@ -1136,13 +1145,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1136
1145
required_outer = rel -> lateral_relids ;
1137
1146
1138
1147
/*
1139
- * We need a workspace for keeping track of unsafe-to-reference columns.
1140
- * unsafeColumns[i] is set TRUE if we've found that output column i of the
1141
- * subquery is unsafe to use in a pushed-down qual.
1148
+ * Zero out result area for subquery_is_pushdown_safe, so that it can set
1149
+ * flags as needed while recursing. In particular, we need a workspace
1150
+ * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1151
+ * will be set TRUE if we find that output column i of the subquery is
1152
+ * unsafe to use in a pushed-down qual.
1142
1153
*/
1143
- unsafeColumns = (bool * )
1154
+ memset (& safetyInfo , 0 , sizeof (safetyInfo ));
1155
+ safetyInfo .unsafeColumns = (bool * )
1144
1156
palloc0 ((list_length (subquery -> targetList ) + 1 ) * sizeof (bool ));
1145
1157
1158
+ /*
1159
+ * If the subquery has the "security_barrier" flag, it means the subquery
1160
+ * originated from a view that must enforce row-level security. Then we
1161
+ * must not push down quals that contain leaky functions. (Ideally this
1162
+ * would be checked inside subquery_is_pushdown_safe, but since we don't
1163
+ * currently pass the RTE to that function, we must do it here.)
1164
+ */
1165
+ safetyInfo .unsafeLeaky = rte -> security_barrier ;
1166
+
1146
1167
/*
1147
1168
* If there are any restriction clauses that have been attached to the
1148
1169
* subquery relation, consider pushing them down to become WHERE or HAVING
@@ -1157,18 +1178,14 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1157
1178
* pseudoconstant clauses; better to have the gating node above the
1158
1179
* subquery.
1159
1180
*
1160
- * Also, if the sub-query has the "security_barrier" flag, it means the
1161
- * sub-query originated from a view that must enforce row-level security.
1162
- * Then we must not push down quals that contain leaky functions.
1163
- *
1164
1181
* Non-pushed-down clauses will get evaluated as qpquals of the
1165
1182
* SubqueryScan node.
1166
1183
*
1167
1184
* XXX Are there any cases where we want to make a policy decision not to
1168
1185
* push down a pushable qual, because it'd result in a worse plan?
1169
1186
*/
1170
1187
if (rel -> baserestrictinfo != NIL &&
1171
- subquery_is_pushdown_safe (subquery , subquery , unsafeColumns ))
1188
+ subquery_is_pushdown_safe (subquery , subquery , & safetyInfo ))
1172
1189
{
1173
1190
/* OK to consider pushing down individual quals */
1174
1191
List * upperrestrictlist = NIL ;
@@ -1180,9 +1197,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1180
1197
Node * clause = (Node * ) rinfo -> clause ;
1181
1198
1182
1199
if (!rinfo -> pseudoconstant &&
1183
- (!rte -> security_barrier ||
1184
- !contain_leaky_functions (clause )) &&
1185
- qual_is_pushdown_safe (subquery , rti , clause , unsafeColumns ))
1200
+ qual_is_pushdown_safe (subquery , rti , clause , & safetyInfo ))
1186
1201
{
1187
1202
/* Push it down */
1188
1203
subquery_push_qual (subquery , rte , rti , clause );
@@ -1196,7 +1211,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1196
1211
rel -> baserestrictinfo = upperrestrictlist ;
1197
1212
}
1198
1213
1199
- pfree (unsafeColumns );
1214
+ pfree (safetyInfo . unsafeColumns );
1200
1215
1201
1216
/*
1202
1217
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1670,19 +1685,39 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
1670
1685
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
1671
1686
* quals into it, because that could change the results.
1672
1687
*
1673
- * In addition, we make several checks on the subquery's output columns
1674
- * to see if it is safe to reference them in pushed-down quals. If output
1675
- * column k is found to be unsafe to reference, we set unsafeColumns[k] to
1676
- * TRUE, but we don't reject the subquery overall since column k might
1677
- * not be referenced by some/all quals. The unsafeColumns[] array will be
1678
- * consulted later by qual_is_pushdown_safe(). It's better to do it this
1679
- * way than to make the checks directly in qual_is_pushdown_safe(), because
1680
- * when the subquery involves set operations we have to check the output
1688
+ * 4. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1689
+ * This is because upper-level quals should semantically be evaluated only
1690
+ * once per distinct row, not once per original row, and if the qual is
1691
+ * volatile then extra evaluations could change the results. (This issue
1692
+ * does not apply to other forms of aggregation such as GROUP BY, because
1693
+ * when those are present we push into HAVING not WHERE, so that the quals
1694
+ * are still applied after aggregation.)
1695
+ *
1696
+ * In addition, we make several checks on the subquery's output columns to see
1697
+ * if it is safe to reference them in pushed-down quals. If output column k
1698
+ * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1699
+ * to TRUE, but we don't reject the subquery overall since column k might not
1700
+ * be referenced by some/all quals. The unsafeColumns[] array will be
1701
+ * consulted later by qual_is_pushdown_safe(). It's better to do it this way
1702
+ * than to make the checks directly in qual_is_pushdown_safe(), because when
1703
+ * the subquery involves set operations we have to check the output
1681
1704
* expressions in each arm of the set op.
1705
+ *
1706
+ * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1707
+ * we're effectively assuming that the quals cannot distinguish values that
1708
+ * the DISTINCT's equality operator sees as equal, yet there are many
1709
+ * counterexamples to that assumption. However, use of such a qual with a
1710
+ * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1711
+ * "equal" value will be chosen as the output value by the DISTINCT operation.
1712
+ * So we don't worry too much about that. Another objection is that if the
1713
+ * qual is expensive to evaluate, running it for each original row might cost
1714
+ * more than we save by eliminating rows before the DISTINCT step. But it
1715
+ * would be very hard to estimate that at this stage, and in practice pushdown
1716
+ * seldom seems to make things worse, so we ignore that problem too.
1682
1717
*/
1683
1718
static bool
1684
1719
subquery_is_pushdown_safe (Query * subquery , Query * topquery ,
1685
- bool * unsafeColumns )
1720
+ pushdown_safety_info * safetyInfo )
1686
1721
{
1687
1722
SetOperationStmt * topop ;
1688
1723
@@ -1694,22 +1729,26 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1694
1729
if (subquery -> hasWindowFuncs )
1695
1730
return false;
1696
1731
1732
+ /* Check point 4 */
1733
+ if (subquery -> distinctClause )
1734
+ safetyInfo -> unsafeVolatile = true;
1735
+
1697
1736
/*
1698
1737
* If we're at a leaf query, check for unsafe expressions in its target
1699
1738
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1700
1739
* setop trees have only simple Vars in their tlists, so no need to check
1701
1740
* them.)
1702
1741
*/
1703
1742
if (subquery -> setOperations == NULL )
1704
- check_output_expressions (subquery , unsafeColumns );
1743
+ check_output_expressions (subquery , safetyInfo );
1705
1744
1706
1745
/* Are we at top level, or looking at a setop component? */
1707
1746
if (subquery == topquery )
1708
1747
{
1709
1748
/* Top level, so check any component queries */
1710
1749
if (subquery -> setOperations != NULL )
1711
1750
if (!recurse_pushdown_safe (subquery -> setOperations , topquery ,
1712
- unsafeColumns ))
1751
+ safetyInfo ))
1713
1752
return false;
1714
1753
}
1715
1754
else
@@ -1722,7 +1761,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1722
1761
Assert (topop && IsA (topop , SetOperationStmt ));
1723
1762
compare_tlist_datatypes (subquery -> targetList ,
1724
1763
topop -> colTypes ,
1725
- unsafeColumns );
1764
+ safetyInfo );
1726
1765
}
1727
1766
return true;
1728
1767
}
@@ -1732,7 +1771,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1732
1771
*/
1733
1772
static bool
1734
1773
recurse_pushdown_safe (Node * setOp , Query * topquery ,
1735
- bool * unsafeColumns )
1774
+ pushdown_safety_info * safetyInfo )
1736
1775
{
1737
1776
if (IsA (setOp , RangeTblRef ))
1738
1777
{
@@ -1741,7 +1780,7 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1741
1780
Query * subquery = rte -> subquery ;
1742
1781
1743
1782
Assert (subquery != NULL );
1744
- return subquery_is_pushdown_safe (subquery , topquery , unsafeColumns );
1783
+ return subquery_is_pushdown_safe (subquery , topquery , safetyInfo );
1745
1784
}
1746
1785
else if (IsA (setOp , SetOperationStmt ))
1747
1786
{
@@ -1751,9 +1790,9 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1751
1790
if (op -> op == SETOP_EXCEPT )
1752
1791
return false;
1753
1792
/* Else recurse */
1754
- if (!recurse_pushdown_safe (op -> larg , topquery , unsafeColumns ))
1793
+ if (!recurse_pushdown_safe (op -> larg , topquery , safetyInfo ))
1755
1794
return false;
1756
- if (!recurse_pushdown_safe (op -> rarg , topquery , unsafeColumns ))
1795
+ if (!recurse_pushdown_safe (op -> rarg , topquery , safetyInfo ))
1757
1796
return false;
1758
1797
}
1759
1798
else
@@ -1784,14 +1823,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1784
1823
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1785
1824
* refer to non-DISTINCT output columns, because that could change the set
1786
1825
* of rows returned. (This condition is vacuous for DISTINCT, because then
1787
- * there are no non-DISTINCT output columns, so we needn't check. But note
1788
- * we are assuming that the qual can't distinguish values that the DISTINCT
1789
- * operator sees as equal. This is a bit shaky but we have no way to test
1790
- * for the case, and it's unlikely enough that we shouldn't refuse the
1791
- * optimization just because it could theoretically happen.)
1826
+ * there are no non-DISTINCT output columns, so we needn't check. Note that
1827
+ * subquery_is_pushdown_safe already reported that we can't use volatile
1828
+ * quals if there's DISTINCT or DISTINCT ON.)
1792
1829
*/
1793
1830
static void
1794
- check_output_expressions (Query * subquery , bool * unsafeColumns )
1831
+ check_output_expressions (Query * subquery , pushdown_safety_info * safetyInfo )
1795
1832
{
1796
1833
ListCell * lc ;
1797
1834
@@ -1803,20 +1840,20 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1803
1840
continue ; /* ignore resjunk columns */
1804
1841
1805
1842
/* We need not check further if output col is already known unsafe */
1806
- if (unsafeColumns [tle -> resno ])
1843
+ if (safetyInfo -> unsafeColumns [tle -> resno ])
1807
1844
continue ;
1808
1845
1809
1846
/* Functions returning sets are unsafe (point 1) */
1810
1847
if (expression_returns_set ((Node * ) tle -> expr ))
1811
1848
{
1812
- unsafeColumns [tle -> resno ] = true;
1849
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1813
1850
continue ;
1814
1851
}
1815
1852
1816
1853
/* Volatile functions are unsafe (point 2) */
1817
1854
if (contain_volatile_functions ((Node * ) tle -> expr ))
1818
1855
{
1819
- unsafeColumns [tle -> resno ] = true;
1856
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1820
1857
continue ;
1821
1858
}
1822
1859
@@ -1825,7 +1862,7 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1825
1862
!targetIsInSortList (tle , InvalidOid , subquery -> distinctClause ))
1826
1863
{
1827
1864
/* non-DISTINCT column, so mark it unsafe */
1828
- unsafeColumns [tle -> resno ] = true;
1865
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1829
1866
continue ;
1830
1867
}
1831
1868
}
@@ -1846,11 +1883,11 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1846
1883
*
1847
1884
* tlist is a subquery tlist.
1848
1885
* colTypes is an OID list of the top-level setop's output column types.
1849
- * unsafeColumns[] is the result array.
1886
+ * safetyInfo->unsafeColumns[] is the result array.
1850
1887
*/
1851
1888
static void
1852
1889
compare_tlist_datatypes (List * tlist , List * colTypes ,
1853
- bool * unsafeColumns )
1890
+ pushdown_safety_info * safetyInfo )
1854
1891
{
1855
1892
ListCell * l ;
1856
1893
ListCell * colType = list_head (colTypes );
@@ -1864,7 +1901,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
1864
1901
if (colType == NULL )
1865
1902
elog (ERROR , "wrong number of tlist entries" );
1866
1903
if (exprType ((Node * ) tle -> expr ) != lfirst_oid (colType ))
1867
- unsafeColumns [tle -> resno ] = true;
1904
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1868
1905
colType = lnext (colType );
1869
1906
}
1870
1907
if (colType != NULL )
@@ -1883,15 +1920,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
1883
1920
* it will work correctly: sublinks will already have been transformed into
1884
1921
* subplans in the qual, but not in the subquery).
1885
1922
*
1886
- * 2. The qual must not refer to the whole-row output of the subquery
1923
+ * 2. If unsafeVolatile is set, the qual must not contain any volatile
1924
+ * functions.
1925
+ *
1926
+ * 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
1927
+ *
1928
+ * 4. The qual must not refer to the whole-row output of the subquery
1887
1929
* (since there is no easy way to name that within the subquery itself).
1888
1930
*
1889
- * 3 . The qual must not refer to any subquery output columns that were
1931
+ * 5. The qual must not refer to any subquery output columns that were
1890
1932
* found to be unsafe to reference by subquery_is_pushdown_safe().
1891
1933
*/
1892
1934
static bool
1893
1935
qual_is_pushdown_safe (Query * subquery , Index rti , Node * qual ,
1894
- bool * unsafeColumns )
1936
+ pushdown_safety_info * safetyInfo )
1895
1937
{
1896
1938
bool safe = true;
1897
1939
List * vars ;
@@ -1901,6 +1943,16 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1901
1943
if (contain_subplans (qual ))
1902
1944
return false;
1903
1945
1946
+ /* Refuse volatile quals if we found they'd be unsafe (point 2) */
1947
+ if (safetyInfo -> unsafeVolatile &&
1948
+ contain_volatile_functions (qual ))
1949
+ return false;
1950
+
1951
+ /* Refuse leaky quals if told to (point 3) */
1952
+ if (safetyInfo -> unsafeLeaky &&
1953
+ contain_leaky_functions (qual ))
1954
+ return false;
1955
+
1904
1956
/*
1905
1957
* It would be unsafe to push down window function calls, but at least for
1906
1958
* the moment we could never see any in a qual anyhow. (The same applies
@@ -1935,15 +1987,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1935
1987
Assert (var -> varno == rti );
1936
1988
Assert (var -> varattno >= 0 );
1937
1989
1938
- /* Check point 2 */
1990
+ /* Check point 4 */
1939
1991
if (var -> varattno == 0 )
1940
1992
{
1941
1993
safe = false;
1942
1994
break ;
1943
1995
}
1944
1996
1945
- /* Check point 3 */
1946
- if (unsafeColumns [var -> varattno ])
1997
+ /* Check point 5 */
1998
+ if (safetyInfo -> unsafeColumns [var -> varattno ])
1947
1999
{
1948
2000
safe = false;
1949
2001
break ;
0 commit comments