42
42
#include "utils/lsyscache.h"
43
43
44
44
45
+ /* Results of subquery_is_pushdown_safe(); consulted afterwards by qual_is_pushdown_safe() for each candidate qual */
46
+ typedef struct pushdown_safety_info
47
+ {
48
+ bool * unsafeColumns ; /* array indexed by output column number; true means that column is unsafe to reference in a pushed-down qual */
49
+ bool unsafeVolatile ; /* don't push down volatile quals (set when the subquery has a DISTINCT clause) */
50
+ bool unsafeLeaky ; /* don't push down leaky quals (set by the caller from the RTE's security_barrier flag) */
51
+ } pushdown_safety_info ;
52
+
45
53
/* These parameters are set by GUC */
46
54
bool enable_geqo = false; /* just in case GUC doesn't set it */
47
55
int geqo_threshold ;
@@ -88,14 +96,15 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
88
96
RangeTblEntry * rte );
89
97
static RelOptInfo * make_rel_from_joinlist (PlannerInfo * root , List * joinlist );
90
98
static bool subquery_is_pushdown_safe (Query * subquery , Query * topquery ,
91
- bool * unsafeColumns );
99
+ pushdown_safety_info * safetyInfo );
92
100
static bool recurse_pushdown_safe (Node * setOp , Query * topquery ,
93
- bool * unsafeColumns );
94
- static void check_output_expressions (Query * subquery , bool * unsafeColumns );
101
+ pushdown_safety_info * safetyInfo );
102
+ static void check_output_expressions (Query * subquery ,
103
+ pushdown_safety_info * safetyInfo );
95
104
static void compare_tlist_datatypes (List * tlist , List * colTypes ,
96
- bool * unsafeColumns );
105
+ pushdown_safety_info * safetyInfo );
97
106
static bool qual_is_pushdown_safe (Query * subquery , Index rti , Node * qual ,
98
- bool * unsafeColumns );
107
+ pushdown_safety_info * safetyInfo );
99
108
static void subquery_push_qual (Query * subquery ,
100
109
RangeTblEntry * rte , Index rti , Node * qual );
101
110
static void recurse_push_qual (Node * setOp , Query * topquery ,
@@ -1119,7 +1128,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1119
1128
Query * parse = root -> parse ;
1120
1129
Query * subquery = rte -> subquery ;
1121
1130
Relids required_outer ;
1122
- bool * unsafeColumns ;
1131
+ pushdown_safety_info safetyInfo ;
1123
1132
double tuple_fraction ;
1124
1133
PlannerInfo * subroot ;
1125
1134
List * pathkeys ;
@@ -1139,13 +1148,25 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1139
1148
required_outer = rel -> lateral_relids ;
1140
1149
1141
1150
/*
1142
- * We need a workspace for keeping track of unsafe-to-reference columns.
1143
- * unsafeColumns[i] is set TRUE if we've found that output column i of the
1144
- * subquery is unsafe to use in a pushed-down qual.
1151
+ * Zero out result area for subquery_is_pushdown_safe, so that it can set
1152
+ * flags as needed while recursing. In particular, we need a workspace
1153
+ * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
1154
+ * will be set TRUE if we find that output column i of the subquery is
1155
+ * unsafe to use in a pushed-down qual.
1145
1156
*/
1146
- unsafeColumns = (bool * )
1157
+ memset (& safetyInfo , 0 , sizeof (safetyInfo ));
1158
+ safetyInfo .unsafeColumns = (bool * )
1147
1159
palloc0 ((list_length (subquery -> targetList ) + 1 ) * sizeof (bool ));
1148
1160
1161
+ /*
1162
+ * If the subquery has the "security_barrier" flag, it means the subquery
1163
+ * originated from a view that must enforce row-level security. Then we
1164
+ * must not push down quals that contain leaky functions. (Ideally this
1165
+ * would be checked inside subquery_is_pushdown_safe, but since we don't
1166
+ * currently pass the RTE to that function, we must do it here.)
1167
+ */
1168
+ safetyInfo .unsafeLeaky = rte -> security_barrier ;
1169
+
1149
1170
/*
1150
1171
* If there are any restriction clauses that have been attached to the
1151
1172
* subquery relation, consider pushing them down to become WHERE or HAVING
@@ -1160,18 +1181,14 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1160
1181
* pseudoconstant clauses; better to have the gating node above the
1161
1182
* subquery.
1162
1183
*
1163
- * Also, if the sub-query has the "security_barrier" flag, it means the
1164
- * sub-query originated from a view that must enforce row-level security.
1165
- * Then we must not push down quals that contain leaky functions.
1166
- *
1167
1184
* Non-pushed-down clauses will get evaluated as qpquals of the
1168
1185
* SubqueryScan node.
1169
1186
*
1170
1187
* XXX Are there any cases where we want to make a policy decision not to
1171
1188
* push down a pushable qual, because it'd result in a worse plan?
1172
1189
*/
1173
1190
if (rel -> baserestrictinfo != NIL &&
1174
- subquery_is_pushdown_safe (subquery , subquery , unsafeColumns ))
1191
+ subquery_is_pushdown_safe (subquery , subquery , & safetyInfo ))
1175
1192
{
1176
1193
/* OK to consider pushing down individual quals */
1177
1194
List * upperrestrictlist = NIL ;
@@ -1183,9 +1200,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1183
1200
Node * clause = (Node * ) rinfo -> clause ;
1184
1201
1185
1202
if (!rinfo -> pseudoconstant &&
1186
- (!rte -> security_barrier ||
1187
- !contain_leaky_functions (clause )) &&
1188
- qual_is_pushdown_safe (subquery , rti , clause , unsafeColumns ))
1203
+ qual_is_pushdown_safe (subquery , rti , clause , & safetyInfo ))
1189
1204
{
1190
1205
/* Push it down */
1191
1206
subquery_push_qual (subquery , rte , rti , clause );
@@ -1199,7 +1214,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
1199
1214
rel -> baserestrictinfo = upperrestrictlist ;
1200
1215
}
1201
1216
1202
- pfree (unsafeColumns );
1217
+ pfree (safetyInfo . unsafeColumns );
1203
1218
1204
1219
/*
1205
1220
* The upper query might not use all the subquery's output columns; if
@@ -1679,19 +1694,39 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
1679
1694
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
1680
1695
* quals into it, because that could change the results.
1681
1696
*
1682
- * In addition, we make several checks on the subquery's output columns
1683
- * to see if it is safe to reference them in pushed-down quals. If output
1684
- * column k is found to be unsafe to reference, we set unsafeColumns[k] to
1685
- * TRUE, but we don't reject the subquery overall since column k might
1686
- * not be referenced by some/all quals. The unsafeColumns[] array will be
1687
- * consulted later by qual_is_pushdown_safe(). It's better to do it this
1688
- * way than to make the checks directly in qual_is_pushdown_safe(), because
1689
- * when the subquery involves set operations we have to check the output
1697
+ * 4. If the subquery uses DISTINCT, we cannot push volatile quals into it.
1698
+ * This is because upper-level quals should semantically be evaluated only
1699
+ * once per distinct row, not once per original row, and if the qual is
1700
+ * volatile then extra evaluations could change the results. (This issue
1701
+ * does not apply to other forms of aggregation such as GROUP BY, because
1702
+ * when those are present we push into HAVING not WHERE, so that the quals
1703
+ * are still applied after aggregation.)
1704
+ *
1705
+ * In addition, we make several checks on the subquery's output columns to see
1706
+ * if it is safe to reference them in pushed-down quals. If output column k
1707
+ * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
1708
+ * to TRUE, but we don't reject the subquery overall since column k might not
1709
+ * be referenced by some/all quals. The unsafeColumns[] array will be
1710
+ * consulted later by qual_is_pushdown_safe(). It's better to do it this way
1711
+ * than to make the checks directly in qual_is_pushdown_safe(), because when
1712
+ * the subquery involves set operations we have to check the output
1690
1713
* expressions in each arm of the set op.
1714
+ *
1715
+ * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
1716
+ * we're effectively assuming that the quals cannot distinguish values that
1717
+ * the DISTINCT's equality operator sees as equal, yet there are many
1718
+ * counterexamples to that assumption. However, use of such a qual with a
1719
+ * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
1720
+ * "equal" value will be chosen as the output value by the DISTINCT operation.
1721
+ * So we don't worry too much about that. Another objection is that if the
1722
+ * qual is expensive to evaluate, running it for each original row might cost
1723
+ * more than we save by eliminating rows before the DISTINCT step. But it
1724
+ * would be very hard to estimate that at this stage, and in practice pushdown
1725
+ * seldom seems to make things worse, so we ignore that problem too.
1691
1726
*/
1692
1727
static bool
1693
1728
subquery_is_pushdown_safe (Query * subquery , Query * topquery ,
1694
- bool * unsafeColumns )
1729
+ pushdown_safety_info * safetyInfo )
1695
1730
{
1696
1731
SetOperationStmt * topop ;
1697
1732
@@ -1703,22 +1738,26 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1703
1738
if (subquery -> hasWindowFuncs )
1704
1739
return false;
1705
1740
1741
+ /* Check point 4 */
1742
+ if (subquery -> distinctClause )
1743
+ safetyInfo -> unsafeVolatile = true;
1744
+
1706
1745
/*
1707
1746
* If we're at a leaf query, check for unsafe expressions in its target
1708
1747
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1709
1748
* setop trees have only simple Vars in their tlists, so no need to check
1710
1749
* them.)
1711
1750
*/
1712
1751
if (subquery -> setOperations == NULL )
1713
- check_output_expressions (subquery , unsafeColumns );
1752
+ check_output_expressions (subquery , safetyInfo );
1714
1753
1715
1754
/* Are we at top level, or looking at a setop component? */
1716
1755
if (subquery == topquery )
1717
1756
{
1718
1757
/* Top level, so check any component queries */
1719
1758
if (subquery -> setOperations != NULL )
1720
1759
if (!recurse_pushdown_safe (subquery -> setOperations , topquery ,
1721
- unsafeColumns ))
1760
+ safetyInfo ))
1722
1761
return false;
1723
1762
}
1724
1763
else
@@ -1731,7 +1770,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1731
1770
Assert (topop && IsA (topop , SetOperationStmt ));
1732
1771
compare_tlist_datatypes (subquery -> targetList ,
1733
1772
topop -> colTypes ,
1734
- unsafeColumns );
1773
+ safetyInfo );
1735
1774
}
1736
1775
return true;
1737
1776
}
@@ -1741,7 +1780,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1741
1780
*/
1742
1781
static bool
1743
1782
recurse_pushdown_safe (Node * setOp , Query * topquery ,
1744
- bool * unsafeColumns )
1783
+ pushdown_safety_info * safetyInfo )
1745
1784
{
1746
1785
if (IsA (setOp , RangeTblRef ))
1747
1786
{
@@ -1750,7 +1789,7 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1750
1789
Query * subquery = rte -> subquery ;
1751
1790
1752
1791
Assert (subquery != NULL );
1753
- return subquery_is_pushdown_safe (subquery , topquery , unsafeColumns );
1792
+ return subquery_is_pushdown_safe (subquery , topquery , safetyInfo );
1754
1793
}
1755
1794
else if (IsA (setOp , SetOperationStmt ))
1756
1795
{
@@ -1760,9 +1799,9 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1760
1799
if (op -> op == SETOP_EXCEPT )
1761
1800
return false;
1762
1801
/* Else recurse */
1763
- if (!recurse_pushdown_safe (op -> larg , topquery , unsafeColumns ))
1802
+ if (!recurse_pushdown_safe (op -> larg , topquery , safetyInfo ))
1764
1803
return false;
1765
- if (!recurse_pushdown_safe (op -> rarg , topquery , unsafeColumns ))
1804
+ if (!recurse_pushdown_safe (op -> rarg , topquery , safetyInfo ))
1766
1805
return false;
1767
1806
}
1768
1807
else
@@ -1793,14 +1832,12 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
1793
1832
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1794
1833
* refer to non-DISTINCT output columns, because that could change the set
1795
1834
* of rows returned. (This condition is vacuous for DISTINCT, because then
1796
- * there are no non-DISTINCT output columns, so we needn't check. But note
1797
- * we are assuming that the qual can't distinguish values that the DISTINCT
1798
- * operator sees as equal. This is a bit shaky but we have no way to test
1799
- * for the case, and it's unlikely enough that we shouldn't refuse the
1800
- * optimization just because it could theoretically happen.)
1835
+ * there are no non-DISTINCT output columns, so we needn't check. Note that
1836
+ * subquery_is_pushdown_safe already reported that we can't use volatile
1837
+ * quals if there's DISTINCT or DISTINCT ON.)
1801
1838
*/
1802
1839
static void
1803
- check_output_expressions (Query * subquery , bool * unsafeColumns )
1840
+ check_output_expressions (Query * subquery , pushdown_safety_info * safetyInfo )
1804
1841
{
1805
1842
ListCell * lc ;
1806
1843
@@ -1812,20 +1849,20 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1812
1849
continue ; /* ignore resjunk columns */
1813
1850
1814
1851
/* We need not check further if output col is already known unsafe */
1815
- if (unsafeColumns [tle -> resno ])
1852
+ if (safetyInfo -> unsafeColumns [tle -> resno ])
1816
1853
continue ;
1817
1854
1818
1855
/* Functions returning sets are unsafe (point 1) */
1819
1856
if (expression_returns_set ((Node * ) tle -> expr ))
1820
1857
{
1821
- unsafeColumns [tle -> resno ] = true;
1858
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1822
1859
continue ;
1823
1860
}
1824
1861
1825
1862
/* Volatile functions are unsafe (point 2) */
1826
1863
if (contain_volatile_functions ((Node * ) tle -> expr ))
1827
1864
{
1828
- unsafeColumns [tle -> resno ] = true;
1865
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1829
1866
continue ;
1830
1867
}
1831
1868
@@ -1834,7 +1871,7 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1834
1871
!targetIsInSortList (tle , InvalidOid , subquery -> distinctClause ))
1835
1872
{
1836
1873
/* non-DISTINCT column, so mark it unsafe */
1837
- unsafeColumns [tle -> resno ] = true;
1874
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1838
1875
continue ;
1839
1876
}
1840
1877
}
@@ -1855,11 +1892,11 @@ check_output_expressions(Query *subquery, bool *unsafeColumns)
1855
1892
*
1856
1893
* tlist is a subquery tlist.
1857
1894
* colTypes is an OID list of the top-level setop's output column types.
1858
- * unsafeColumns[] is the result array.
1895
+ * safetyInfo->unsafeColumns[] is the result array.
1859
1896
*/
1860
1897
static void
1861
1898
compare_tlist_datatypes (List * tlist , List * colTypes ,
1862
- bool * unsafeColumns )
1899
+ pushdown_safety_info * safetyInfo )
1863
1900
{
1864
1901
ListCell * l ;
1865
1902
ListCell * colType = list_head (colTypes );
@@ -1873,7 +1910,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
1873
1910
if (colType == NULL )
1874
1911
elog (ERROR , "wrong number of tlist entries" );
1875
1912
if (exprType ((Node * ) tle -> expr ) != lfirst_oid (colType ))
1876
- unsafeColumns [tle -> resno ] = true;
1913
+ safetyInfo -> unsafeColumns [tle -> resno ] = true;
1877
1914
colType = lnext (colType );
1878
1915
}
1879
1916
if (colType != NULL )
@@ -1892,15 +1929,20 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
1892
1929
* it will work correctly: sublinks will already have been transformed into
1893
1930
* subplans in the qual, but not in the subquery).
1894
1931
*
1895
- * 2. The qual must not refer to the whole-row output of the subquery
1932
+ * 2. If unsafeVolatile is set, the qual must not contain any volatile
1933
+ * functions.
1934
+ *
1935
+ * 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
1936
+ *
1937
+ * 4. The qual must not refer to the whole-row output of the subquery
1896
1938
* (since there is no easy way to name that within the subquery itself).
1897
1939
*
1898
- * 3 . The qual must not refer to any subquery output columns that were
1940
+ * 5. The qual must not refer to any subquery output columns that were
1899
1941
* found to be unsafe to reference by subquery_is_pushdown_safe().
1900
1942
*/
1901
1943
static bool
1902
1944
qual_is_pushdown_safe (Query * subquery , Index rti , Node * qual ,
1903
- bool * unsafeColumns )
1945
+ pushdown_safety_info * safetyInfo )
1904
1946
{
1905
1947
bool safe = true;
1906
1948
List * vars ;
@@ -1910,6 +1952,16 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1910
1952
if (contain_subplans (qual ))
1911
1953
return false;
1912
1954
1955
+ /* Refuse volatile quals if we found they'd be unsafe (point 2) */
1956
+ if (safetyInfo -> unsafeVolatile &&
1957
+ contain_volatile_functions (qual ))
1958
+ return false;
1959
+
1960
+ /* Refuse leaky quals if told to (point 3) */
1961
+ if (safetyInfo -> unsafeLeaky &&
1962
+ contain_leaky_functions (qual ))
1963
+ return false;
1964
+
1913
1965
/*
1914
1966
* It would be unsafe to push down window function calls, but at least for
1915
1967
* the moment we could never see any in a qual anyhow. (The same applies
@@ -1944,15 +1996,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1944
1996
Assert (var -> varno == rti );
1945
1997
Assert (var -> varattno >= 0 );
1946
1998
1947
- /* Check point 2 */
1999
+ /* Check point 4 */
1948
2000
if (var -> varattno == 0 )
1949
2001
{
1950
2002
safe = false;
1951
2003
break ;
1952
2004
}
1953
2005
1954
- /* Check point 3 */
1955
- if (unsafeColumns [var -> varattno ])
2006
+ /* Check point 5 */
2007
+ if (safetyInfo -> unsafeColumns [var -> varattno ])
1956
2008
{
1957
2009
safe = false;
1958
2010
break ;
0 commit comments