Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 964c0d0

Browse files
committed
Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests.
The planner is aware that it mustn't push down upper-level quals into subqueries if the quals reference subquery output columns that contain set-returning functions or volatile functions, or are non-DISTINCT outputs of a DISTINCT ON subquery. However, it missed making this check when there were one or more levels of UNION or INTERSECT above the dangerous expression. This could lead to "set-valued function called in context that cannot accept a set" errors, as seen in bug #8213 from Eric Soroos, or to silently wrong answers in the other cases.

To fix, refactor the checks so that we make the column-is-unsafe checks during subquery_is_pushdown_safe(), which already has to recursively inspect all arms of a set-operation tree. This makes qual_is_pushdown_safe() considerably simpler, at the cost that we will spend some cycles checking output columns that possibly aren't referenced in any upper qual. But the cases where this code gets executed at all are already nontrivial queries, so it's unlikely anybody will notice any slowdown of planning.

This has been broken since commit 05f916e, which makes the bug over ten years old. A bit surprising nobody noticed it before now.
1 parent a3bd609 commit 964c0d0

File tree

3 files changed

+255
-91
lines changed

3 files changed

+255
-91
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 125 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
8282
RangeTblEntry *rte);
8383
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
8484
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
85-
bool *differentTypes);
85+
bool *unsafeColumns);
8686
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
87-
bool *differentTypes);
87+
bool *unsafeColumns);
88+
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
8889
static void compare_tlist_datatypes(List *tlist, List *colTypes,
89-
bool *differentTypes);
90+
bool *unsafeColumns);
9091
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
91-
bool *differentTypes);
92+
bool *unsafeColumns);
9293
static void subquery_push_qual(Query *subquery,
9394
RangeTblEntry *rte, Index rti, Node *qual);
9495
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -1048,7 +1049,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10481049
Query *parse = root->parse;
10491050
Query *subquery = rte->subquery;
10501051
Relids required_outer;
1051-
bool *differentTypes;
1052+
bool *unsafeColumns;
10521053
double tuple_fraction;
10531054
PlannerInfo *subroot;
10541055
List *pathkeys;
@@ -1067,8 +1068,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10671068
*/
10681069
required_outer = rel->lateral_relids;
10691070

1070-
/* We need a workspace for keeping track of set-op type coercions */
1071-
differentTypes = (bool *)
1071+
/*
1072+
* We need a workspace for keeping track of unsafe-to-reference columns.
1073+
* unsafeColumns[i] is set TRUE if we've found that output column i of the
1074+
* subquery is unsafe to use in a pushed-down qual.
1075+
*/
1076+
unsafeColumns = (bool *)
10721077
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
10731078

10741079
/*
@@ -1096,7 +1101,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
10961101
* push down a pushable qual, because it'd result in a worse plan?
10971102
*/
10981103
if (rel->baserestrictinfo != NIL &&
1099-
subquery_is_pushdown_safe(subquery, subquery, differentTypes))
1104+
subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
11001105
{
11011106
/* OK to consider pushing down individual quals */
11021107
List *upperrestrictlist = NIL;
@@ -1110,7 +1115,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11101115
if (!rinfo->pseudoconstant &&
11111116
(!rte->security_barrier ||
11121117
!contain_leaky_functions(clause)) &&
1113-
qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
1118+
qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
11141119
{
11151120
/* Push it down */
11161121
subquery_push_qual(subquery, rte, rti, clause);
@@ -1124,7 +1129,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
11241129
rel->baserestrictinfo = upperrestrictlist;
11251130
}
11261131

1127-
pfree(differentTypes);
1132+
pfree(unsafeColumns);
11281133

11291134
/*
11301135
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1553,17 +1558,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
15531558
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
15541559
* quals into it, because that could change the results.
15551560
*
1556-
* 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1557-
* push quals into each component query, but the quals can only reference
1558-
* subquery columns that suffer no type coercions in the set operation.
1559-
* Otherwise there are possible semantic gotchas. So, we check the
1560-
* component queries to see if any of them have different output types;
1561-
* differentTypes[k] is set true if column k has different type in any
1562-
* component.
1561+
* In addition, we make several checks on the subquery's output columns
1562+
* to see if it is safe to reference them in pushed-down quals. If output
1563+
* column k is found to be unsafe to reference, we set unsafeColumns[k] to
1564+
* TRUE, but we don't reject the subquery overall since column k might
1565+
* not be referenced by some/all quals. The unsafeColumns[] array will be
1566+
* consulted later by qual_is_pushdown_safe(). It's better to do it this
1567+
* way than to make the checks directly in qual_is_pushdown_safe(), because
1568+
* when the subquery involves set operations we have to check the output
1569+
* expressions in each arm of the set op.
15631570
*/
15641571
static bool
15651572
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
1566-
bool *differentTypes)
1573+
bool *unsafeColumns)
15671574
{
15681575
SetOperationStmt *topop;
15691576

@@ -1575,13 +1582,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
15751582
if (subquery->hasWindowFuncs)
15761583
return false;
15771584

1585+
/*
1586+
* If we're at a leaf query, check for unsafe expressions in its target
1587+
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1588+
* setop trees have only simple Vars in their tlists, so no need to check
1589+
* them.)
1590+
*/
1591+
if (subquery->setOperations == NULL)
1592+
check_output_expressions(subquery, unsafeColumns);
1593+
15781594
/* Are we at top level, or looking at a setop component? */
15791595
if (subquery == topquery)
15801596
{
15811597
/* Top level, so check any component queries */
15821598
if (subquery->setOperations != NULL)
15831599
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1584-
differentTypes))
1600+
unsafeColumns))
15851601
return false;
15861602
}
15871603
else
@@ -1594,7 +1610,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
15941610
Assert(topop && IsA(topop, SetOperationStmt));
15951611
compare_tlist_datatypes(subquery->targetList,
15961612
topop->colTypes,
1597-
differentTypes);
1613+
unsafeColumns);
15981614
}
15991615
return true;
16001616
}
@@ -1604,7 +1620,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
16041620
*/
16051621
static bool
16061622
recurse_pushdown_safe(Node *setOp, Query *topquery,
1607-
bool *differentTypes)
1623+
bool *unsafeColumns)
16081624
{
16091625
if (IsA(setOp, RangeTblRef))
16101626
{
@@ -1613,19 +1629,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
16131629
Query *subquery = rte->subquery;
16141630

16151631
Assert(subquery != NULL);
1616-
return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
1632+
return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
16171633
}
16181634
else if (IsA(setOp, SetOperationStmt))
16191635
{
16201636
SetOperationStmt *op = (SetOperationStmt *) setOp;
16211637

1622-
/* EXCEPT is no good */
1638+
/* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
16231639
if (op->op == SETOP_EXCEPT)
16241640
return false;
16251641
/* Else recurse */
1626-
if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
1642+
if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
16271643
return false;
1628-
if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
1644+
if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
16291645
return false;
16301646
}
16311647
else
@@ -1637,17 +1653,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
16371653
}
16381654

16391655
/*
1640-
* Compare tlist's datatypes against the list of set-operation result types.
1641-
* For any items that are different, mark the appropriate element of
1642-
* differentTypes[] to show that this column will have type conversions.
1656+
* check_output_expressions - check subquery's output expressions for safety
1657+
*
1658+
* There are several cases in which it's unsafe to push down an upper-level
1659+
* qual if it references a particular output column of a subquery. We check
1660+
* each output column of the subquery and set unsafeColumns[k] to TRUE if
1661+
* that column is unsafe for a pushed-down qual to reference. The conditions
1662+
* checked here are:
1663+
*
1664+
* 1. We must not push down any quals that refer to subselect outputs that
1665+
* return sets, else we'd introduce functions-returning-sets into the
1666+
* subquery's WHERE/HAVING quals.
1667+
*
1668+
* 2. We must not push down any quals that refer to subselect outputs that
1669+
* contain volatile functions, for fear of introducing strange results due
1670+
* to multiple evaluation of a volatile function.
1671+
*
1672+
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1673+
* refer to non-DISTINCT output columns, because that could change the set
1674+
* of rows returned. (This condition is vacuous for DISTINCT, because then
1675+
* there are no non-DISTINCT output columns, so we needn't check. But note
1676+
* we are assuming that the qual can't distinguish values that the DISTINCT
1677+
* operator sees as equal. This is a bit shaky but we have no way to test
1678+
* for the case, and it's unlikely enough that we shouldn't refuse the
1679+
* optimization just because it could theoretically happen.)
1680+
*/
1681+
static void
1682+
check_output_expressions(Query *subquery, bool *unsafeColumns)
1683+
{
1684+
ListCell *lc;
1685+
1686+
foreach(lc, subquery->targetList)
1687+
{
1688+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
1689+
1690+
if (tle->resjunk)
1691+
continue; /* ignore resjunk columns */
1692+
1693+
/* We need not check further if output col is already known unsafe */
1694+
if (unsafeColumns[tle->resno])
1695+
continue;
1696+
1697+
/* Functions returning sets are unsafe (point 1) */
1698+
if (expression_returns_set((Node *) tle->expr))
1699+
{
1700+
unsafeColumns[tle->resno] = true;
1701+
continue;
1702+
}
1703+
1704+
/* Volatile functions are unsafe (point 2) */
1705+
if (contain_volatile_functions((Node *) tle->expr))
1706+
{
1707+
unsafeColumns[tle->resno] = true;
1708+
continue;
1709+
}
1710+
1711+
/* If subquery uses DISTINCT ON, check point 3 */
1712+
if (subquery->hasDistinctOn &&
1713+
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1714+
{
1715+
/* non-DISTINCT column, so mark it unsafe */
1716+
unsafeColumns[tle->resno] = true;
1717+
continue;
1718+
}
1719+
}
1720+
}
1721+
1722+
/*
1723+
* For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1724+
* push quals into each component query, but the quals can only reference
1725+
* subquery columns that suffer no type coercions in the set operation.
1726+
* Otherwise there are possible semantic gotchas. So, we check the
1727+
* component queries to see if any of them have output types different from
1728+
* the top-level setop outputs. unsafeColumns[k] is set true if column k
1729+
* has different type in any component.
16431730
*
16441731
* We don't have to care about typmods here: the only allowed difference
16451732
* between set-op input and output typmods is input is a specific typmod
16461733
* and output is -1, and that does not require a coercion.
1734+
*
1735+
* tlist is a subquery tlist.
1736+
* colTypes is an OID list of the top-level setop's output column types.
1737+
* unsafeColumns[] is the result array.
16471738
*/
16481739
static void
16491740
compare_tlist_datatypes(List *tlist, List *colTypes,
1650-
bool *differentTypes)
1741+
bool *unsafeColumns)
16511742
{
16521743
ListCell *l;
16531744
ListCell *colType = list_head(colTypes);
@@ -1661,7 +1752,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
16611752
if (colType == NULL)
16621753
elog(ERROR, "wrong number of tlist entries");
16631754
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1664-
differentTypes[tle->resno] = true;
1755+
unsafeColumns[tle->resno] = true;
16651756
colType = lnext(colType);
16661757
}
16671758
if (colType != NULL)
@@ -1684,34 +1775,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
16841775
* (since there is no easy way to name that within the subquery itself).
16851776
*
16861777
* 3. The qual must not refer to any subquery output columns that were
1687-
* found to have inconsistent types across a set operation tree by
1688-
* subquery_is_pushdown_safe().
1689-
*
1690-
* 4. If the subquery uses DISTINCT ON, we must not push down any quals that
1691-
* refer to non-DISTINCT output columns, because that could change the set
1692-
* of rows returned. (This condition is vacuous for DISTINCT, because then
1693-
* there are no non-DISTINCT output columns, so we needn't check. But note
1694-
* we are assuming that the qual can't distinguish values that the DISTINCT
1695-
* operator sees as equal. This is a bit shaky but we have no way to test
1696-
* for the case, and it's unlikely enough that we shouldn't refuse the
1697-
* optimization just because it could theoretically happen.)
1698-
*
1699-
* 5. We must not push down any quals that refer to subselect outputs that
1700-
* return sets, else we'd introduce functions-returning-sets into the
1701-
* subquery's WHERE/HAVING quals.
1702-
*
1703-
* 6. We must not push down any quals that refer to subselect outputs that
1704-
* contain volatile functions, for fear of introducing strange results due
1705-
* to multiple evaluation of a volatile function.
1778+
* found to be unsafe to reference by subquery_is_pushdown_safe().
17061779
*/
17071780
static bool
17081781
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1709-
bool *differentTypes)
1782+
bool *unsafeColumns)
17101783
{
17111784
bool safe = true;
17121785
List *vars;
17131786
ListCell *vl;
1714-
Bitmapset *tested = NULL;
17151787

17161788
/* Refuse subselects (point 1) */
17171789
if (contain_subplans(qual))
@@ -1734,7 +1806,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
17341806
foreach(vl, vars)
17351807
{
17361808
Var *var = (Var *) lfirst(vl);
1737-
TargetEntry *tle;
17381809

17391810
/*
17401811
* XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1750,6 +1821,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
17501821
}
17511822

17521823
Assert(var->varno == rti);
1824+
Assert(var->varattno >= 0);
17531825

17541826
/* Check point 2 */
17551827
if (var->varattno == 0)
@@ -1758,53 +1830,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
17581830
break;
17591831
}
17601832

1761-
/*
1762-
* We use a bitmapset to avoid testing the same attno more than once.
1763-
* (NB: this only works because subquery outputs can't have negative
1764-
* attnos.)
1765-
*/
1766-
if (bms_is_member(var->varattno, tested))
1767-
continue;
1768-
tested = bms_add_member(tested, var->varattno);
1769-
17701833
/* Check point 3 */
1771-
if (differentTypes[var->varattno])
1772-
{
1773-
safe = false;
1774-
break;
1775-
}
1776-
1777-
/* Must find the tlist element referenced by the Var */
1778-
tle = get_tle_by_resno(subquery->targetList, var->varattno);
1779-
Assert(tle != NULL);
1780-
Assert(!tle->resjunk);
1781-
1782-
/* If subquery uses DISTINCT ON, check point 4 */
1783-
if (subquery->hasDistinctOn &&
1784-
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1785-
{
1786-
/* non-DISTINCT column, so fail */
1787-
safe = false;
1788-
break;
1789-
}
1790-
1791-
/* Refuse functions returning sets (point 5) */
1792-
if (expression_returns_set((Node *) tle->expr))
1793-
{
1794-
safe = false;
1795-
break;
1796-
}
1797-
1798-
/* Refuse volatile functions (point 6) */
1799-
if (contain_volatile_functions((Node *) tle->expr))
1834+
if (unsafeColumns[var->varattno])
18001835
{
18011836
safe = false;
18021837
break;
18031838
}
18041839
}
18051840

18061841
list_free(vars);
1807-
bms_free(tested);
18081842

18091843
return safe;
18101844
}

0 commit comments

Comments
 (0)