diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 6e7dc3d2df98..ecbb35882899 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -164,6 +164,8 @@ static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context); static void deparseNullTest(NullTest *node, deparse_expr_cxt *context); static void deparseCaseExpr(CaseExpr *node, deparse_expr_cxt *context); static void deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context); +static void deparseConvertRowtypeExpr(ConvertRowtypeExpr *cre, + deparse_expr_cxt *context); static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod, @@ -200,9 +202,9 @@ static Node *deparseSortGroupClause(Index ref, List *tlist, bool force_colno, /* * Helper functions */ -static bool is_subquery_var(Var *node, RelOptInfo *foreignrel, +static bool is_subquery_var(Node *node, Index varno, int varattno, RelOptInfo *foreignrel, int *relno, int *colno); -static void get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, +static void get_relation_column_alias_ids(Node *node, Index varno, int varattno, RelOptInfo *foreignrel, int *relno, int *colno); @@ -1186,18 +1188,25 @@ build_tlist_to_deparse(RelOptInfo *foreignrel) /* * We require columns specified in foreignrel->reltarget->exprs and those - * required for evaluating the local conditions. + * required for evaluating the local conditions. Child relation's + * targetlist or local conditions may have ConvertRowtypeExpr when parent + * whole-row Vars were translated. We need to include those in targetlist + * to be pushed down to match the targetlists produced for the joining + * relations (in case we are using subqueries in the deparsed query) and + * also to match the targetlist of outer plan if foreign scan has one. */ tlist = add_to_flat_tlist(tlist, pull_var_clause((Node *) foreignrel->reltarget->exprs, - PVC_RECURSE_PLACEHOLDERS)); + PVC_RECURSE_PLACEHOLDERS | + PVC_INCLUDE_CONVERTROWTYPES)); foreach(lc, fpinfo->local_conds) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); tlist = add_to_flat_tlist(tlist, pull_var_clause((Node *) rinfo->clause, - PVC_RECURSE_PLACEHOLDERS)); + PVC_RECURSE_PLACEHOLDERS | + PVC_INCLUDE_CONVERTROWTYPES)); } return tlist; @@ -2298,7 +2307,7 @@ deparseDirectUpdateSql(StringInfo buf, PlannerInfo *root, appendStringInfoString(buf, "UPDATE "); deparseRelation(buf, rel); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) appendStringInfo(buf, " %s%d", REL_ALIAS_PREFIX, rtindex); appendStringInfoString(buf, " SET "); @@ -2325,7 +2334,7 @@ deparseDirectUpdateSql(StringInfo buf, PlannerInfo *root, reset_transmission_modes(nestlevel); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) { List *ignore_conds = NIL; @@ -2341,7 +2350,7 @@ deparseDirectUpdateSql(StringInfo buf, PlannerInfo *root, if (additional_conds != NIL) list_free_deep(additional_conds); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) deparseExplicitTargetList(returningList, true, retrieved_attrs, &context); else @@ -2406,10 +2415,10 @@ deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root, appendStringInfoString(buf, "DELETE FROM "); deparseRelation(buf, rel); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) appendStringInfo(buf, " %s%d", REL_ALIAS_PREFIX, rtindex); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) { List *ignore_conds = NIL; @@ -2424,7 +2433,7 @@ deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root, if (additional_conds != NIL) list_free_deep(additional_conds); - if (foreignrel->reloptkind == RELOPT_JOINREL) + if (IS_JOIN_REL(foreignrel)) deparseExplicitTargetList(returningList, true, retrieved_attrs, &context); else @@ -2928,6 +2937,10 @@ deparseExpr(Expr *node, deparse_expr_cxt *context) case T_Aggref: deparseAggref((Aggref *) node, context); break; + case T_ConvertRowtypeExpr: + deparseConvertRowtypeExpr(castNode(ConvertRowtypeExpr, node), + context); + break; default: elog(ERROR, "unsupported expression type for deparse: %d", (int) nodeTag(node)); @@ -2958,7 +2971,8 @@ deparseVar(Var *node, deparse_expr_cxt *context) * subquery, use the relation and column alias to the Var provided by the * subquery, instead of the remote name. */ - if (is_subquery_var(node, context->scanrel, &relno, &colno)) + if (is_subquery_var((Node *) node, node->varno, node->varattno, context->scanrel, &relno, + &colno)) { appendStringInfo(context->buf, "%s%d.%s%d", SUBQUERY_REL_ALIAS_PREFIX, relno, @@ -3740,6 +3754,115 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context) appendStringInfoChar(buf, ')'); } +/* + * Deparse a ConvertRowtypeExpr node. + * + * The function handles ConvertRowtypeExpr nodes constructed to convert a child + * tuple to parent tuple. The function deparses a ConvertRowtypeExpr as a ROW + * expression child column refrences arranged according to the tuple conversion + * map for converting child tuple to that of the parent. The ROW expression is + * decorated with CASE similar to deparseColumnRef() to take care of + * ConvertRowtypeExpr nodes on nullable side of the join. + */ +static void +deparseConvertRowtypeExpr(ConvertRowtypeExpr *cre, + deparse_expr_cxt *context) +{ + ConvertRowtypeExpr *expr = cre; + Var *child_var; + TupleDesc parent_desc; + TupleDesc child_desc; + TupleConversionMap *conv_map; + AttrMap *attrMap; + int cnt; + bool qualify_col = (bms_num_members(context->scanrel->relids) > 1); + StringInfo buf = context->buf; + PlannerInfo *root = context->root; + int relno; + int colno; + bool first_col; + + /* + * Multi-level partitioned hierarchies produce nested ConvertRowtypeExprs, + * where the top ConvertRowtypeExpr gives the parent relation and the leaf + * Var node gives the child relation. Fetch the leaf Var node + * corresponding to the lowest child. + */ + while (IsA(expr->arg, ConvertRowtypeExpr)) + expr = castNode(ConvertRowtypeExpr, expr->arg); + child_var = castNode(Var, expr->arg); + Assert(child_var->varattno == 0); + + /* + * If the child Var belongs to the foreign relation that is deparsed as a + * subquery, use the relation and column alias to the child Var provided + * by the subquery, instead of the remote name. + */ + if (is_subquery_var((Node *) cre, child_var->varno, child_var->varattno, context->scanrel, + &relno, &colno)) + { + appendStringInfo(context->buf, "%s%d.%s%d", + SUBQUERY_REL_ALIAS_PREFIX, relno, + SUBQUERY_COL_ALIAS_PREFIX, colno); + return; + } + + /* Construct the conversion map. */ + parent_desc = lookup_rowtype_tupdesc(cre->resulttype, -1); + child_desc = lookup_rowtype_tupdesc(child_var->vartype, + child_var->vartypmod); + conv_map = convert_tuples_by_name(child_desc, parent_desc); + + ReleaseTupleDesc(parent_desc); + ReleaseTupleDesc(child_desc); + + /* If no conversion is needed, deparse the child Var as is. */ + if (conv_map == NULL) + { + deparseVar(child_var, context); + return; + } + + attrMap = conv_map->attrMap; + + /* + * In case the whole-row reference is under an outer join then it has to + * go NULL whenever the rest of the row goes NULL. Deparsing a join query + * would always involve multiple relations, thus qualify_col would be + * true. + */ + if (qualify_col) + { + appendStringInfoString(buf, "CASE WHEN ("); + ADD_REL_QUALIFIER(buf, child_var->varno); + appendStringInfoString(buf, "*)::text IS NOT NULL THEN "); + } + + /* Construct ROW expression according to the conversion map. */ + appendStringInfoString(buf, "ROW("); + first_col = true; + for (cnt = 0; cnt < parent_desc->natts; cnt++) + { + /* Ignore dropped columns. */ + if (attrMap->attnums[cnt] == 0) + continue; + + if (!first_col) + appendStringInfoString(buf, ", "); + deparseColumnRef(buf, child_var->varno, attrMap->attnums[cnt], + planner_rt_fetch(child_var->varno, root), + qualify_col); + first_col = false; + } + appendStringInfoChar(buf, ')'); + + /* Complete the CASE WHEN statement started above. */ + if (qualify_col) + appendStringInfoString(buf, " END"); + + free_conversion_map(conv_map); +} + /* * Append ORDER BY within aggregate function. */ @@ -4102,12 +4225,18 @@ deparseSortGroupClause(Index ref, List *tlist, bool force_colno, /* - * Returns true if given Var is deparsed as a subquery output column, in + * Returns true if given Node is deparsed as a subquery output column, in * which case, *relno and *colno are set to the IDs for the relation and - * column alias to the Var provided by the subquery. + * column alias to the Node provided by the subquery. + * + * We do not allow relations with PlaceHolderVars to be pushed down. We call + * this function only for simple or join relations, whose targetlists can + * contain only Var and ConvertRowtypeExpr (in case of child-joins) nodes + * (apart from PHVs). So support only Var and ConvertRowtypeExpr nodes. */ static bool -is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) +is_subquery_var(Node *node, Index varno, int varattno, RelOptInfo *foreignrel, + int *relno, int *colno) { PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; RelOptInfo *outerrel = fpinfo->outerrel; @@ -4116,6 +4245,8 @@ is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) /* Should only be called in these cases. */ Assert(IS_SIMPLE_REL(foreignrel) || IS_JOIN_REL(foreignrel)); + Assert(IsA(node, ConvertRowtypeExpr) || IsA(node, Var)); + /* * If the given relation isn't a join relation, it doesn't have any lower * subqueries, so the Var isn't a subquery output column. @@ -4127,10 +4258,10 @@ is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) * If the Var doesn't belong to any lower subqueries, it isn't a subquery * output column. */ - if (!bms_is_member(node->varno, fpinfo->lower_subquery_rels)) + if (!bms_is_member(varno, fpinfo->lower_subquery_rels)) return false; - if (bms_is_member(node->varno, outerrel->relids)) + if (bms_is_member(varno, outerrel->relids)) { /* * If outer relation is deparsed as a subquery, the Var is an output @@ -4138,16 +4269,16 @@ is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) */ if (fpinfo->make_outerrel_subquery) { - get_relation_column_alias_ids(node, outerrel, relno, colno); + get_relation_column_alias_ids(node, varno, varattno, outerrel, relno, colno); return true; } /* Otherwise, recurse into the outer relation. */ - return is_subquery_var(node, outerrel, relno, colno); + return is_subquery_var(node, varno, varattno, outerrel, relno, colno); } else { - Assert(bms_is_member(node->varno, innerrel->relids)); + Assert(bms_is_member(varno, innerrel->relids)); /* * If inner relation is deparsed as a subquery, the Var is an output @@ -4155,12 +4286,12 @@ is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) */ if (fpinfo->make_innerrel_subquery) { - get_relation_column_alias_ids(node, innerrel, relno, colno); + get_relation_column_alias_ids(node, varno, varattno, innerrel, relno, colno); return true; } /* Otherwise, recurse into the inner relation. */ - return is_subquery_var(node, innerrel, relno, colno); + return is_subquery_var(node, varno, varattno, innerrel, relno, colno); } } @@ -4169,7 +4300,7 @@ is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) * given relation, which are returned into *relno and *colno. */ static void -get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, +get_relation_column_alias_ids(Node *node, Index varno, int varattno, RelOptInfo *foreignrel, int *relno, int *colno) { PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; @@ -4189,11 +4320,12 @@ get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, * Match reltarget entries only on varno/varattno. Ideally there * would be some cross-check on varnullingrels, but it's unclear what * to do exactly; we don't have enough context to know what that value - * should be. + * should be. Also match posible converted whole-row references. */ - if (IsA(tlvar, Var) && - tlvar->varno == node->varno && - tlvar->varattno == node->varattno) + if ((IsA(tlvar, Var) && + tlvar->varno == varno && + tlvar->varattno == varattno) + || is_equal_converted_whole_row_references((Node *) tlvar, node)) { *colno = i; return; diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 7e487cff2a47..4e249cfdb55b 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -10059,6 +10059,11 @@ ALTER TABLE fprt1_p1 SET (autovacuum_enabled = 'false'); ALTER TABLE fprt1_p2 SET (autovacuum_enabled = 'false'); INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i; INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i; +-- Add and drop column from the parent partition so that the number of +-- attributes in tuple descriptors of parent and child do not match exactly. +-- Used for testing ConvertRowtypeExpr. +ALTER TABLE fprt1 DROP COLUMN c; +ALTER TABLE fprt1 ADD COLUMN c varchar; CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250) SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true'); CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500) @@ -10125,23 +10130,19 @@ SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) 8 | | (5 rows) --- with whole-row reference; partitionwise join does not apply +-- with whole-row reference EXPLAIN (COSTS OFF) SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------- Sort Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2) - -> Hash Full Join - Hash Cond: (t1.a = t2.b) - -> Append - -> Foreign Scan on ftprt1_p1 t1_1 - -> Foreign Scan on ftprt1_p2 t1_2 - -> Hash - -> Append - -> Foreign Scan on ftprt2_p1 t2_1 - -> Foreign Scan on ftprt2_p2 t2_2 -(11 rows) + -> Append + -> Foreign Scan + Relations: (ftprt1_p1 t1_1) FULL JOIN (ftprt2_p1 t2_1) + -> Foreign Scan + Relations: (ftprt1_p2 t1_2) FULL JOIN (ftprt2_p2 t2_2) +(7 rows) SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; wr | wr @@ -10224,22 +10225,26 @@ SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE | | 475 | t2_phv (14 rows) --- test FOR UPDATE; partitionwise join does not apply +-- test FOR UPDATE EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- LockRows - -> Nested Loop - Join Filter: (t1.a = t2.b) + -> Sort + Sort Key: t1.a -> Append - -> Foreign Scan on ftprt1_p1 t1_1 - -> Foreign Scan on ftprt1_p2 t1_2 - -> Materialize - -> Append - -> Foreign Scan on ftprt2_p1 t2_1 - -> Foreign Scan on ftprt2_p2 t2_2 -(10 rows) + -> Foreign Scan + Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1) + -> Nested Loop + -> Foreign Scan on ftprt1_p1 t1_1 + -> Foreign Scan on ftprt2_p1 t2_1 + -> Foreign Scan + Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2) + -> Nested Loop + -> Foreign Scan on ftprt1_p2 t1_2 + -> Foreign Scan on ftprt2_p2 t2_2 +(14 rows) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; a | b @@ -10250,6 +10255,205 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a 400 | 400 (4 rows) +-- test sort paths and whole-row references +set enable_sort to off; +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY t2.a FOR UPDATE OF t1; + QUERY PLAN +----------------------------------------------------------------------- + LockRows + -> Merge Append + Sort Key: t2.a + -> Foreign Scan + Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1) + -> Result + -> Sort + Disabled: true + Sort Key: t2_1.a + -> Nested Loop + -> Foreign Scan on ftprt1_p1 t1_1 + -> Foreign Scan on ftprt2_p1 t2_1 + -> Foreign Scan + Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2) + -> Result + -> Sort + Disabled: true + Sort Key: t2_2.a + -> Nested Loop + -> Foreign Scan on ftprt1_p2 t1_2 + -> Foreign Scan on ftprt2_p2 t2_2 +(21 rows) + +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY t2.a FOR UPDATE OF t1; + a | b +-----+----- + 0 | 0 + 150 | 150 + 250 | 250 + 400 | 400 +(4 rows) + +reset enable_sort; +-- test partition-wise DML +EXPLAIN (COSTS OFF, VERBOSE) +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.fprt1 + Foreign Update on public.ftprt1_p1 fprt1_1 + Foreign Update on public.ftprt1_p2 fprt1_2 + -> Append + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p1 r3 SET b = (r3.b + 1) FROM public.fprt2_p1 r5 WHERE ((r3.a = r5.b)) AND (((r5.a % 25) = 0)) + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p2 r4 SET b = (r4.b + 1) FROM public.fprt2_p2 r6 WHERE ((r4.a = r6.b)) AND (((r6.a % 25) = 0)) +(8 rows) + +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; +EXPLAIN (COSTS OFF, VERBOSE) +UPDATE fprt1 SET b=(fprt1.a+fprt2.b)/2 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.fprt1 + Foreign Update on public.ftprt1_p1 fprt1_1 + Foreign Update on public.ftprt1_p2 fprt1_2 + -> Append + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p1 r3 SET b = ((r3.a + r5.b) / 2) FROM public.fprt2_p1 r5 WHERE ((r3.a = r5.b)) AND (((r5.a % 25) = 0)) + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p2 r4 SET b = ((r4.a + r6.b) / 2) FROM public.fprt2_p2 r6 WHERE ((r4.a = r6.b)) AND (((r6.a % 25) = 0)) +(8 rows) + +UPDATE fprt1 SET b=(fprt1.a+fprt2.b)/2 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; +-- returning whole row references +EXPLAIN (COSTS OFF) +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING fprt1, fprt2; + QUERY PLAN +--------------------------------------- + Update on fprt1 + Foreign Update on ftprt1_p1 fprt1_1 + Foreign Update on ftprt1_p2 fprt1_2 + -> Append + -> Foreign Update + -> Foreign Update +(6 rows) + +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING fprt1, fprt2; + fprt1 | fprt2 +----------------+---------------- + (0,1,0000) | (0,0,0000) + (150,151,0003) | (150,150,0003) + (250,251,0005) | (250,250,0005) + (400,401,0008) | (400,400,0008) +(4 rows) + +-- tableoids are returned correctly +EXPLAIN (COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING t2.tableoid::regclass; + QUERY PLAN +------------------------------------ + Update on fprt1 t1 + Foreign Update on ftprt1_p1 t1_1 + Foreign Update on ftprt1_p2 t1_2 + -> Append + -> Foreign Update + -> Foreign Update +(6 rows) + +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING t2.tableoid::regclass; + tableoid +----------- + ftprt2_p1 + ftprt2_p1 + ftprt2_p2 + ftprt2_p2 +(4 rows) + +-- join of several tables +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2, fprt1 t3 WHERE t1.a = t2.b AND t2.b = t3.a AND t1.a % 25 = 0 RETURNING *; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.fprt1 t1 + Output: t1_1.a, t1_1.b, t1_1.c, t2.a, t2.b, t2.c, t3.a, t3.b, t3.c + Foreign Update on public.ftprt1_p1 t1_1 + Foreign Update on public.ftprt1_p2 t1_2 + -> Append + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p1 r4 SET b = (r4.b + 1) FROM (public.fprt2_p1 r6 INNER JOIN public.fprt1_p1 r8 ON (TRUE)) WHERE ((r4.a = r8.a)) AND ((r4.a = r6.b)) AND (((r4.a % 25) = 0)) RETURNING r4.a, r4.b, r4.c, r6.a, r6.b, r6.c, r8.a, r8.b, r8.c + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p2 r5 SET b = (r5.b + 1) FROM (public.fprt2_p2 r7 INNER JOIN public.fprt1_p2 r9 ON (TRUE)) WHERE ((r5.a = r9.a)) AND ((r5.a = r7.b)) AND (((r5.a % 25) = 0)) RETURNING r5.a, r5.b, r5.c, r7.a, r7.b, r7.c, r9.a, r9.b, r9.c +(9 rows) + +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2, fprt1 t3 WHERE t1.a = t2.b AND t2.b = t3.a AND t1.a % 25 = 0 RETURNING *; + a | b | c | a | b | c | a | b | c +-----+-----+------+-----+-----+------+-----+-----+------ + 0 | 3 | 0000 | 0 | 0 | 0000 | 0 | 2 | 0000 + 150 | 153 | 0003 | 150 | 150 | 0003 | 150 | 152 | 0003 + 250 | 253 | 0005 | 250 | 250 | 0005 | 250 | 252 | 0005 + 400 | 403 | 0008 | 400 | 400 | 0008 | 400 | 402 | 0008 +(4 rows) + +-- left join +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 LEFT JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING *; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Update on public.fprt1 t1 + Output: t1_1.a, t1_1.b, t1_1.c, t2.a, t2.b, t2.c, t3.a, t3.b, t3.c + Foreign Update on public.ftprt1_p1 t1_1 + Foreign Update on public.ftprt1_p2 t1_2 + -> Append + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p1 r5 SET b = (r5.b + 1) FROM (public.fprt2_p1 r7 LEFT JOIN public.fprt1_p1 r9 ON (((r7.b = r9.a)))) WHERE ((r5.a = r7.b)) AND (((r5.a % 25) = 0)) RETURNING r5.a, r5.b, r5.c, r7.a, r7.b, r7.c, r9.a, r9.b, r9.c + -> Foreign Update + Remote SQL: UPDATE public.fprt1_p2 r6 SET b = (r6.b + 1) FROM (public.fprt2_p2 r8 LEFT JOIN public.fprt1_p2 r10 ON (((r8.b = r10.a)))) WHERE ((r6.a = r8.b)) AND (((r6.a % 25) = 0)) RETURNING r6.a, r6.b, r6.c, r8.a, r8.b, r8.c, r10.a, r10.b, r10.c +(9 rows) + +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 LEFT JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING *; + a | b | c | a | b | c | a | b | c +-----+-----+------+-----+-----+------+-----+-----+------ + 0 | 4 | 0000 | 0 | 0 | 0000 | 0 | 3 | 0000 + 150 | 154 | 0003 | 150 | 150 | 0003 | 150 | 153 | 0003 + 250 | 254 | 0005 | 250 | 250 | 0005 | 250 | 253 | 0005 + 400 | 404 | 0008 | 400 | 400 | 0008 | 400 | 403 | 0008 +(4 rows) + +-- delete +EXPLAIN (COSTS OFF) +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 30 = 29; + QUERY PLAN +--------------------------------------- + Delete on fprt1 + Foreign Delete on ftprt1_p1 fprt1_1 + Foreign Delete on ftprt1_p2 fprt1_2 + -> Append + -> Foreign Delete + -> Foreign Delete +(6 rows) + +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 30 = 29; +EXPLAIN (COSTS OFF) +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING *; + QUERY PLAN +--------------------------------------- + Delete on fprt1 + Foreign Delete on ftprt1_p1 fprt1_1 + Foreign Delete on ftprt1_p2 fprt1_2 + -> Append + -> Foreign Delete + -> Foreign Delete +(6 rows) + +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING *; + a | b | c | a | b | c +-----+-----+------+-----+-----+------ + 0 | 4 | 0000 | 0 | 0 | 0000 + 150 | 154 | 0003 | 150 | 150 | 0003 + 250 | 254 | 0005 | 250 | 250 | 0005 + 400 | 404 | 0008 | 400 | 400 | 0008 +(4 rows) + RESET enable_partitionwise_join; -- =================================================================== -- test partitionwise aggregates diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index b92e2a0fc9f8..044661bafff4 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -242,6 +242,9 @@ typedef struct PgFdwDirectModifyState AttrNumber oidAttno; /* attnum of input oid column */ bool hasSystemCols; /* are there system columns of resultRel? */ + List *tidAttnos; /* attnos of output tid columns */ + List *tidVarnos; /* varnos of output tid columns */ + /* working memory context */ MemoryContext temp_cxt; /* context for per-tuple temporary data */ } PgFdwDirectModifyState; @@ -1473,6 +1476,22 @@ get_tupdesc_for_join_scan_tuples(ForeignScanState *node) */ var = (Var *) list_nth_node(TargetEntry, fsplan->fdw_scan_tlist, i)->expr; + + /* + * TODO: Do we really need this? Target list can contain not only + * Vars, but ConvertRowtypeExpr will certainly contribute to + * ss.ss_ScanTupleSlot->tts_tupleDescriptor attribute type in + * ExecTypeFromTL() (called from ExecInitForeignScan()) in the same + * way. + */ + if (IsA(var, ConvertRowtypeExpr)) + { + ConvertRowtypeExpr *convexpr = castNode(ConvertRowtypeExpr, var); + + att->atttypid = convexpr->resulttype; + continue; + } + if (!IsA(var, Var) || var->varattno != 0) continue; rte = list_nth(estate->es_range_table, var->varno - 1); @@ -2463,6 +2482,7 @@ postgresPlanDirectModify(PlannerInfo *root, List *params_list = NIL; List *returningList = NIL; List *retrieved_attrs = NIL; + Relids child_relids = NULL; /* * Decide whether it is safe to modify a foreign table directly. @@ -2494,6 +2514,12 @@ postgresPlanDirectModify(PlannerInfo *root, foreignrel = find_join_rel(root, fscan->fs_relids); /* We should have a rel for this foreign join. */ Assert(foreignrel); + + /* + * If it's a child joinrel, record relids to translate. + */ + if (IS_OTHER_REL(foreignrel)) + child_relids = foreignrel->relids; } else foreignrel = root->simple_rel_array[resultRelation]; @@ -2513,7 +2539,7 @@ postgresPlanDirectModify(PlannerInfo *root, * The expressions of concern are the first N columns of the processed * targetlist, where N is the length of the rel's update_colnos. */ - get_translated_update_targetlist(root, resultRelation, + get_translated_update_targetlist(root, resultRelation, child_relids, &processed_tlist, &targetAttrs); forboth(lc, processed_tlist, lc2, targetAttrs) { @@ -2566,8 +2592,17 @@ postgresPlanDirectModify(PlannerInfo *root, * node below. */ if (fscan->scan.scanrelid == 0) + { + if (foreignrel->reloptkind == RELOPT_OTHER_JOINREL) + { + returningList = (List *) adjust_appendrel_attrs_multilevel(root, + (Node *) returningList, + foreignrel, + foreignrel->top_parent); + } returningList = build_remote_returning(resultRelation, rel, returningList); + } } /* @@ -3244,7 +3279,7 @@ estimate_path_cost_size(PlannerInfo *root, /* Shouldn't get here unless we have LIMIT */ Assert(fpextra->has_limit); Assert(foreignrel->reloptkind == RELOPT_BASEREL || - foreignrel->reloptkind == RELOPT_JOINREL); + IS_JOIN_REL(foreignrel)); startup_cost += foreignrel->reltarget->cost.startup; run_cost += foreignrel->reltarget->cost.per_tuple * rows; } @@ -4436,7 +4471,7 @@ build_remote_returning(Index rtindex, Relation rel, List *returningList) Assert(returningList); - vars = pull_var_clause((Node *) returningList, PVC_INCLUDE_PLACEHOLDERS); + vars = pull_var_clause((Node *) returningList, PVC_INCLUDE_PLACEHOLDERS | PVC_INCLUDE_CONVERTROWTYPES); /* * If there's a whole-row reference to the target relation, then we'll @@ -4446,6 +4481,9 @@ build_remote_returning(Index rtindex, Relation rel, List *returningList) { Var *var = (Var *) lfirst(lc); + while (IsA(var, ConvertRowtypeExpr)) + var = (Var *) (((ConvertRowtypeExpr *) var)->arg); + if (IsA(var, Var) && var->varno == rtindex && var->varattno == InvalidAttrNumber) @@ -4500,6 +4538,29 @@ build_remote_returning(Index rtindex, Relation rel, List *returningList) var->varattno != SelfItemPointerAttributeNumber) continue; /* don't need it */ + /* + * Also no need in whole-row references to the target relation under + * ConvertRowtypeExpr + */ + if (IsA(var, ConvertRowtypeExpr)) + { + ConvertRowtypeExpr *cre = (ConvertRowtypeExpr *) var; + Var *cre_var; + + while (IsA(cre->arg, ConvertRowtypeExpr)) + cre = (ConvertRowtypeExpr *) cre->arg; + + + cre_var = (Var *) cre->arg; + + if (IsA(cre_var, Var) && + cre_var->varno == rtindex) + { + Assert(cre_var->varattno == InvalidAttrNumber); + continue; + } + } + if (tlist_member((Expr *) var, tlist)) continue; /* already got it */ @@ -4705,15 +4766,26 @@ init_returning_filter(PgFdwDirectModifyState *dmstate, palloc0(resultTupType->natts * sizeof(AttrNumber)); dmstate->ctidAttno = dmstate->oidAttno = 0; + dmstate->tidAttnos = dmstate->tidVarnos = NIL; i = 1; dmstate->hasSystemCols = false; foreach(lc, fdw_scan_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(lc); - Var *var = (Var *) tle->expr; + Expr *expr; + Var *var; - Assert(IsA(var, Var)); + expr = tle->expr; + + while (IsA(expr, ConvertRowtypeExpr)) + { + ConvertRowtypeExpr *cre = castNode(ConvertRowtypeExpr, expr); + + expr = cre->arg; + } + + var = castNode(Var, expr); /* * If the Var is a column of the target relation to be retrieved from @@ -4742,10 +4814,15 @@ init_returning_filter(PgFdwDirectModifyState *dmstate, * relation either. */ Assert(attrno > 0); - dmstate->attnoMap[attrno - 1] = i; } } + /* Record tableoid attributes and corresponding varnos */ + if (var->varattno == TableOidAttributeNumber) + { + dmstate->tidAttnos = lappend_int(dmstate->tidAttnos, i); + dmstate->tidVarnos = lappend_int(dmstate->tidVarnos, var->varno); + } i++; } } @@ -4766,6 +4843,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, Datum *old_values; bool *old_isnull; int i; + ListCell *lc1, + *lc2; /* * Use the return tuple slot as a place to store the result tuple. @@ -4805,6 +4884,29 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, } } + /* + * Set tableoid attributes + */ + forboth(lc1, dmstate->tidAttnos, lc2, dmstate->tidVarnos) + { + AttrNumber attno; + Index varno; + RangeTblEntry *rte; + + attno = lfirst_int(lc1); + varno = lfirst_int(lc2); + + rte = list_nth(estate->es_range_table, varno - 1); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->relkind == RELKIND_FOREIGN_TABLE); + + /* Attributes numbering in init_returning_filter() starts with 1 */ + Assert(attno >= 1); + + slot->tts_values[attno - 1] = ObjectIdGetDatum(rte->relid); + slot->tts_isnull[attno - 1] = false; + } + /* * Build the virtual tuple. */ @@ -6116,7 +6218,7 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, /* Include columns required for evaluating PHVs in the tlist. */ add_new_columns_to_pathtarget(target, pull_var_clause((Node *) target->exprs, - PVC_RECURSE_PLACEHOLDERS)); + PVC_RECURSE_PLACEHOLDERS | PVC_INCLUDE_CONVERTROWTYPES)); /* Include columns required for evaluating the local conditions. */ foreach(lc, fpinfo->local_conds) @@ -6125,7 +6227,7 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, add_new_columns_to_pathtarget(target, pull_var_clause((Node *) rinfo->clause, - PVC_RECURSE_PLACEHOLDERS)); + PVC_RECURSE_PLACEHOLDERS | PVC_INCLUDE_CONVERTROWTYPES)); } /* diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index b58ab6ee5867..119d7da6731d 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -3138,6 +3138,11 @@ ALTER TABLE fprt1_p1 SET (autovacuum_enabled = 'false'); ALTER TABLE fprt1_p2 SET (autovacuum_enabled = 'false'); INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i; INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i; +-- Add and drop column from the parent partition so that the number of +-- attributes in tuple descriptors of parent and child do not match exactly. +-- Used for testing ConvertRowtypeExpr. +ALTER TABLE fprt1 DROP COLUMN c; +ALTER TABLE fprt1 ADD COLUMN c varchar; CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250) SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true'); CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500) @@ -3172,7 +3177,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; --- with whole-row reference; partitionwise join does not apply +-- with whole-row reference EXPLAIN (COSTS OFF) SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; @@ -3187,11 +3192,56 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b; SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b; --- test FOR UPDATE; partitionwise join does not apply +-- test FOR UPDATE EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; +-- test sort paths and whole-row references +set enable_sort to off; +EXPLAIN (COSTS OFF) +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY t2.a FOR UPDATE OF t1; +SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY t2.a FOR UPDATE OF t1; +reset enable_sort; + +-- test partition-wise DML +EXPLAIN (COSTS OFF, VERBOSE) +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; + +EXPLAIN (COSTS OFF, VERBOSE) +UPDATE fprt1 SET b=(fprt1.a+fprt2.b)/2 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; +UPDATE fprt1 SET b=(fprt1.a+fprt2.b)/2 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0; + +-- returning whole row references +EXPLAIN (COSTS OFF) +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING fprt1, fprt2; +UPDATE fprt1 SET b=fprt1.b+1 FROM fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING fprt1, fprt2; + +-- tableoids are returned correctly +EXPLAIN (COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING t2.tableoid::regclass; +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING t2.tableoid::regclass; + +-- join of several tables +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2, fprt1 t3 WHERE t1.a = t2.b AND t2.b = t3.a AND t1.a % 25 = 0 RETURNING *; +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2, fprt1 t3 WHERE t1.a = t2.b AND t2.b = t3.a AND t1.a % 25 = 0 RETURNING *; + +-- left join +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 LEFT JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING *; +UPDATE fprt1 t1 SET b = t1.b + 1 FROM fprt2 t2 LEFT JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a = t2.b AND t1.a % 25 = 0 RETURNING *; + +-- delete +EXPLAIN (COSTS OFF) +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 30 = 29; +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 30 = 29; + +EXPLAIN (COSTS OFF) +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING *; +DELETE FROM fprt1 USING fprt2 WHERE fprt1.a = fprt2.b AND fprt2.a % 25 = 0 RETURNING *; + RESET enable_partitionwise_join; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 7bc823507f1b..54c6d9ed18f3 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -4840,3 +4840,84 @@ planstate_walk_members(PlanState **planstates, int nplans, return false; } + +/* + * is_converted_whole_row_reference + * If the given node is a ConvertRowtypeExpr encapsulating a whole-row + * reference as implicit cast (i.e a parent's whole row reference + * translated by adjust_appendrel_attrs()), return true. Otherwise return + * false. + */ +bool +is_converted_whole_row_reference(Node *node) +{ + ConvertRowtypeExpr *convexpr; + + if (!node || !IsA(node, ConvertRowtypeExpr)) + return false; + + /* Traverse nested ConvertRowtypeExpr's. */ + convexpr = castNode(ConvertRowtypeExpr, node); + while (convexpr->convertformat == COERCE_IMPLICIT_CAST && + IsA(convexpr->arg, ConvertRowtypeExpr)) + convexpr = castNode(ConvertRowtypeExpr, convexpr->arg); + + if (IsA(convexpr->arg, Var)) + { + Var *var = castNode(Var, convexpr->arg); + + if (var->varattno == 0) + return true; + } + + return false; +} + +/* + * is_equal_converted_whole_row_references + * Determine if both nodes are equivalent ConvertRowtypeExprs + * over the same var. + * It differs from equal(), because we ignore varnullingrels. + */ +bool +is_equal_converted_whole_row_references(Node *node1, Node *node2) +{ + ConvertRowtypeExpr *convexpr1; + ConvertRowtypeExpr *convexpr2; + + if (!node1 || !IsA(node1, ConvertRowtypeExpr)) + return false; + + if (!node2 || !IsA(node2, ConvertRowtypeExpr)) + return false; + + convexpr1 = castNode(ConvertRowtypeExpr, node1); + convexpr2 = castNode(ConvertRowtypeExpr, node2); + + while (convexpr1->convertformat == COERCE_IMPLICIT_CAST && + convexpr2->convertformat == COERCE_IMPLICIT_CAST && + convexpr1->resulttype == convexpr2->resulttype && + IsA(convexpr1->arg, ConvertRowtypeExpr) && + IsA(convexpr2->arg, ConvertRowtypeExpr)) + { + convexpr1 = castNode(ConvertRowtypeExpr, convexpr1->arg); + convexpr2 = castNode(ConvertRowtypeExpr, convexpr2->arg); + } + + if (IsA(convexpr1->arg, Var) && IsA(convexpr2->arg, Var)) + { + Var *var1 = castNode(Var, convexpr1->arg); + Var *var2 = castNode(Var, convexpr2->arg); + + if ((var1->varno == var2->varno) && + (var1->varattno == var2->varattno) && + (var1->varlevelsup == var2->varlevelsup) && + (var1->vartype == var2->vartype)) + { + /* TODO: Can we state that both varattnos is 0? */ + return true; + } + } + + return false; +} diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 1115ebeee29f..6b524c96e00c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -976,8 +976,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, */ if (enable_partitionwise_join && rel->reloptkind == RELOPT_BASEREL && - rte->relkind == RELKIND_PARTITIONED_TABLE && - bms_is_empty(rel->attr_needed[InvalidAttrNumber - rel->min_attr])) + rte->relkind == RELKIND_PARTITIONED_TABLE) rel->consider_partitionwise_join = true; /* diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7cafaca33c5c..aadbff7e70b6 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -852,7 +852,8 @@ find_computable_ec_member(PlannerInfo *root, exprvars = pull_var_clause((Node *) exprs, PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_WINDOWFUNCS | - PVC_INCLUDE_PLACEHOLDERS); + PVC_INCLUDE_PLACEHOLDERS | + PVC_INCLUDE_CONVERTROWTYPES); foreach(lc, ec->ec_members) { @@ -880,7 +881,8 @@ find_computable_ec_member(PlannerInfo *root, emvars = pull_var_clause((Node *) em->em_expr, PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_WINDOWFUNCS | - PVC_INCLUDE_PLACEHOLDERS); + PVC_INCLUDE_PLACEHOLDERS| + PVC_INCLUDE_CONVERTROWTYPES); foreach(lc2, emvars) { if (!list_member(exprvars, lfirst(lc2))) diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 0868249be94b..819d7e497de2 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -52,6 +52,9 @@ typedef struct int num_vars; /* number of plain Var tlist entries */ bool has_ph_vars; /* are there PlaceHolderVar entries? */ bool has_non_vars; /* are there other entries? */ + bool has_conv_whole_rows; /* are there ConvertRowtypeExpr + * entries encapsulating a whole-row + * Var? */ tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */ } indexed_tlist; @@ -211,7 +214,6 @@ static List *set_windowagg_runcondition_references(PlannerInfo *root, List *runcondition, Plan *plan); - /***************************************************************************** * * SUBPLAN REFERENCES @@ -2738,6 +2740,7 @@ build_tlist_index(List *tlist) itlist->tlist = tlist; itlist->has_ph_vars = false; itlist->has_non_vars = false; + itlist->has_conv_whole_rows = false; /* Find the Vars and fill in the index array */ vinfo = itlist->vars; @@ -2757,6 +2760,8 @@ build_tlist_index(List *tlist) } else if (tle->expr && IsA(tle->expr, PlaceHolderVar)) itlist->has_ph_vars = true; + else if (is_converted_whole_row_reference((Node *) tle->expr)) + itlist->has_conv_whole_rows = true; else itlist->has_non_vars = true; } @@ -2772,7 +2777,10 @@ build_tlist_index(List *tlist) * This is like build_tlist_index, but we only index tlist entries that * are Vars belonging to some rel other than the one specified. We will set * has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars - * (so nothing other than Vars and PlaceHolderVars can be matched). + * (so nothing other than Vars and PlaceHolderVars can be matched). In case of + * DML, where this function will be used, returning lists from child relations + * will be appended similar to a simple append relation. That does not require + * fixing ConvertRowtypeExpr references. So, those are not considered here. */ static indexed_tlist * build_tlist_index_other_vars(List *tlist, int ignore_rel) @@ -2789,6 +2797,7 @@ build_tlist_index_other_vars(List *tlist, int ignore_rel) itlist->tlist = tlist; itlist->has_ph_vars = false; itlist->has_non_vars = false; + itlist->has_conv_whole_rows = false; /* Find the desired Vars and fill in the index array */ vinfo = itlist->vars; @@ -2966,7 +2975,7 @@ search_indexed_tlist_for_non_var(Expr *node, if (IsA(node, Const)) return NULL; - tle = tlist_member(node, itlist->tlist); + tle = tlist_member_match_converted_whole_row(node, itlist->tlist); if (tle) { /* Found a matching subplan output expression */ @@ -3094,6 +3103,7 @@ static Node * fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { Var *newvar; + bool converted_whole_row; if (node == NULL) return NULL; @@ -3182,7 +3192,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) return fix_join_expr_mutator((Node *) phv->phexpr, context); } /* Try matching more complex expressions too, if tlists have any */ - if (context->outer_itlist && context->outer_itlist->has_non_vars) + converted_whole_row = is_converted_whole_row_reference(node); + if (context->outer_itlist && (context->outer_itlist->has_non_vars || (context->outer_itlist->has_conv_whole_rows && converted_whole_row))) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->outer_itlist, @@ -3190,7 +3201,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) if (newvar) return (Node *) newvar; } - if (context->inner_itlist && context->inner_itlist->has_non_vars) + if (context->inner_itlist && (context->inner_itlist->has_non_vars || (context->inner_itlist->has_conv_whole_rows && converted_whole_row))) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->inner_itlist, @@ -3301,7 +3312,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) return fix_upper_expr_mutator((Node *) phv->phexpr, context); } /* Try matching more complex expressions too, if tlist has any */ - if (context->subplan_itlist->has_non_vars) + if (context->subplan_itlist->has_non_vars || (context->subplan_itlist->has_conv_whole_rows && is_converted_whole_row_reference(node))) { newvar = search_indexed_tlist_for_non_var((Expr *) node, context->subplan_itlist, diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 5b3dc0d86539..789cd06186fa 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -707,7 +707,7 @@ adjust_inherited_attnums_multilevel(PlannerInfo *root, List *attnums, * relied on for this purpose.) */ void -get_translated_update_targetlist(PlannerInfo *root, Index relid, +get_translated_update_targetlist(PlannerInfo *root, Index relid, Relids child_joinrel_relids, List **processed_tlist, List **update_colnos) { /* This is pretty meaningless for commands other than UPDATE. */ @@ -725,11 +725,31 @@ get_translated_update_targetlist(PlannerInfo *root, Index relid, else { Assert(bms_is_member(relid, root->all_result_relids)); - *processed_tlist = (List *) - adjust_appendrel_attrs_multilevel(root, - (Node *) root->processed_tlist, - find_base_rel(root, relid), - find_base_rel(root, root->parse->resultRelation)); + + /* + * UPDATE targetlist corresponds to SET clause and so can contain + * arbitrary expressions, including those which contain references not + * only to result relation, but also to other vars. So in case of join + * we should also translate these vars to their parents. + */ + if (bms_is_empty(child_joinrel_relids)) + { + *processed_tlist = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) root->processed_tlist, + find_base_rel(root, relid), + find_base_rel(root, root->parse->resultRelation)); + } + else + { + RelOptInfo *child_joinrel = find_join_rel(root, child_joinrel_relids); + + *processed_tlist = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) root->processed_tlist, + child_joinrel, + child_joinrel->top_parent); + } if (update_colnos) *update_colnos = adjust_inherited_attnums_multilevel(root, root->update_colnos, diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index d2b4ecc5e513..454d2e3abc11 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -119,6 +119,29 @@ tlist_member_match_var(Var *var, List *targetlist) return NULL; } +/* + * tlist_member_match_converted_whole_row + * tlist_member() variant, which compares whole var references + * based on their varno/varattno + */ +TargetEntry * +tlist_member_match_converted_whole_row(Expr *node, List *targetlist) +{ + ListCell *temp; + + foreach(temp, targetlist) + { + TargetEntry *tlentry = (TargetEntry *) lfirst(temp); + + if (equal(node, tlentry->expr)) + return tlentry; + + if (is_equal_converted_whole_row_references((Node *)node, (Node *)tlentry->expr)) + return tlentry; + } + return NULL; +} + /* * add_to_flat_tlist * Add more items to a flattened tlist (if they're not already in it) diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 8065237a1895..186bd44554c8 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -635,6 +635,11 @@ locate_var_of_level_walker(Node *node, * Vars within a PHV's expression are included in the result only * when PVC_RECURSE_PLACEHOLDERS is specified. * + * ConvertRowtypeExprs encapsulating whole row references are handled + * according to these bits in 'flags': + * PVC_INCLUDE_CONVERTROWTYPES include ConvertRowtypeExprs in output list + * by default - recurse into ConvertRowtypeExprs arguments + * * GroupingFuncs are treated exactly like Aggrefs, and so do not need * their own flag bits. * @@ -748,6 +753,19 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context) else elog(ERROR, "PlaceHolderVar found where not expected"); } + else if (is_converted_whole_row_reference(node)) + { + if (context->flags & PVC_INCLUDE_CONVERTROWTYPES) + { + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + } + else + { + /* fall through to recurse into the ConvertRowtype's argument. */ + } + } return expression_tree_walker(node, pull_var_clause_walker, context); } diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h index 5653fec8cbe5..c42ca6795bbe 100644 --- a/src/include/nodes/nodeFuncs.h +++ b/src/include/nodes/nodeFuncs.h @@ -221,4 +221,6 @@ extern bool planstate_tree_walker_impl(struct PlanState *planstate, planstate_tree_walker_callback walker, void *context); +extern bool is_converted_whole_row_reference(Node *node); +extern bool is_equal_converted_whole_row_references(Node *node1, Node *node2); #endif /* NODEFUNCS_H */ diff --git a/src/include/optimizer/appendinfo.h b/src/include/optimizer/appendinfo.h index d06f93b72667..787ab92a0e78 100644 --- a/src/include/optimizer/appendinfo.h +++ b/src/include/optimizer/appendinfo.h @@ -36,6 +36,7 @@ extern List *adjust_inherited_attnums_multilevel(PlannerInfo *root, Index child_relid, Index top_parent_relid); extern void get_translated_update_targetlist(PlannerInfo *root, Index relid, + Relids child_joinrel_relids, List **processed_tlist, List **update_colnos); extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root, diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index bcf8ed645c24..4c9843728695 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -192,6 +192,8 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, * output list */ #define PVC_RECURSE_PLACEHOLDERS 0x0020 /* recurse into PlaceHolderVar * arguments */ +#define PVC_INCLUDE_CONVERTROWTYPES 0x0040 /* include ConvertRowtypeExprs in + * output list */ extern Bitmapset *pull_varnos(PlannerInfo *root, Node *node); extern Bitmapset *pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 9c355bf11794..473b3946035d 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -18,6 +18,7 @@ extern TargetEntry *tlist_member(Expr *node, List *targetlist); +extern TargetEntry *tlist_member_match_converted_whole_row(Expr *node, List *targetlist); extern List *add_to_flat_tlist(List *tlist, List *exprs); diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 5f2c0cf5786e..ba9245e40112 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -453,27 +453,36 @@ SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2 24 | 900 | 100 (5 rows) --- Check with whole-row reference; partitionwise aggregation does not apply +-- Check with whole-row reference EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; QUERY PLAN ------------------------------------------------------------- Sort Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) - -> HashAggregate - Group Key: t1.x - -> Hash Join - Hash Cond: (t1.x = t2.y) - -> Append - -> Seq Scan on pagg_tab1_p1 t1_1 - -> Seq Scan on pagg_tab1_p2 t1_2 - -> Seq Scan on pagg_tab1_p3 t1_3 - -> Hash - -> Append - -> Seq Scan on pagg_tab2_p1 t2_1 - -> Seq Scan on pagg_tab2_p2 t2_2 - -> Seq Scan on pagg_tab2_p3 t2_3 -(15 rows) + -> Append + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t1_1.x + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t1_2.x + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; x | sum | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 108f9ecb4458..22c4279931f9 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -147,28 +147,33 @@ SELECT COUNT(*) FROM prt1 t1 300 (1 row) --- left outer join, with whole-row reference; partitionwise join does not apply +-- left outer join, with whole-row reference EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; QUERY PLAN -------------------------------------------------- Sort Sort Key: t1.a, t2.b - -> Hash Right Join - Hash Cond: (t2.b = t1.a) - -> Append + -> Append + -> Hash Right Join + Hash Cond: (t2_1.b = t1_1.a) -> Seq Scan on prt2_p1 t2_1 - -> Seq Scan on prt2_p2 t2_2 - -> Seq Scan on prt2_p3 t2_3 - -> Hash - -> Append + -> Hash -> Seq Scan on prt1_p1 t1_1 Filter: (b = 0) + -> Hash Right Join + Hash Cond: (t2_2.b = t1_2.a) + -> Seq Scan on prt2_p2 t2_2 + -> Hash -> Seq Scan on prt1_p2 t1_2 Filter: (b = 0) + -> Hash Right Join + Hash Cond: (t2_3.b = t1_3.a) + -> Seq Scan on prt2_p3 t2_3 + -> Hash -> Seq Scan on prt1_p3 t1_3 Filter: (b = 0) -(16 rows) +(21 rows) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; t1 | t2 @@ -1386,28 +1391,37 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * (9 rows) -- merge join when expression with whole-row reference needs to be sorted; --- partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; - QUERY PLAN ------------------------------------------------------------------------------------------ - Merge Join - Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text))) - -> Sort - Sort Key: t1.a, ((((t1.*)::prt1))::text) - -> Result - -> Append - -> Seq Scan on prt1_p1 t1_1 - -> Seq Scan on prt1_p2 t1_2 - -> Seq Scan on prt1_p3 t1_3 - -> Sort - Sort Key: t2.b, ((((t2.*)::prt2))::text) - -> Result - -> Append - -> Seq Scan on prt2_p1 t2_1 - -> Seq Scan on prt2_p2 t2_2 - -> Seq Scan on prt2_p3 t2_3 -(16 rows) + QUERY PLAN +----------------------------------------------------------------------------------- + Merge Append + Sort Key: t1.a + -> Merge Join + Merge Cond: ((t1_1.a = t2_1.b) AND (((t1_1.*)::text) = ((t2_1.*)::text))) + -> Sort + Sort Key: t1_1.a, ((t1_1.*)::text) + -> Seq Scan on prt1_p1 t1_1 + -> Sort + Sort Key: t2_1.b, ((t2_1.*)::text) + -> Seq Scan on prt2_p1 t2_1 + -> Merge Join + Merge Cond: ((t1_2.a = t2_2.b) AND (((t1_2.*)::text) = ((t2_2.*)::text))) + -> Sort + Sort Key: t1_2.a, ((t1_2.*)::text) + -> Seq Scan on prt1_p2 t1_2 + -> Sort + Sort Key: t2_2.b, ((t2_2.*)::text) + -> Seq Scan on prt2_p2 t2_2 + -> Merge Join + Merge Cond: ((t1_3.a = t2_3.b) AND (((t1_3.*)::text) = ((t2_3.*)::text))) + -> Sort + Sort Key: t1_3.a, ((t1_3.*)::text) + -> Seq Scan on prt1_p3 t1_3 + -> Sort + Sort Key: t2_3.b, ((t2_3.*)::text) + -> Seq Scan on prt2_p3 t2_3 +(26 rows) SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; a | b diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index ab070fee2449..a763228e6c0f 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -116,7 +116,7 @@ EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; --- Check with whole-row reference; partitionwise aggregation does not apply +-- Check with whole-row reference EXPLAIN (COSTS OFF) SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index e84b65f4442f..36fe3cf66dcd 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -48,7 +48,7 @@ SELECT COUNT(*) FROM prt1 t1 LEFT JOIN prt1 t2 ON t1.a = t2.a LEFT JOIN prt1 t3 ON t2.a = t3.a; --- left outer join, with whole-row reference; partitionwise join does not apply +-- left outer join, with whole-row reference EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; @@ -234,7 +234,6 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; -- merge join when expression with whole-row reference needs to be sorted; --- partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;