Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 1c2cb27

Browse files
committed
Fix run-time partition pruning for appends with multiple source rels.
The previous coding here supposed that if run-time partition pruning applied to a particular Append/MergeAppend plan, then all child plans of that node must be members of a single partitioning hierarchy. This is totally wrong, since an Append could be formed from a UNION ALL: we could have multiple hierarchies sharing the same Append, or child plans that aren't part of any hierarchy.

To fix, restructure the related plan-time and execution-time data structures so that we can have a separate list or array for each partitioning hierarchy. Also track subplans that are not part of any hierarchy, and make sure they don't get pruned.

Per reports from Phil Florent and others. Back-patch to v11, since the bug originated there.

David Rowley, with a lot of cosmetic adjustments by me; thanks also to Amit Langote for review.

Discussion: https://postgr.es/m/HE1PR03MB17068BB27404C90B5B788BCABA7B0@HE1PR03MB1706.eurprd03.prod.outlook.com
1 parent c40489e commit 1c2cb27

File tree

16 files changed

+842
-287
lines changed

16 files changed

+842
-287
lines changed

src/backend/executor/execPartition.c

+239-160
Large diffs are not rendered by default.

src/backend/executor/nodeAppend.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
129129
appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
130130

131131
/* If run-time partition pruning is enabled, then set that up now */
132-
if (node->part_prune_infos != NIL)
132+
if (node->part_prune_info != NULL)
133133
{
134134
PartitionPruneState *prunestate;
135135

@@ -138,7 +138,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
138138

139139
/* Create the working data structure for pruning. */
140140
prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
141-
node->part_prune_infos);
141+
node->part_prune_info);
142142
appendstate->as_prune_state = prunestate;
143143

144144
/* Perform an initial partition prune, if required. */

src/backend/executor/nodeMergeAppend.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -90,15 +90,15 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
9090
mergestate->ms_noopscan = false;
9191

9292
/* If run-time partition pruning is enabled, then set that up now */
93-
if (node->part_prune_infos != NIL)
93+
if (node->part_prune_info != NULL)
9494
{
9595
PartitionPruneState *prunestate;
9696

9797
/* We may need an expression context to evaluate partition exprs */
9898
ExecAssignExprContext(estate, &mergestate->ps);
9999

100100
prunestate = ExecCreatePartitionPruneState(&mergestate->ps,
101-
node->part_prune_infos);
101+
node->part_prune_info);
102102
mergestate->ms_prune_state = prunestate;
103103

104104
/* Perform an initial partition prune, if required. */

src/backend/nodes/copyfuncs.c

+16-2
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ _copyAppend(const Append *from)
245245
COPY_NODE_FIELD(appendplans);
246246
COPY_SCALAR_FIELD(first_partial_plan);
247247
COPY_NODE_FIELD(partitioned_rels);
248-
COPY_NODE_FIELD(part_prune_infos);
248+
COPY_NODE_FIELD(part_prune_info);
249249

250250
return newnode;
251251
}
@@ -273,7 +273,7 @@ _copyMergeAppend(const MergeAppend *from)
273273
COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid));
274274
COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid));
275275
COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool));
276-
COPY_NODE_FIELD(part_prune_infos);
276+
COPY_NODE_FIELD(part_prune_info);
277277

278278
return newnode;
279279
}
@@ -1182,6 +1182,17 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from)
11821182
{
11831183
PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo);
11841184

1185+
COPY_NODE_FIELD(prune_infos);
1186+
COPY_BITMAPSET_FIELD(other_subplans);
1187+
1188+
return newnode;
1189+
}
1190+
1191+
static PartitionedRelPruneInfo *
1192+
_copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from)
1193+
{
1194+
PartitionedRelPruneInfo *newnode = makeNode(PartitionedRelPruneInfo);
1195+
11851196
COPY_SCALAR_FIELD(reloid);
11861197
COPY_NODE_FIELD(pruning_steps);
11871198
COPY_BITMAPSET_FIELD(present_parts);
@@ -4908,6 +4919,9 @@ copyObjectImpl(const void *from)
49084919
case T_PartitionPruneInfo:
49094920
retval = _copyPartitionPruneInfo(from);
49104921
break;
4922+
case T_PartitionedRelPruneInfo:
4923+
retval = _copyPartitionedRelPruneInfo(from);
4924+
break;
49114925
case T_PartitionPruneStepOp:
49124926
retval = _copyPartitionPruneStepOp(from);
49134927
break;

src/backend/nodes/outfuncs.c

+15-3
Original file line number | Diff line number | Diff line change
@@ -402,7 +402,7 @@ _outAppend(StringInfo str, const Append *node)
402402
WRITE_NODE_FIELD(appendplans);
403403
WRITE_INT_FIELD(first_partial_plan);
404404
WRITE_NODE_FIELD(partitioned_rels);
405-
WRITE_NODE_FIELD(part_prune_infos);
405+
WRITE_NODE_FIELD(part_prune_info);
406406
}
407407

408408
static void
@@ -435,7 +435,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node)
435435
for (i = 0; i < node->numCols; i++)
436436
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
437437

438-
WRITE_NODE_FIELD(part_prune_infos);
438+
WRITE_NODE_FIELD(part_prune_info);
439439
}
440440

441441
static void
@@ -1014,10 +1014,19 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node)
10141014

10151015
static void
10161016
_outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node)
1017+
{
1018+
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
1019+
1020+
WRITE_NODE_FIELD(prune_infos);
1021+
WRITE_BITMAPSET_FIELD(other_subplans);
1022+
}
1023+
1024+
static void
1025+
_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node)
10171026
{
10181027
int i;
10191028

1020-
WRITE_NODE_TYPE("PARTITIONPRUNEINFO");
1029+
WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO");
10211030

10221031
WRITE_OID_FIELD(reloid);
10231032
WRITE_NODE_FIELD(pruning_steps);
@@ -3831,6 +3840,9 @@ outNode(StringInfo str, const void *obj)
38313840
case T_PartitionPruneInfo:
38323841
_outPartitionPruneInfo(str, obj);
38333842
break;
3843+
case T_PartitionedRelPruneInfo:
3844+
_outPartitionedRelPruneInfo(str, obj);
3845+
break;
38343846
case T_PartitionPruneStepOp:
38353847
_outPartitionPruneStepOp(str, obj);
38363848
break;

src/backend/nodes/readfuncs.c

+15-2
Original file line number | Diff line number | Diff line change
@@ -1612,7 +1612,7 @@ _readAppend(void)
16121612
READ_NODE_FIELD(appendplans);
16131613
READ_INT_FIELD(first_partial_plan);
16141614
READ_NODE_FIELD(partitioned_rels);
1615-
READ_NODE_FIELD(part_prune_infos);
1615+
READ_NODE_FIELD(part_prune_info);
16161616

16171617
READ_DONE();
16181618
}
@@ -1634,7 +1634,7 @@ _readMergeAppend(void)
16341634
READ_OID_ARRAY(sortOperators, local_node->numCols);
16351635
READ_OID_ARRAY(collations, local_node->numCols);
16361636
READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
1637-
READ_NODE_FIELD(part_prune_infos);
1637+
READ_NODE_FIELD(part_prune_info);
16381638

16391639
READ_DONE();
16401640
}
@@ -2329,6 +2329,17 @@ _readPartitionPruneInfo(void)
23292329
{
23302330
READ_LOCALS(PartitionPruneInfo);
23312331

2332+
READ_NODE_FIELD(prune_infos);
2333+
READ_BITMAPSET_FIELD(other_subplans);
2334+
2335+
READ_DONE();
2336+
}
2337+
2338+
static PartitionedRelPruneInfo *
2339+
_readPartitionedRelPruneInfo(void)
2340+
{
2341+
READ_LOCALS(PartitionedRelPruneInfo);
2342+
23322343
READ_OID_FIELD(reloid);
23332344
READ_NODE_FIELD(pruning_steps);
23342345
READ_BITMAPSET_FIELD(present_parts);
@@ -2726,6 +2737,8 @@ parseNodeString(void)
27262737
return_value = _readPlanRowMark();
27272738
else if (MATCH("PARTITIONPRUNEINFO", 18))
27282739
return_value = _readPartitionPruneInfo();
2740+
else if (MATCH("PARTITIONEDRELPRUNEINFO", 23))
2741+
return_value = _readPartitionedRelPruneInfo();
27292742
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
27302743
return_value = _readPartitionPruneStepOp();
27312744
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))

src/backend/optimizer/path/allpaths.c

+11-16
Original file line number | Diff line number | Diff line change
@@ -1388,7 +1388,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
13881388
List *all_child_outers = NIL;
13891389
ListCell *l;
13901390
List *partitioned_rels = NIL;
1391-
bool build_partitioned_rels = false;
13921391
double partial_rows = -1;
13931392

13941393
/* If appropriate, consider parallel append */
@@ -1413,10 +1412,11 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14131412
if (rel->part_scheme != NULL)
14141413
{
14151414
if (IS_SIMPLE_REL(rel))
1416-
partitioned_rels = rel->partitioned_child_rels;
1415+
partitioned_rels = list_make1(rel->partitioned_child_rels);
14171416
else if (IS_JOIN_REL(rel))
14181417
{
14191418
int relid = -1;
1419+
List *partrels = NIL;
14201420

14211421
/*
14221422
* For a partitioned joinrel, concatenate the component rels'
@@ -1430,16 +1430,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14301430
component = root->simple_rel_array[relid];
14311431
Assert(component->part_scheme != NULL);
14321432
Assert(list_length(component->partitioned_child_rels) >= 1);
1433-
partitioned_rels =
1434-
list_concat(partitioned_rels,
1433+
partrels =
1434+
list_concat(partrels,
14351435
list_copy(component->partitioned_child_rels));
14361436
}
1437+
1438+
partitioned_rels = list_make1(partrels);
14371439
}
14381440

14391441
Assert(list_length(partitioned_rels) >= 1);
14401442
}
1441-
else if (rel->rtekind == RTE_SUBQUERY)
1442-
build_partitioned_rels = true;
14431443

14441444
/*
14451445
* For every non-dummy child, remember the cheapest path. Also, identify
@@ -1453,17 +1453,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14531453
Path *cheapest_partial_path = NULL;
14541454

14551455
/*
1456-
* If we need to build partitioned_rels, accumulate the partitioned
1457-
* rels for this child. We must ensure that parents are always listed
1458-
* before their child partitioned tables.
1456+
* For UNION ALLs with non-empty partitioned_child_rels, accumulate
1457+
* the Lists of child relations.
14591458
*/
1460-
if (build_partitioned_rels)
1461-
{
1462-
List *cprels = childrel->partitioned_child_rels;
1463-
1464-
partitioned_rels = list_concat(partitioned_rels,
1465-
list_copy(cprels));
1466-
}
1459+
if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL)
1460+
partitioned_rels = lappend(partitioned_rels,
1461+
childrel->partitioned_child_rels);
14671462

14681463
/*
14691464
* If child has an unparameterized cheapest-total path, add that to

0 commit comments

Comments
 (0)