Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4e23236

Browse files
committed
Improve commentary about run-time partition pruning data structures.
No code changes except for a couple of new Asserts. David Rowley and Tom Lane Discussion: https://postgr.es/m/CAKJS1f-6GODRNgEtdPxCnAPme2h2hTztB6LmtfdmcYAAOE0kQg@mail.gmail.com
1 parent e5d11b9 commit 4e23236

File tree

5 files changed

+71
-45
lines changed

5 files changed

+71
-45
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
14511451

14521452
/*
14531453
* If we need to build partitioned_rels, accumulate the partitioned
1454-
* rels for this child.
1454+
* rels for this child. We must ensure that parents are always listed
1455+
* before their child partitioned tables.
14551456
*/
14561457
if (build_partitioned_rels)
14571458
{

src/backend/partitioning/partprune.c

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,17 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
202202
int i;
203203

204204
/*
205-
* Allocate two arrays to store the 1-based indexes of the 'subpaths' and
206-
* 'partitioned_rels' by relid.
205+
* Construct two temporary arrays to map from planner relids to subplan
206+
* and sub-partition indexes. For convenience, we use 1-based indexes
207+
* here, so that zero can represent an un-filled array entry.
207208
*/
208209
relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size);
209210
relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size);
210211

212+
/*
213+
* relid_subplan_map maps relid of a leaf partition to the index in
214+
* 'subpaths' of the scan plan for that partition.
215+
*/
211216
i = 1;
212217
foreach(lc, subpaths)
213218
{
@@ -216,17 +221,27 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
216221

217222
Assert(IS_SIMPLE_REL(pathrel));
218223
Assert(pathrel->relid < root->simple_rel_array_size);
224+
/* No duplicates please */
225+
Assert(relid_subplan_map[pathrel->relid] == 0);
219226

220227
relid_subplan_map[pathrel->relid] = i++;
221228
}
222229

223-
/* Likewise for the partition_rels */
230+
/*
231+
* relid_subpart_map maps relid of a non-leaf partition to the index in
232+
* 'partition_rels' of that rel (which will also be the index in the
233+
* returned PartitionPruneInfo list of the info for that partition).
234+
*/
224235
i = 1;
225236
foreach(lc, partition_rels)
226237
{
227238
Index rti = lfirst_int(lc);
228239

229240
Assert(rti < root->simple_rel_array_size);
241+
/* No duplicates please */
242+
Assert(relid_subpart_map[rti] == 0);
243+
/* Same rel cannot be both leaf and non-leaf */
244+
Assert(relid_subplan_map[rti] == 0);
230245

231246
relid_subpart_map[rti] = i++;
232247
}
@@ -287,16 +302,16 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
287302
return NIL;
288303
}
289304

305+
/*
306+
* Construct the subplan and subpart maps for this partitioning level.
307+
* Here we convert to zero-based indexes, with -1 for empty entries.
308+
* Also construct a Bitmapset of all partitions that are present (that
309+
* is, not pruned already).
310+
*/
290311
subplan_map = (int *) palloc(nparts * sizeof(int));
291312
subpart_map = (int *) palloc(nparts * sizeof(int));
292313
present_parts = NULL;
293314

294-
/*
295-
* Loop over each partition of the partitioned rel and record the
296-
* subpath index for each. Any partitions which are not present in
297-
* the subpaths List will be set to -1, and any sub-partitioned table
298-
* which is not present will also be set to -1.
299-
*/
300315
for (i = 0; i < nparts; i++)
301316
{
302317
RelOptInfo *partrel = subpart->part_rels[i];
@@ -305,12 +320,6 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels,
305320

306321
subplan_map[i] = subplanidx;
307322
subpart_map[i] = subpartidx;
308-
309-
/*
310-
* Record the indexes of all the partition indexes that we have
311-
* subplans or subparts for. This allows an optimization to skip
312-
* attempting any run-time pruning when it's irrelevant.
313-
*/
314323
if (subplanidx >= 0 || subpartidx >= 0)
315324
present_parts = bms_add_member(present_parts, i);
316325
}

src/include/executor/execPartition.h

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -113,30 +113,27 @@ typedef struct PartitionTupleRouting
113113
} PartitionTupleRouting;
114114

115115
/*-----------------------
116-
* PartitionPruningData - Encapsulates all information required to support
117-
* elimination of partitions in plan types which support arbitrary Lists of
118-
* subplans. Information stored here allows the partition pruning functions
119-
* to be called and the return value of partition indexes translated into the
120-
* subpath indexes of plan types such as Append, thus allowing us to bypass a
121-
* subplan when we can prove that no tuple matching the 'pruning_steps' will
122-
* be found within.
116+
* PartitionPruningData - Per-partitioned-table data for run-time pruning
117+
* of partitions. For a multilevel partitioned table, we have one of these
118+
* for the topmost partition plus one for each non-leaf child partition,
119+
* ordered such that parents appear before their children.
123120
*
124-
* subplan_map An array containing the subplan index which
125-
* matches this partition index, or -1 if the
126-
* subplan has been pruned already.
127-
* subpart_map An array containing the index into the
128-
* partprunedata array in PartitionPruneState, or
129-
* -1 if there is no such element in that array.
121+
* subplan_map[] and subpart_map[] have the same definitions as in
122+
* PartitionPruneInfo (see plannodes.h); though note that here,
123+
* subpart_map contains indexes into PartitionPruneState.partprunedata[].
124+
*
125+
* subplan_map Subplan index by partition index, or -1.
126+
* subpart_map Subpart index by partition index, or -1.
130127
* present_parts A Bitmapset of the partition indexes that we
131-
* have subplans mapped for.
128+
* have subplans or subparts for.
132129
* context Contains the context details required to call
133130
* the partition pruning code.
134131
* pruning_steps List of PartitionPruneSteps used to
135132
* perform the actual pruning.
136133
* do_initial_prune true if pruning should be performed during
137-
* executor startup.
134+
* executor startup (for this partitioning level).
138135
* do_exec_prune true if pruning should be performed during
139-
* executor run.
136+
* executor run (for this partitioning level).
140137
*-----------------------
141138
*/
142139
typedef struct PartitionPruningData
@@ -152,16 +149,18 @@ typedef struct PartitionPruningData
152149

153150
/*-----------------------
154151
* PartitionPruneState - State object required for plan nodes to perform
155-
* partition pruning elimination of their subplans. This encapsulates a
156-
* flattened hierarchy of PartitionPruningData structs.
152+
* run-time partition pruning.
153+
*
157154
* This struct can be attached to plan types which support arbitrary Lists of
158155
* subplans containing partitions to allow subplans to be eliminated due to
159156
* the clauses being unable to match to any tuple that the subplan could
160-
* possibly produce.
157+
* possibly produce. Note that we currently support only one partitioned
158+
* table per parent plan node, hence partprunedata[] need describe only one
159+
* partitioning hierarchy.
161160
*
162-
* partprunedata Array of PartitionPruningData for the plan's target
163-
* partitioned relation. First element contains the
164-
* details for the target partitioned table.
161+
* partprunedata Array of PartitionPruningData for the plan's
162+
* partitioned relation, ordered such that parent tables
163+
* appear before children (hence, topmost table is first).
165164
* num_partprunedata Number of items in 'partprunedata' array.
166165
* do_initial_prune true if pruning should be performed during executor
167166
* startup (at any hierarchy level).

src/include/nodes/plannodes.h

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,18 +1059,35 @@ typedef struct PlanRowMark
10591059
* plan types which support arbitrary numbers of subplans, such as Append.
10601060
* We also store various details to tell the executor when it should be
10611061
* performing partition pruning.
1062+
*
1063+
* Each PartitionPruneInfo describes the partitioning rules for a single
1064+
* partitioned table (a/k/a level of partitioning). For a multilevel
1065+
* partitioned table, we have a List of PartitionPruneInfos, where the
1066+
* first entry represents the topmost partitioned table and additional
1067+
* entries represent non-leaf child partitions, ordered such that parents
1068+
* appear before their children.
1069+
*
1070+
* subplan_map[] and subpart_map[] are indexed by partition index (where
1071+
* zero is the topmost partition, and non-leaf partitions must come before
1072+
* their children). For a leaf partition p, subplan_map[p] contains the
1073+
* zero-based index of the partition's subplan in the parent plan's subplan
1074+
* list; it is -1 if the partition is non-leaf or has been pruned. For a
1075+
* non-leaf partition p, subpart_map[p] contains the zero-based index of
1076+
* that sub-partition's PartitionPruneInfo in the plan's PartitionPruneInfo
1077+
* list; it is -1 if the partition is a leaf or has been pruned. All these
1078+
* indexes are global across the whole partitioned table and Append plan node.
10621079
*/
10631080
typedef struct PartitionPruneInfo
10641081
{
10651082
NodeTag type;
1066-
Oid reloid; /* Oid of partition rel */
1083+
Oid reloid; /* OID of partition rel for this level */
10671084
List *pruning_steps; /* List of PartitionPruneStep, see below */
1068-
Bitmapset *present_parts; /* Indexes of all partitions which subplans
1069-
* are present for. */
1085+
Bitmapset *present_parts; /* Indexes of all partitions which subplans or
1086+
* subparts are present for. */
10701087
int nparts; /* Length of subplan_map[] and subpart_map[] */
10711088
int nexprs; /* Length of hasexecparam[] */
1072-
int *subplan_map; /* subplan index by partition id, or -1 */
1073-
int *subpart_map; /* subpart index by partition id, or -1 */
1089+
int *subplan_map; /* subplan index by partition index, or -1 */
1090+
int *subpart_map; /* subpart index by partition index, or -1 */
10741091
bool *hasexecparam; /* true if corresponding pruning_step contains
10751092
* any PARAM_EXEC Params. */
10761093
bool do_initial_prune; /* true if pruning should be performed

src/include/nodes/relation.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,8 +552,8 @@ typedef struct PartitionSchemeData *PartitionScheme;
552552
* part_rels - RelOptInfos for each partition
553553
* partexprs, nullable_partexprs - Partition key expressions
554554
* partitioned_child_rels - RT indexes of unpruned partitions of
555-
* relation that are partitioned tables
556-
* themselves
555+
* this relation that are partitioned tables
556+
* themselves, in hierarchical order
557557
*
558558
* Note: A base relation always has only one set of partition keys, but a join
559559
* relation may have as many sets of partition keys as the number of relations

0 commit comments

Comments
 (0)