Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 77b6b5e

Browse files
committed
Make RelationGetPartitionDispatchInfo expand depth-first.
With this change, the order of leaf partitions as returned by RelationGetPartitionDispatchInfo should now be the same as the order used by expand_inherited_rtentry. This will make it simpler for future patches to match up the partition dispatch information with the planner data structures. The new code is also, in my opinion anyway, simpler and easier to understand. Amit Langote, reviewed by Amit Khandekar. I also reviewed and made a few cosmetic revisions. Discussion: http://postgr.es/m/d98d4761-5071-1762-501e-0e15047c714b@lab.ntt.co.jp
1 parent 8951c65 commit 77b6b5e

File tree

2 files changed

+116
-143
lines changed

2 files changed

+116
-143
lines changed

src/backend/catalog/partition.c

Lines changed: 109 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ static int32 partition_bound_cmp(PartitionKey key,
147147
static int partition_bound_bsearch(PartitionKey key,
148148
PartitionBoundInfo boundinfo,
149149
void *probe, bool probe_is_bound, bool *is_equal);
150+
static void get_partition_dispatch_recurse(Relation rel, Relation parent,
151+
List **pds, List **leaf_part_oids);
150152

151153
/*
152154
* RelationBuildPartitionDesc
@@ -1191,21 +1193,6 @@ get_partition_qual_relid(Oid relid)
11911193
return result;
11921194
}
11931195

1194-
/*
1195-
* Append OIDs of rel's partitions to the list 'partoids' and for each OID,
1196-
* append pointer rel to the list 'parents'.
1197-
*/
1198-
#define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \
1199-
do\
1200-
{\
1201-
int i;\
1202-
for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
1203-
{\
1204-
(partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
1205-
(parents) = lappend((parents), (rel));\
1206-
}\
1207-
} while(0)
1208-
12091196
/*
12101197
* RelationGetPartitionDispatchInfo
12111198
* Returns information necessary to route tuples down a partition tree
@@ -1222,151 +1209,130 @@ PartitionDispatch *
12221209
RelationGetPartitionDispatchInfo(Relation rel,
12231210
int *num_parted, List **leaf_part_oids)
12241211
{
1212+
List *pdlist = NIL;
12251213
PartitionDispatchData **pd;
1226-
List *all_parts = NIL,
1227-
*all_parents = NIL,
1228-
*parted_rels,
1229-
*parted_rel_parents;
1230-
ListCell *lc1,
1231-
*lc2;
1232-
int i,
1233-
k,
1234-
offset;
1214+
ListCell *lc;
1215+
int i;
12351216

1236-
/*
1237-
* We rely on the relcache to traverse the partition tree to build both
1238-
* the leaf partition OIDs list and the array of PartitionDispatch objects
1239-
* for the partitioned tables in the tree. That means every partitioned
1240-
* table in the tree must be locked, which is fine since we require the
1241-
* caller to lock all the partitions anyway.
1242-
*
1243-
* For every partitioned table in the tree, starting with the root
1244-
* partitioned table, add its relcache entry to parted_rels, while also
1245-
* queuing its partitions (in the order in which they appear in the
1246-
* partition descriptor) to be looked at later in the same loop. This is
1247-
* a bit tricky but works because the foreach() macro doesn't fetch the
1248-
* next list element until the bottom of the loop.
1249-
*/
1250-
*num_parted = 1;
1251-
parted_rels = list_make1(rel);
1252-
/* Root partitioned table has no parent, so NULL for parent */
1253-
parted_rel_parents = list_make1(NULL);
1254-
APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
1255-
forboth(lc1, all_parts, lc2, all_parents)
1217+
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
1218+
1219+
*num_parted = 0;
1220+
*leaf_part_oids = NIL;
1221+
1222+
get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
1223+
*num_parted = list_length(pdlist);
1224+
pd = (PartitionDispatchData **) palloc(*num_parted *
1225+
sizeof(PartitionDispatchData *));
1226+
i = 0;
1227+
foreach(lc, pdlist)
12561228
{
1257-
Oid partrelid = lfirst_oid(lc1);
1258-
Relation parent = lfirst(lc2);
1229+
pd[i++] = lfirst(lc);
1230+
}
12591231

1260-
if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE)
1261-
{
1262-
/*
1263-
* Already locked by the caller. Note that it is the
1264-
* responsibility of the caller to close the below relcache entry,
1265-
* once done using the information being collected here (for
1266-
* example, in ExecEndModifyTable).
1267-
*/
1268-
Relation partrel = heap_open(partrelid, NoLock);
1232+
return pd;
1233+
}
12691234

1270-
(*num_parted)++;
1271-
parted_rels = lappend(parted_rels, partrel);
1272-
parted_rel_parents = lappend(parted_rel_parents, parent);
1273-
APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
1274-
}
1235+
/*
1236+
* get_partition_dispatch_recurse
1237+
* Recursively expand partition tree rooted at rel
1238+
*
1239+
* As the partition tree is expanded in a depth-first manner, we mantain two
1240+
* global lists: of PartitionDispatch objects corresponding to partitioned
1241+
* tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
1242+
*
1243+
* Note that the order of OIDs of leaf partitions in leaf_part_oids matches
1244+
* the order in which the planner's expand_partitioned_rtentry() processes
1245+
* them. It's not necessarily the case that the offsets match up exactly,
1246+
* because constraint exclusion might prune away some partitions on the
1247+
* planner side, whereas we'll always have the complete list; but unpruned
1248+
* partitions will appear in the same order in the plan as they are returned
1249+
* here.
1250+
*/
1251+
static void
1252+
get_partition_dispatch_recurse(Relation rel, Relation parent,
1253+
List **pds, List **leaf_part_oids)
1254+
{
1255+
TupleDesc tupdesc = RelationGetDescr(rel);
1256+
PartitionDesc partdesc = RelationGetPartitionDesc(rel);
1257+
PartitionKey partkey = RelationGetPartitionKey(rel);
1258+
PartitionDispatch pd;
1259+
int i;
1260+
1261+
check_stack_depth();
1262+
1263+
/* Build a PartitionDispatch for this table and add it to *pds. */
1264+
pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
1265+
*pds = lappend(*pds, pd);
1266+
pd->reldesc = rel;
1267+
pd->key = partkey;
1268+
pd->keystate = NIL;
1269+
pd->partdesc = partdesc;
1270+
if (parent != NULL)
1271+
{
1272+
/*
1273+
* For every partitioned table other than the root, we must store a
1274+
* tuple table slot initialized with its tuple descriptor and a tuple
1275+
* conversion map to convert a tuple from its parent's rowtype to its
1276+
* own. That is to make sure that we are looking at the correct row
1277+
* using the correct tuple descriptor when computing its partition key
1278+
* for tuple routing.
1279+
*/
1280+
pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
1281+
pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
1282+
tupdesc,
1283+
gettext_noop("could not convert row type"));
1284+
}
1285+
else
1286+
{
1287+
/* Not required for the root partitioned table */
1288+
pd->tupslot = NULL;
1289+
pd->tupmap = NULL;
12751290
}
12761291

12771292
/*
1278-
* We want to create two arrays - one for leaf partitions and another for
1279-
* partitioned tables (including the root table and internal partitions).
1280-
* While we only create the latter here, leaf partition array of suitable
1281-
* objects (such as, ResultRelInfo) is created by the caller using the
1282-
* list of OIDs we return. Indexes into these arrays get assigned in a
1283-
* breadth-first manner, whereby partitions of any given level are placed
1284-
* consecutively in the respective arrays.
1293+
* Go look at each partition of this table. If it's a leaf partition,
1294+
* simply add its OID to *leaf_part_oids. If it's a partitioned table,
1295+
* recursively call get_partition_dispatch_recurse(), so that its
1296+
* partitions are processed as well and a corresponding PartitionDispatch
1297+
* object gets added to *pds.
1298+
*
1299+
* About the values in pd->indexes: for a leaf partition, it contains the
1300+
* leaf partition's position in the global list *leaf_part_oids minus 1,
1301+
* whereas for a partitioned table partition, it contains the partition's
1302+
* position in the global list *pds multiplied by -1. The latter is
1303+
* multiplied by -1 to distinguish partitioned tables from leaf partitions
1304+
* when going through the values in pd->indexes. So, for example, when
1305+
* using it during tuple-routing, encountering a value >= 0 means we found
1306+
* a leaf partition. It is immediately returned as the index in the array
1307+
* of ResultRelInfos of all the leaf partitions, using which we insert the
1308+
* tuple into that leaf partition. A negative value means we found a
1309+
* partitioned table. The value multiplied by -1 is returned as the index
1310+
* in the array of PartitionDispatch objects of all partitioned tables in
1311+
* the tree. This value is used to continue the search in the next level
1312+
* of the partition tree.
12851313
*/
1286-
pd = (PartitionDispatchData **) palloc(*num_parted *
1287-
sizeof(PartitionDispatchData *));
1288-
*leaf_part_oids = NIL;
1289-
i = k = offset = 0;
1290-
forboth(lc1, parted_rels, lc2, parted_rel_parents)
1314+
pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
1315+
for (i = 0; i < partdesc->nparts; i++)
12911316
{
1292-
Relation partrel = lfirst(lc1);
1293-
Relation parent = lfirst(lc2);
1294-
PartitionKey partkey = RelationGetPartitionKey(partrel);
1295-
TupleDesc tupdesc = RelationGetDescr(partrel);
1296-
PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1297-
int j,
1298-
m;
1299-
1300-
pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
1301-
pd[i]->reldesc = partrel;
1302-
pd[i]->key = partkey;
1303-
pd[i]->keystate = NIL;
1304-
pd[i]->partdesc = partdesc;
1305-
if (parent != NULL)
1317+
Oid partrelid = partdesc->oids[i];
1318+
1319+
if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
13061320
{
1307-
/*
1308-
* For every partitioned table other than root, we must store a
1309-
* tuple table slot initialized with its tuple descriptor and a
1310-
* tuple conversion map to convert a tuple from its parent's
1311-
* rowtype to its own. That is to make sure that we are looking at
1312-
* the correct row using the correct tuple descriptor when
1313-
* computing its partition key for tuple routing.
1314-
*/
1315-
pd[i]->tupslot = MakeSingleTupleTableSlot(tupdesc);
1316-
pd[i]->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
1317-
tupdesc,
1318-
gettext_noop("could not convert row type"));
1321+
*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
1322+
pd->indexes[i] = list_length(*leaf_part_oids) - 1;
13191323
}
13201324
else
13211325
{
1322-
/* Not required for the root partitioned table */
1323-
pd[i]->tupslot = NULL;
1324-
pd[i]->tupmap = NULL;
1325-
}
1326-
pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
1327-
1328-
/*
1329-
* Indexes corresponding to the internal partitions are multiplied by
1330-
* -1 to distinguish them from those of leaf partitions. Encountering
1331-
* an index >= 0 means we found a leaf partition, which is immediately
1332-
* returned as the partition we are looking for. A negative index
1333-
* means we found a partitioned table, whose PartitionDispatch object
1334-
* is located at the above index multiplied back by -1. Using the
1335-
* PartitionDispatch object, search is continued further down the
1336-
* partition tree.
1337-
*/
1338-
m = 0;
1339-
for (j = 0; j < partdesc->nparts; j++)
1340-
{
1341-
Oid partrelid = partdesc->oids[j];
1326+
/*
1327+
* We assume all tables in the partition tree were already locked
1328+
* by the caller.
1329+
*/
1330+
Relation partrel = heap_open(partrelid, NoLock);
13421331

1343-
if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
1344-
{
1345-
*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
1346-
pd[i]->indexes[j] = k++;
1347-
}
1348-
else
1349-
{
1350-
/*
1351-
* offset denotes the number of partitioned tables of upper
1352-
* levels including those of the current level. Any partition
1353-
* of this table must belong to the next level and hence will
1354-
* be placed after the last partitioned table of this level.
1355-
*/
1356-
pd[i]->indexes[j] = -(1 + offset + m);
1357-
m++;
1358-
}
1332+
pd->indexes[i] = -list_length(*pds);
1333+
get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
13591334
}
1360-
i++;
1361-
1362-
/*
1363-
* This counts the number of partitioned tables at upper levels
1364-
* including those of the current level.
1365-
*/
1366-
offset += m;
13671335
}
1368-
1369-
return pd;
13701336
}
13711337

13721338
/* Module-local functions */

src/backend/optimizer/prep/prepunion.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,13 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
15651565
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
15661566
}
15671567

1568+
/*
1569+
* expand_partitioned_rtentry
1570+
* Recursively expand an RTE for a partitioned table.
1571+
*
1572+
* Note that RelationGetPartitionDispatchInfo will expand partitions in the
1573+
* same order as this code.
1574+
*/
15681575
static void
15691576
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
15701577
Index parentRTindex, Relation parentrel,

0 commit comments

Comments
 (0)