@@ -147,6 +147,8 @@ static int32 partition_bound_cmp(PartitionKey key,
147
147
static int partition_bound_bsearch (PartitionKey key ,
148
148
PartitionBoundInfo boundinfo ,
149
149
void * probe , bool probe_is_bound , bool * is_equal );
150
+ static void get_partition_dispatch_recurse (Relation rel , Relation parent ,
151
+ List * * pds , List * * leaf_part_oids );
150
152
151
153
/*
152
154
* RelationBuildPartitionDesc
@@ -1191,21 +1193,6 @@ get_partition_qual_relid(Oid relid)
1191
1193
return result ;
1192
1194
}
1193
1195
1194
- /*
1195
- * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
1196
- * append pointer rel to the list 'parents'.
1197
- */
1198
- #define APPEND_REL_PARTITION_OIDS (rel , partoids , parents ) \
1199
- do\
1200
- {\
1201
- int i;\
1202
- for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
1203
- {\
1204
- (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
1205
- (parents) = lappend((parents), (rel));\
1206
- }\
1207
- } while(0)
1208
-
1209
1196
/*
1210
1197
* RelationGetPartitionDispatchInfo
1211
1198
* Returns information necessary to route tuples down a partition tree
@@ -1222,151 +1209,130 @@ PartitionDispatch *
1222
1209
RelationGetPartitionDispatchInfo (Relation rel ,
1223
1210
int * num_parted , List * * leaf_part_oids )
1224
1211
{
1212
+ List * pdlist = NIL ;
1225
1213
PartitionDispatchData * * pd ;
1226
- List * all_parts = NIL ,
1227
- * all_parents = NIL ,
1228
- * parted_rels ,
1229
- * parted_rel_parents ;
1230
- ListCell * lc1 ,
1231
- * lc2 ;
1232
- int i ,
1233
- k ,
1234
- offset ;
1214
+ ListCell * lc ;
1215
+ int i ;
1235
1216
1236
- /*
1237
- * We rely on the relcache to traverse the partition tree to build both
1238
- * the leaf partition OIDs list and the array of PartitionDispatch objects
1239
- * for the partitioned tables in the tree. That means every partitioned
1240
- * table in the tree must be locked, which is fine since we require the
1241
- * caller to lock all the partitions anyway.
1242
- *
1243
- * For every partitioned table in the tree, starting with the root
1244
- * partitioned table, add its relcache entry to parted_rels, while also
1245
- * queuing its partitions (in the order in which they appear in the
1246
- * partition descriptor) to be looked at later in the same loop. This is
1247
- * a bit tricky but works because the foreach() macro doesn't fetch the
1248
- * next list element until the bottom of the loop.
1249
- */
1250
- * num_parted = 1 ;
1251
- parted_rels = list_make1 (rel );
1252
- /* Root partitioned table has no parent, so NULL for parent */
1253
- parted_rel_parents = list_make1 (NULL );
1254
- APPEND_REL_PARTITION_OIDS (rel , all_parts , all_parents );
1255
- forboth (lc1 , all_parts , lc2 , all_parents )
1217
+ Assert (rel -> rd_rel -> relkind == RELKIND_PARTITIONED_TABLE );
1218
+
1219
+ * num_parted = 0 ;
1220
+ * leaf_part_oids = NIL ;
1221
+
1222
+ get_partition_dispatch_recurse (rel , NULL , & pdlist , leaf_part_oids );
1223
+ * num_parted = list_length (pdlist );
1224
+ pd = (PartitionDispatchData * * ) palloc (* num_parted *
1225
+ sizeof (PartitionDispatchData * ));
1226
+ i = 0 ;
1227
+ foreach (lc , pdlist )
1256
1228
{
1257
- Oid partrelid = lfirst_oid ( lc1 );
1258
- Relation parent = lfirst ( lc2 );
1229
+ pd [ i ++ ] = lfirst ( lc );
1230
+ }
1259
1231
1260
- if (get_rel_relkind (partrelid ) == RELKIND_PARTITIONED_TABLE )
1261
- {
1262
- /*
1263
- * Already locked by the caller. Note that it is the
1264
- * responsibility of the caller to close the below relcache entry,
1265
- * once done using the information being collected here (for
1266
- * example, in ExecEndModifyTable).
1267
- */
1268
- Relation partrel = heap_open (partrelid , NoLock );
1232
+ return pd ;
1233
+ }
1269
1234
1270
- (* num_parted )++ ;
1271
- parted_rels = lappend (parted_rels , partrel );
1272
- parted_rel_parents = lappend (parted_rel_parents , parent );
1273
- APPEND_REL_PARTITION_OIDS (partrel , all_parts , all_parents );
1274
- }
1235
+ /*
1236
+ * get_partition_dispatch_recurse
1237
+ * Recursively expand partition tree rooted at rel
1238
+ *
1239
+ * As the partition tree is expanded in a depth-first manner, we maintain two
1240
+ * global lists: of PartitionDispatch objects corresponding to partitioned
1241
+ * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
1242
+ *
1243
+ * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
1244
+ * the order in which the planner's expand_partitioned_rtentry() processes
1245
+ * them. It's not necessarily the case that the offsets match up exactly,
1246
+ * because constraint exclusion might prune away some partitions on the
1247
+ * planner side, whereas we'll always have the complete list; but unpruned
1248
+ * partitions will appear in the same order in the plan as they are returned
1249
+ * here.
1250
+ */
1251
+ static void
1252
+ get_partition_dispatch_recurse (Relation rel , Relation parent ,
1253
+ List * * pds , List * * leaf_part_oids )
1254
+ {
1255
+ TupleDesc tupdesc = RelationGetDescr (rel );
1256
+ PartitionDesc partdesc = RelationGetPartitionDesc (rel );
1257
+ PartitionKey partkey = RelationGetPartitionKey (rel );
1258
+ PartitionDispatch pd ;
1259
+ int i ;
1260
+
1261
+ check_stack_depth ();
1262
+
1263
+ /* Build a PartitionDispatch for this table and add it to *pds. */
1264
+ pd = (PartitionDispatch ) palloc (sizeof (PartitionDispatchData ));
1265
+ * pds = lappend (* pds , pd );
1266
+ pd -> reldesc = rel ;
1267
+ pd -> key = partkey ;
1268
+ pd -> keystate = NIL ;
1269
+ pd -> partdesc = partdesc ;
1270
+ if (parent != NULL )
1271
+ {
1272
+ /*
1273
+ * For every partitioned table other than the root, we must store a
1274
+ * tuple table slot initialized with its tuple descriptor and a tuple
1275
+ * conversion map to convert a tuple from its parent's rowtype to its
1276
+ * own. That is to make sure that we are looking at the correct row
1277
+ * using the correct tuple descriptor when computing its partition key
1278
+ * for tuple routing.
1279
+ */
1280
+ pd -> tupslot = MakeSingleTupleTableSlot (tupdesc );
1281
+ pd -> tupmap = convert_tuples_by_name (RelationGetDescr (parent ),
1282
+ tupdesc ,
1283
+ gettext_noop ("could not convert row type" ));
1284
+ }
1285
+ else
1286
+ {
1287
+ /* Not required for the root partitioned table */
1288
+ pd -> tupslot = NULL ;
1289
+ pd -> tupmap = NULL ;
1275
1290
}
1276
1291
1277
1292
/*
1278
- * We want to create two arrays - one for leaf partitions and another for
1279
- * partitioned tables (including the root table and internal partitions).
1280
- * While we only create the latter here, leaf partition array of suitable
1281
- * objects (such as, ResultRelInfo) is created by the caller using the
1282
- * list of OIDs we return. Indexes into these arrays get assigned in a
1283
- * breadth-first manner, whereby partitions of any given level are placed
1284
- * consecutively in the respective arrays.
1293
+ * Go look at each partition of this table. If it's a leaf partition,
1294
+ * simply add its OID to *leaf_part_oids. If it's a partitioned table,
1295
+ * recursively call get_partition_dispatch_recurse(), so that its
1296
+ * partitions are processed as well and a corresponding PartitionDispatch
1297
+ * object gets added to *pds.
1298
+ *
1299
+ * About the values in pd->indexes: for a leaf partition, it contains the
1300
+ * leaf partition's position in the global list *leaf_part_oids minus 1,
1301
+ * whereas for a partitioned table partition, it contains the partition's
1302
+ * position in the global list *pds multiplied by -1. The latter is
1303
+ * multiplied by -1 to distinguish partitioned tables from leaf partitions
1304
+ * when going through the values in pd->indexes. So, for example, when
1305
+ * using it during tuple-routing, encountering a value >= 0 means we found
1306
+ * a leaf partition. It is immediately returned as the index in the array
1307
+ * of ResultRelInfos of all the leaf partitions, using which we insert the
1308
+ * tuple into that leaf partition. A negative value means we found a
1309
+ * partitioned table. The value multiplied by -1 is returned as the index
1310
+ * in the array of PartitionDispatch objects of all partitioned tables in
1311
+ * the tree. This value is used to continue the search in the next level
1312
+ * of the partition tree.
1285
1313
*/
1286
- pd = (PartitionDispatchData * * ) palloc (* num_parted *
1287
- sizeof (PartitionDispatchData * ));
1288
- * leaf_part_oids = NIL ;
1289
- i = k = offset = 0 ;
1290
- forboth (lc1 , parted_rels , lc2 , parted_rel_parents )
1314
+ pd -> indexes = (int * ) palloc (partdesc -> nparts * sizeof (int ));
1315
+ for (i = 0 ; i < partdesc -> nparts ; i ++ )
1291
1316
{
1292
- Relation partrel = lfirst (lc1 );
1293
- Relation parent = lfirst (lc2 );
1294
- PartitionKey partkey = RelationGetPartitionKey (partrel );
1295
- TupleDesc tupdesc = RelationGetDescr (partrel );
1296
- PartitionDesc partdesc = RelationGetPartitionDesc (partrel );
1297
- int j ,
1298
- m ;
1299
-
1300
- pd [i ] = (PartitionDispatch ) palloc (sizeof (PartitionDispatchData ));
1301
- pd [i ]-> reldesc = partrel ;
1302
- pd [i ]-> key = partkey ;
1303
- pd [i ]-> keystate = NIL ;
1304
- pd [i ]-> partdesc = partdesc ;
1305
- if (parent != NULL )
1317
+ Oid partrelid = partdesc -> oids [i ];
1318
+
1319
+ if (get_rel_relkind (partrelid ) != RELKIND_PARTITIONED_TABLE )
1306
1320
{
1307
- /*
1308
- * For every partitioned table other than root, we must store a
1309
- * tuple table slot initialized with its tuple descriptor and a
1310
- * tuple conversion map to convert a tuple from its parent's
1311
- * rowtype to its own. That is to make sure that we are looking at
1312
- * the correct row using the correct tuple descriptor when
1313
- * computing its partition key for tuple routing.
1314
- */
1315
- pd [i ]-> tupslot = MakeSingleTupleTableSlot (tupdesc );
1316
- pd [i ]-> tupmap = convert_tuples_by_name (RelationGetDescr (parent ),
1317
- tupdesc ,
1318
- gettext_noop ("could not convert row type" ));
1321
+ * leaf_part_oids = lappend_oid (* leaf_part_oids , partrelid );
1322
+ pd -> indexes [i ] = list_length (* leaf_part_oids ) - 1 ;
1319
1323
}
1320
1324
else
1321
1325
{
1322
- /* Not required for the root partitioned table */
1323
- pd [i ]-> tupslot = NULL ;
1324
- pd [i ]-> tupmap = NULL ;
1325
- }
1326
- pd [i ]-> indexes = (int * ) palloc (partdesc -> nparts * sizeof (int ));
1327
-
1328
- /*
1329
- * Indexes corresponding to the internal partitions are multiplied by
1330
- * -1 to distinguish them from those of leaf partitions. Encountering
1331
- * an index >= 0 means we found a leaf partition, which is immediately
1332
- * returned as the partition we are looking for. A negative index
1333
- * means we found a partitioned table, whose PartitionDispatch object
1334
- * is located at the above index multiplied back by -1. Using the
1335
- * PartitionDispatch object, search is continued further down the
1336
- * partition tree.
1337
- */
1338
- m = 0 ;
1339
- for (j = 0 ; j < partdesc -> nparts ; j ++ )
1340
- {
1341
- Oid partrelid = partdesc -> oids [j ];
1326
+ /*
1327
+ * We assume all tables in the partition tree were already locked
1328
+ * by the caller.
1329
+ */
1330
+ Relation partrel = heap_open (partrelid , NoLock );
1342
1331
1343
- if (get_rel_relkind (partrelid ) != RELKIND_PARTITIONED_TABLE )
1344
- {
1345
- * leaf_part_oids = lappend_oid (* leaf_part_oids , partrelid );
1346
- pd [i ]-> indexes [j ] = k ++ ;
1347
- }
1348
- else
1349
- {
1350
- /*
1351
- * offset denotes the number of partitioned tables of upper
1352
- * levels including those of the current level. Any partition
1353
- * of this table must belong to the next level and hence will
1354
- * be placed after the last partitioned table of this level.
1355
- */
1356
- pd [i ]-> indexes [j ] = - (1 + offset + m );
1357
- m ++ ;
1358
- }
1332
+ pd -> indexes [i ] = - list_length (* pds );
1333
+ get_partition_dispatch_recurse (partrel , rel , pds , leaf_part_oids );
1359
1334
}
1360
- i ++ ;
1361
-
1362
- /*
1363
- * This counts the number of partitioned tables at upper levels
1364
- * including those of the current level.
1365
- */
1366
- offset += m ;
1367
1335
}
1368
-
1369
- return pd ;
1370
1336
}
1371
1337
1372
1338
/* Module-local functions */
0 commit comments