* PartitionDispatchData->indexes for details on how this array is
* indexed.
*
+ * nonleaf_partitions
+ * Array of 'max_dispatch' elements containing pointers to fake
+ * ResultRelInfo objects for nonleaf partitions, indexed in parallel
+ * with 'partition_dispatch_info'; used for checking the partition
+ * constraint of each such partition.
+ *
* num_dispatch
* The current number of items stored in the 'partition_dispatch_info'
* array. Also serves as the index of the next free array element for
{
Relation partition_root;
PartitionDispatch *partition_dispatch_info;
+ ResultRelInfo **nonleaf_partitions;
int num_dispatch;
int max_dispatch;
ResultRelInfo **partitions;
PartitionDispatch dispatch;
PartitionDesc partdesc;
ExprContext *ecxt = GetPerTupleExprContext(estate);
- TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
+ TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
+ TupleTableSlot *rootslot = slot;
TupleTableSlot *myslot = NULL;
MemoryContext oldcxt;
+ ResultRelInfo *rri = NULL;
/* use per-tuple context here to avoid leaking memory */
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
/* start with the root partitioned table */
dispatch = pd[0];
- while (true)
+ while (dispatch != NULL)
{
- AttrNumber *map = dispatch->tupmap;
int partidx = -1;
CHECK_FOR_INTERRUPTS();
rel = dispatch->reldesc;
partdesc = dispatch->partdesc;
- /*
- * Convert the tuple to this parent's layout, if different from the
- * current relation.
- */
- myslot = dispatch->tupslot;
- if (myslot != NULL)
- {
- Assert(map != NULL);
- slot = execute_attr_map_slot(map, slot, myslot);
- }
-
/*
* Extract partition key from tuple. Expression evaluation machinery
* that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
if (partdesc->is_leaf[partidx])
{
- ResultRelInfo *rri;
-
/*
- * Look to see if we've already got a ResultRelInfo for this
- * partition.
+ * We've reached the leaf -- hurray, we're done. Look to see if
+ * we've already got a ResultRelInfo for this partition.
*/
if (likely(dispatch->indexes[partidx] >= 0))
{
dispatch,
rootResultRelInfo, partidx);
}
+ Assert(rri != NULL);
- /* Release the tuple in the lowest parent's dedicated slot. */
- if (slot == myslot)
- ExecClearTuple(myslot);
-
- MemoryContextSwitchTo(oldcxt);
- ecxt->ecxt_scantuple = ecxt_scantuple_old;
- return rri;
+ /* Signal to terminate the loop */
+ dispatch = NULL;
}
else
{
/* Already built. */
Assert(dispatch->indexes[partidx] < proute->num_dispatch);
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
+
/*
* Move down to the next partition level and search again
* until we find a leaf partition that matches this tuple
dispatch, partidx);
Assert(dispatch->indexes[partidx] >= 0 &&
dispatch->indexes[partidx] < proute->num_dispatch);
+
+ rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
dispatch = subdispatch;
}
+
+ /*
+ * Convert the tuple to the new parent's layout, if different from
+ * the previous parent.
+ */
+ if (dispatch->tupslot)
+ {
+ AttrNumber *map = dispatch->tupmap;
+ TupleTableSlot *tempslot = myslot;
+
+ myslot = dispatch->tupslot;
+ slot = execute_attr_map_slot(map, slot, myslot);
+
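+ /*
+ * Only now is it safe to release the previous parent's slot:
+ * 'slot' may have pointed into it until the conversion above
+ * copied the tuple into the new parent's dedicated slot.
+ */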
+ if (tempslot != NULL)
+ ExecClearTuple(tempslot);
+ }
+ }
+
+ /*
+ * If this partition is the default one, we must check its partition
+ * constraint now, which may have changed concurrently due to
+ * partitions being added to the parent.
+ *
+ * (We do this here, and do not rely on ExecInsert doing it, because
+ * we don't want to miss doing it for non-leaf partitions.)
+ */
+ if (partidx == partdesc->boundinfo->default_index)
+ {
+ PartitionRoutingInfo *partrouteinfo = rri->ri_PartitionInfo;
+
+ /*
+ * The tuple must match the partition's layout for the constraint
+ * expression to be evaluated successfully. If the partition is
+ * sub-partitioned, that would already be the case due to the code
+ * above, but for a leaf partition the tuple still matches the
+ * parent's layout.
+ *
+ * Note that we have a map to convert from root to current
+ * partition, but not from immediate parent to current partition.
+ * So if we have to convert, do it from the root slot; if not, use
+ * the root slot as-is.
+ */
+ if (partrouteinfo)
+ {
+ TupleConversionMap *map = partrouteinfo->pi_RootToPartitionMap;
+
+ if (map)
+ slot = execute_attr_map_slot(map->attrMap, rootslot,
+ partrouteinfo->pi_PartitionTupleSlot);
+ else
+ slot = rootslot;
+ }
+
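+ /* With emitError = true, this ereports if the constraint fails. */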
+ ExecPartitionCheck(rri, slot, estate, true);
}
}
+
+ /* Release the tuple in the lowest parent's dedicated slot. */
+ if (myslot != NULL)
+ ExecClearTuple(myslot);
+ /* and restore ecxt's scantuple */
+ ecxt->ecxt_scantuple = ecxt_scantuple_saved;
+ MemoryContextSwitchTo(oldcxt);
+
+ return rri;
}
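For review, a condensed sketch of the restructured descent loop; this is illustrative pseudocode reusing the patch's variable names (partition selection and the slot juggling are elided), not the literal source:

    dispatch = pd[0];
    while (dispatch != NULL)
    {
        partidx = ...;      /* choose the partition for this tuple */

        if (partdesc->is_leaf[partidx])
        {
            rri = ...;          /* cached or freshly built leaf ResultRelInfo */
            dispatch = NULL;    /* leaf found; stop after the check below */
        }
        else
        {
            rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
            dispatch = ...;     /* descend into the sub-table's dispatch */
        }

        /* the default-partition re-check now runs for leaf and nonleaf alike */
        if (partidx == partdesc->boundinfo->default_index)
            ExecPartitionCheck(rri, slot, estate, true);
    }
    return rri;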
/*
proute->max_dispatch = 4;
proute->partition_dispatch_info = (PartitionDispatch *)
palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
}
else
{
proute->partition_dispatch_info = (PartitionDispatch *)
repalloc(proute->partition_dispatch_info,
sizeof(PartitionDispatch) * proute->max_dispatch);
+ proute->nonleaf_partitions = (ResultRelInfo **)
+ repalloc(proute->nonleaf_partitions,
+ sizeof(ResultRelInfo *) * proute->max_dispatch);
}
}
proute->partition_dispatch_info[dispatchidx] = pd;
+ /*
+ * If setting up a PartitionDispatch for a sub-partitioned table, we may
+ * also need a minimally valid ResultRelInfo for checking the partition
+ * constraint later; set that up now.
+ */
+ if (parent_pd)
+ {
+ ResultRelInfo *rri = makeNode(ResultRelInfo);
+
+ InitResultRelInfo(rri, rel, 1, proute->partition_root, 0);
+ proute->nonleaf_partitions[dispatchidx] = rri;
+ }
+ else
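+ /* not needed for the root table, which is not itself a partition */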
+ proute->nonleaf_partitions[dispatchidx] = NULL;
+
/*
* Finally, if setting up a PartitionDispatch for a sub-partitioned table,
* install a downlink in the parent to allow quick descent.
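Why the bare InitResultRelInfo() above is enough: ExecPartitionCheck only needs the relation descriptor, from which it fetches and caches the partition qual. A simplified paraphrase of its logic (the function and field names are PostgreSQL's own, from execMain.c, but this is a reviewer's sketch, not the verbatim source):

    static bool
    partition_check_sketch(ResultRelInfo *rri, TupleTableSlot *slot,
                           EState *estate, bool emitError)
    {
        ExprContext *econtext;
        bool         success;

        /* build and cache the partition constraint expression on first use */
        if (rri->ri_PartitionCheckExpr == NULL)
        {
            List *qual = RelationGetPartitionQual(rri->ri_RelationDesc);

            rri->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
        }

        /* evaluate it with the tuple as the scan tuple */
        econtext = GetPerTupleExprContext(estate);
        econtext->ecxt_scantuple = slot;
        success = ExecCheck(rri->ri_PartitionCheckExpr, econtext);

        if (!success && emitError)
            ExecPartitionCheckEmitError(rri, slot, estate);

        return success;
    }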
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s1a s2b s2i s1c s2c s2s
+step s1b: begin;
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
+step s2b: begin;
+step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
+step s1c: commit;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: new row for relation "tpart_default" violates partition constraint
+step s2c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_2 110 xxx
+tpart_2 120 yyy
+tpart_2 150 zzz
+
+starting permutation: s1b s1a s2b s2i2 s1c s2c s2s
+step s1b: begin;
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200);
+step s2b: begin;
+step s2i2: insert into tpart_default (i, j) values (110, 'xxx'), (120, 'yyy'), (150, 'zzz'); <waiting ...>
+step s1c: commit;
+step s2i2: <... completed>
+error in steps s1c s2i2: ERROR: new row for relation "tpart_default" violates partition constraint
+step s2c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_2 110 xxx
+tpart_2 120 yyy
+tpart_2 150 zzz
+
+starting permutation: s1b s2b s2i s1a s2c s1c s2s
+step s1b: begin;
+step s2b: begin;
+step s2i: insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz');
+step s1a: alter table tpart attach partition tpart_2 for values from (100) to (200); <waiting ...>
+step s2c: commit;
+step s1a: <... completed>
+error in steps s2c s1a: ERROR: updated partition constraint for default partition would be violated by some row
+step s1c: commit;
+step s2s: select tableoid::regclass, * from tpart;
+tableoid i j
+
+tpart_default_default110 xxx
+tpart_default_default120 yyy
+tpart_default_default150 zzz
--- /dev/null
+# Verify that the default partition's constraint is enforced correctly
+# when partitions are concurrently added to its parent
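+#
+# Note: tpart_default and tpart_default_default are created with extra
+# columns that are dropped right away, so their attribute numbers differ
+# from the parent's and tuple-conversion maps are exercised during routing.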
+setup {
+ drop table if exists tpart;
+ create table tpart(i int, j text) partition by range(i);
+ create table tpart_1(like tpart);
+ create table tpart_2(like tpart);
+ create table tpart_default (a int, j text, i int) partition by list (j);
+ create table tpart_default_default (a int, i int, b int, j text);
+ alter table tpart_default_default drop b;
+ alter table tpart_default attach partition tpart_default_default default;
+ alter table tpart_default drop a;
+ alter table tpart attach partition tpart_default default;
+ alter table tpart attach partition tpart_1 for values from (0) to (100);
+ insert into tpart_2 values (110,'xxx'), (120, 'yyy'), (150, 'zzz');
+}
+
+session "s1"
+step "s1b" { begin; }
+step "s1a" { alter table tpart attach partition tpart_2 for values from (100) to (200); }
+step "s1c" { commit; }
+
+session "s2"
+step "s2b" { begin; }
+step "s2i" { insert into tpart values (110,'xxx'), (120, 'yyy'), (150, 'zzz'); }
+step "s2i2" { insert into tpart_default (i, j) values (110, 'xxx'), (120, 'yyy'), (150, 'zzz'); }
+step "s2c" { commit; }
+step "s2s" { select tableoid::regclass, * from tpart; }
+
+teardown { drop table tpart; }
+
+# the insert into tpart by s2 routes to tpart_default, since s2 does not
+# see the concurrently added tpart_2; it must fail because attaching
+# tpart_2 has changed tpart_default's partition constraint
+permutation "s1b" "s1a" "s2b" "s2i" "s1c" "s2c" "s2s"
+
+# similar to above, but now insert directly into the sub-partitioned tpart_default
+permutation "s1b" "s1a" "s2b" "s2i2" "s1c" "s2c" "s2s"
+
+# reverse: now the insert into tpart_default by s2 occurs first, followed
+# by the attach in s1, which should fail when it scans the leaf default
+# partition and finds the violating rows
+permutation "s1b" "s2b" "s2i" "s1a" "s2c" "s1c" "s2s"