Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b827304

Browse files
committed
Fix timing issue with ALTER TABLE's validate constraint
An ALTER TABLE to validate a foreign key in which another subcommand already caused a pending table rewrite could fail due to ALTER TABLE attempting to validate the foreign key before the actual table rewrite takes place. This situation could result in an error such as: ERROR: could not read block 0 in file "base/nnnnn/nnnnn": read only 0 of 8192 bytes The failure here was due to the SPI call which validates the foreign key trying to access an index which is yet to be rebuilt. Similarly, we also incorrectly tried to validate CHECK constraints before the heap had been rewritten. The fix for both is to delay constraint validation until phase 3, after the table has been rewritten. For CHECK constraints this means a slight behavioral change. Previously ALTER TABLE VALIDATE CONSTRAINT on inheritance tables would be validated from the bottom up. This was different from the order of evaluation when a new CHECK constraint was added. The changes made here aligns the VALIDATE CONSTRAINT evaluation order for inheritance tables to be the same as ADD CONSTRAINT, which is generally top-down. Reported-by: Nazli Ugur Koyluoglu, using SQLancer Discussion: https://postgr.es/m/CAApHDvp%3DZXv8wiRyk_0rWr00skhGkt8vXDrHJYXRMft3TjkxCA%40mail.gmail.com Backpatch-through: 9.5 (all supported versions)
1 parent 9678c08 commit b827304

File tree

3 files changed

+93
-106
lines changed

3 files changed

+93
-106
lines changed

src/backend/commands/tablecmds.c

+50-105
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ static void AlterSeqNamespaces(Relation classRel, Relation rel,
328328
LOCKMODE lockmode);
329329
static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
330330
bool recurse, bool recursing, LOCKMODE lockmode);
331-
static ObjectAddress ATExecValidateConstraint(Relation rel, char *constrName,
331+
static ObjectAddress ATExecValidateConstraint(List **wqueue,
332+
Relation rel, char *constrName,
332333
bool recurse, bool recursing, LOCKMODE lockmode);
333334
static int transformColumnNameList(Oid relId, List *colList,
334335
int16 *attnums, Oid *atttypids);
@@ -342,7 +343,6 @@ static Oid transformFkeyCheckAttrs(Relation pkrel,
342343
static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts);
343344
static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId,
344345
Oid *funcid);
345-
static void validateCheckConstraint(Relation rel, HeapTuple constrtup);
346346
static void validateForeignKeyConstraint(char *conname,
347347
Relation rel, Relation pkrel,
348348
Oid pkindOid, Oid constraintOid);
@@ -4500,13 +4500,13 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
45004500
address = ATExecAlterConstraint(rel, cmd, false, false, lockmode);
45014501
break;
45024502
case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */
4503-
address = ATExecValidateConstraint(rel, cmd->name, false, false,
4504-
lockmode);
4503+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, false,
4504+
false, lockmode);
45054505
break;
45064506
case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with
45074507
* recursion */
4508-
address = ATExecValidateConstraint(rel, cmd->name, true, false,
4509-
lockmode);
4508+
address = ATExecValidateConstraint(wqueue, rel, cmd->name, true,
4509+
false, lockmode);
45104510
break;
45114511
case AT_DropConstraint: /* DROP CONSTRAINT */
45124512
ATExecDropConstraint(rel, cmd->name, cmd->behavior,
@@ -9729,8 +9729,8 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
97299729
* was already validated, InvalidObjectAddress is returned.
97309730
*/
97319731
static ObjectAddress
9732-
ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
9733-
bool recursing, LOCKMODE lockmode)
9732+
ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
9733+
bool recurse, bool recursing, LOCKMODE lockmode)
97349734
{
97359735
Relation conrel;
97369736
SysScanDesc scan;
@@ -9776,27 +9776,31 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
97769776

97779777
if (!con->convalidated)
97789778
{
9779+
AlteredTableInfo *tab;
97799780
HeapTuple copyTuple;
97809781
Form_pg_constraint copy_con;
97819782

97829783
if (con->contype == CONSTRAINT_FOREIGN)
97839784
{
9784-
Relation refrel;
9785+
NewConstraint *newcon;
9786+
Constraint *fkconstraint;
97859787

9786-
/*
9787-
* Triggers are already in place on both tables, so a concurrent
9788-
* write that alters the result here is not possible. Normally we
9789-
* can run a query here to do the validation, which would only
9790-
* require AccessShareLock. In some cases, it is possible that we
9791-
* might need to fire triggers to perform the check, so we take a
9792-
* lock at RowShareLock level just in case.
9793-
*/
9794-
refrel = table_open(con->confrelid, RowShareLock);
9788+
/* Queue validation for phase 3 */
9789+
fkconstraint = makeNode(Constraint);
9790+
/* for now this is all we need */
9791+
fkconstraint->conname = constrName;
97959792

9796-
validateForeignKeyConstraint(constrName, rel, refrel,
9797-
con->conindid,
9798-
con->oid);
9799-
table_close(refrel, NoLock);
9793+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
9794+
newcon->name = constrName;
9795+
newcon->contype = CONSTR_FOREIGN;
9796+
newcon->refrelid = con->confrelid;
9797+
newcon->refindid = con->conindid;
9798+
newcon->conid = con->oid;
9799+
newcon->qual = (Node *) fkconstraint;
9800+
9801+
/* Find or create work queue entry for this table */
9802+
tab = ATGetQueueEntry(wqueue, rel);
9803+
tab->constraints = lappend(tab->constraints, newcon);
98009804

98019805
/*
98029806
* We disallow creating invalid foreign keys to or from
@@ -9807,6 +9811,10 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
98079811
{
98089812
List *children = NIL;
98099813
ListCell *child;
9814+
NewConstraint *newcon;
9815+
bool isnull;
9816+
Datum val;
9817+
char *conbin;
98109818

98119819
/*
98129820
* If we're recursing, the parent has already done this, so skip
@@ -9846,12 +9854,30 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
98469854
/* find_all_inheritors already got lock */
98479855
childrel = table_open(childoid, NoLock);
98489856

9849-
ATExecValidateConstraint(childrel, constrName, false,
9857+
ATExecValidateConstraint(wqueue, childrel, constrName, false,
98509858
true, lockmode);
98519859
table_close(childrel, NoLock);
98529860
}
98539861

9854-
validateCheckConstraint(rel, tuple);
9862+
/* Queue validation for phase 3 */
9863+
newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
9864+
newcon->name = constrName;
9865+
newcon->contype = CONSTR_CHECK;
9866+
newcon->refrelid = InvalidOid;
9867+
newcon->refindid = InvalidOid;
9868+
newcon->conid = con->oid;
9869+
9870+
val = SysCacheGetAttr(CONSTROID, tuple,
9871+
Anum_pg_constraint_conbin, &isnull);
9872+
if (isnull)
9873+
elog(ERROR, "null conbin for constraint %u", con->oid);
9874+
9875+
conbin = TextDatumGetCString(val);
9876+
newcon->qual = (Node *) stringToNode(conbin);
9877+
9878+
/* Find or create work queue entry for this table */
9879+
tab = ATGetQueueEntry(wqueue, rel);
9880+
tab->constraints = lappend(tab->constraints, newcon);
98559881

98569882
/*
98579883
* Invalidate relcache so that others see the new validated
@@ -10225,87 +10251,6 @@ checkFkeyPermissions(Relation rel, int16 *attnums, int natts)
1022510251
}
1022610252
}
1022710253

10228-
/*
10229-
* Scan the existing rows in a table to verify they meet a proposed
10230-
* CHECK constraint.
10231-
*
10232-
* The caller must have opened and locked the relation appropriately.
10233-
*/
10234-
static void
10235-
validateCheckConstraint(Relation rel, HeapTuple constrtup)
10236-
{
10237-
EState *estate;
10238-
Datum val;
10239-
char *conbin;
10240-
Expr *origexpr;
10241-
ExprState *exprstate;
10242-
TableScanDesc scan;
10243-
ExprContext *econtext;
10244-
MemoryContext oldcxt;
10245-
TupleTableSlot *slot;
10246-
Form_pg_constraint constrForm;
10247-
bool isnull;
10248-
Snapshot snapshot;
10249-
10250-
/*
10251-
* VALIDATE CONSTRAINT is a no-op for foreign tables and partitioned
10252-
* tables.
10253-
*/
10254-
if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
10255-
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
10256-
return;
10257-
10258-
constrForm = (Form_pg_constraint) GETSTRUCT(constrtup);
10259-
10260-
estate = CreateExecutorState();
10261-
10262-
/*
10263-
* XXX this tuple doesn't really come from a syscache, but this doesn't
10264-
* matter to SysCacheGetAttr, because it only wants to be able to fetch
10265-
* the tupdesc
10266-
*/
10267-
val = SysCacheGetAttr(CONSTROID, constrtup, Anum_pg_constraint_conbin,
10268-
&isnull);
10269-
if (isnull)
10270-
elog(ERROR, "null conbin for constraint %u",
10271-
constrForm->oid);
10272-
conbin = TextDatumGetCString(val);
10273-
origexpr = (Expr *) stringToNode(conbin);
10274-
exprstate = ExecPrepareExpr(origexpr, estate);
10275-
10276-
econtext = GetPerTupleExprContext(estate);
10277-
slot = table_slot_create(rel, NULL);
10278-
econtext->ecxt_scantuple = slot;
10279-
10280-
snapshot = RegisterSnapshot(GetLatestSnapshot());
10281-
scan = table_beginscan(rel, snapshot, 0, NULL);
10282-
10283-
/*
10284-
* Switch to per-tuple memory context and reset it for each tuple
10285-
* produced, so we don't leak memory.
10286-
*/
10287-
oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
10288-
10289-
while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
10290-
{
10291-
if (!ExecCheck(exprstate, econtext))
10292-
ereport(ERROR,
10293-
(errcode(ERRCODE_CHECK_VIOLATION),
10294-
errmsg("check constraint \"%s\" of relation \"%s\" is violated by some row",
10295-
NameStr(constrForm->conname),
10296-
RelationGetRelationName(rel)),
10297-
errtableconstraint(rel, NameStr(constrForm->conname))));
10298-
10299-
ResetExprContext(econtext);
10300-
}
10301-
10302-
MemoryContextSwitchTo(oldcxt);
10303-
table_endscan(scan);
10304-
UnregisterSnapshot(snapshot);
10305-
ExecDropSingleTupleTableSlot(slot);
10306-
FreeExecutorState(estate);
10307-
}
10308-
1030910254
/*
1031010255
* Scan the existing rows in a table to verify they meet a proposed FK
1031110256
* constraint.

src/test/regress/expected/alter_table.out

+21-1
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,8 @@ NOTICE: boo: 18
487487
ALTER TABLE attmp3 ADD CONSTRAINT IDENTITY check (b = boo(b)) NOT VALID;
488488
NOTICE: merging constraint "identity" with inherited definition
489489
ALTER TABLE attmp3 VALIDATE CONSTRAINT identity;
490-
NOTICE: boo: 16
491490
NOTICE: boo: 20
491+
NOTICE: boo: 16
492492
-- A NO INHERIT constraint should not be looked for in children during VALIDATE CONSTRAINT
493493
create table parent_noinh_convalid (a int);
494494
create table child_noinh_convalid () inherits (parent_noinh_convalid);
@@ -997,6 +997,26 @@ alter table atacc1
997997
add column b float8 not null default random(),
998998
add primary key(a);
999999
drop table atacc1;
1000+
-- additionally, we've seen issues with foreign key validation not being
1001+
-- properly delayed until after a table rewrite. Check that works ok.
1002+
create table atacc1 (a int primary key);
1003+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
1004+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
1005+
drop table atacc1;
1006+
-- we've also seen issues with check constraints being validated at the wrong
1007+
-- time when there's a pending table rewrite.
1008+
create table atacc1 (a bigint, b int);
1009+
insert into atacc1 values(1,1);
1010+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
1011+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
1012+
drop table atacc1;
1013+
-- same as above, but ensure the constraint violation is detected
1014+
create table atacc1 (a bigint, b int);
1015+
insert into atacc1 values(1,2);
1016+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
1017+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
1018+
ERROR: check constraint "atacc1_chk" of relation "atacc1" is violated by some row
1019+
drop table atacc1;
10001020
-- something a little more complicated
10011021
create table atacc1 ( test int, test2 int);
10021022
-- add a primary key constraint

src/test/regress/sql/alter_table.sql

+22
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,28 @@ alter table atacc1
757757
add primary key(a);
758758
drop table atacc1;
759759

760+
-- additionally, we've seen issues with foreign key validation not being
761+
-- properly delayed until after a table rewrite. Check that works ok.
762+
create table atacc1 (a int primary key);
763+
alter table atacc1 add constraint atacc1_fkey foreign key (a) references atacc1 (a) not valid;
764+
alter table atacc1 validate constraint atacc1_fkey, alter a type bigint;
765+
drop table atacc1;
766+
767+
-- we've also seen issues with check constraints being validated at the wrong
768+
-- time when there's a pending table rewrite.
769+
create table atacc1 (a bigint, b int);
770+
insert into atacc1 values(1,1);
771+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
772+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
773+
drop table atacc1;
774+
775+
-- same as above, but ensure the constraint violation is detected
776+
create table atacc1 (a bigint, b int);
777+
insert into atacc1 values(1,2);
778+
alter table atacc1 add constraint atacc1_chk check(b = 1) not valid;
779+
alter table atacc1 validate constraint atacc1_chk, alter a type int;
780+
drop table atacc1;
781+
760782
-- something a little more complicated
761783
create table atacc1 ( test int, test2 int);
762784
-- add a primary key constraint

0 commit comments

Comments
 (0)