Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit cc18687

Browse files
committed
Fix use-after-free issue in regexp engine.
Commit cebc1d3 taught parseqatom() to optimize cases where a branch contains only one, "messy", atom by getting rid of excess subRE nodes. The way we really should do that is to keep the subRE built for the "messy" child atom; but to avoid changing parseqatom's nominal API, I made it delete that node after copying its fields to the outer subRE made by parsebranch(). It seems that that actually worked at the time; but it became dangerous after ea1268f, because that later commit allowed the lower invocation of parse() to return a subRE that was also pointed to by some v->subs[] entry. This meant we could wind up with a dangling pointer in v->subs[], allowing a later backref to misbehave, but only if that subRE struct had been reused in between. So the damage seems confined to cases like '((...))...(...\2'.

To fix, do what I should have done before and modify parseqatom's API to make it possible for it to remove the caller's subRE instead of the callee's. That's safer because we know that subRE isn't complete yet, so no place else will have a pointer to it.

Per report from Mark Dilger. Back-patch to v14 where the problematic patches came in.

Discussion: https://postgr.es/m/0203588E-E609-43AF-9F4F-902854231EE7@enterprisedb.com
1 parent 675c945 commit cc18687

File tree

3 files changed

+73
-67
lines changed

3 files changed

+73
-67
lines changed

src/backend/regex/regcomp.c

+63-67
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ static int freev(struct vars *, int);
4343
static void makesearch(struct vars *, struct nfa *);
4444
static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
4545
static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
46-
static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
46+
static struct subre *parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
4747
static void nonword(struct vars *, int, struct state *, struct state *);
4848
static void word(struct vars *, int, struct state *, struct state *);
4949
static void charclass(struct vars *, enum char_classes,
@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
756756
seencontent = 1;
757757

758758
/* NB, recursion in parseqatom() may swallow rest of branch */
759-
parseqatom(v, stopper, type, lp, right, t);
759+
t = parseqatom(v, stopper, type, lp, right, t);
760760
NOERRN();
761761
}
762762

@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
777777
* The bookkeeping near the end cooperates very closely with parsebranch();
778778
* in particular, it contains a recursion that can involve parsing the rest
779779
* of the branch, making this function's name somewhat inaccurate.
780+
*
781+
* Usually, the return value is just "top", but in some cases where we
782+
* have parsed the rest of the branch, we may deem "top" redundant and
783+
* free it, returning some child subre instead.
780784
*/
781-
static void
785+
static struct subre *
782786
parseqatom(struct vars *v,
783787
int stopper, /* EOS or ')' */
784788
int type, /* LACON (lookaround subRE) or PLAIN */
@@ -818,103 +822,103 @@ parseqatom(struct vars *v,
818822
if (v->cflags & REG_NLANCH)
819823
ARCV(BEHIND, v->nlcolor);
820824
NEXT();
821-
return;
825+
return top;
822826
break;
823827
case '$':
824828
ARCV('$', 1);
825829
if (v->cflags & REG_NLANCH)
826830
ARCV(AHEAD, v->nlcolor);
827831
NEXT();
828-
return;
832+
return top;
829833
break;
830834
case SBEGIN:
831835
ARCV('^', 1); /* BOL */
832836
ARCV('^', 0); /* or BOS */
833837
NEXT();
834-
return;
838+
return top;
835839
break;
836840
case SEND:
837841
ARCV('$', 1); /* EOL */
838842
ARCV('$', 0); /* or EOS */
839843
NEXT();
840-
return;
844+
return top;
841845
break;
842846
case '<':
843847
wordchrs(v);
844848
s = newstate(v->nfa);
845-
NOERR();
849+
NOERRN();
846850
nonword(v, BEHIND, lp, s);
847851
word(v, AHEAD, s, rp);
848852
NEXT();
849-
return;
853+
return top;
850854
break;
851855
case '>':
852856
wordchrs(v);
853857
s = newstate(v->nfa);
854-
NOERR();
858+
NOERRN();
855859
word(v, BEHIND, lp, s);
856860
nonword(v, AHEAD, s, rp);
857861
NEXT();
858-
return;
862+
return top;
859863
break;
860864
case WBDRY:
861865
wordchrs(v);
862866
s = newstate(v->nfa);
863-
NOERR();
867+
NOERRN();
864868
nonword(v, BEHIND, lp, s);
865869
word(v, AHEAD, s, rp);
866870
s = newstate(v->nfa);
867-
NOERR();
871+
NOERRN();
868872
word(v, BEHIND, lp, s);
869873
nonword(v, AHEAD, s, rp);
870874
NEXT();
871-
return;
875+
return top;
872876
break;
873877
case NWBDRY:
874878
wordchrs(v);
875879
s = newstate(v->nfa);
876-
NOERR();
880+
NOERRN();
877881
word(v, BEHIND, lp, s);
878882
word(v, AHEAD, s, rp);
879883
s = newstate(v->nfa);
880-
NOERR();
884+
NOERRN();
881885
nonword(v, BEHIND, lp, s);
882886
nonword(v, AHEAD, s, rp);
883887
NEXT();
884-
return;
888+
return top;
885889
break;
886890
case LACON: /* lookaround constraint */
887891
latype = v->nextvalue;
888892
NEXT();
889893
s = newstate(v->nfa);
890894
s2 = newstate(v->nfa);
891-
NOERR();
895+
NOERRN();
892896
t = parse(v, ')', LACON, s, s2);
893897
freesubre(v, t); /* internal structure irrelevant */
894-
NOERR();
898+
NOERRN();
895899
assert(SEE(')'));
896900
NEXT();
897901
processlacon(v, s, s2, latype, lp, rp);
898-
return;
902+
return top;
899903
break;
900904
/* then errors, to get them out of the way */
901905
case '*':
902906
case '+':
903907
case '?':
904908
case '{':
905909
ERR(REG_BADRPT);
906-
return;
910+
return top;
907911
break;
908912
default:
909913
ERR(REG_ASSERT);
910-
return;
914+
return top;
911915
break;
912916
/* then plain characters, and minor variants on that theme */
913917
case ')': /* unbalanced paren */
914918
if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
915919
{
916920
ERR(REG_EPAREN);
917-
return;
921+
return top;
918922
}
919923
/* legal in EREs due to specification botch */
920924
NOTE(REG_UPBOTCH);
@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
923927
case PLAIN:
924928
onechr(v, v->nextvalue, lp, rp);
925929
okcolors(v->nfa, v->cm);
926-
NOERR();
930+
NOERRN();
927931
NEXT();
928932
break;
929933
case '[':
@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
972976
*/
973977
s = newstate(v->nfa);
974978
s2 = newstate(v->nfa);
975-
NOERR();
979+
NOERRN();
976980
EMPTYARC(lp, s);
977981
EMPTYARC(s2, rp);
978-
NOERR();
982+
NOERRN();
979983
atom = parse(v, ')', type, s, s2);
980984
assert(SEE(')') || ISERR());
981985
NEXT();
982-
NOERR();
986+
NOERRN();
983987
if (cap)
984988
{
985989
assert(v->subs[subno] == NULL);
@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
994998
{
995999
/* generate no-op wrapper node to handle "((x))" */
9961000
t = subre(v, '(', atom->flags | CAP, lp, rp);
997-
NOERR();
1001+
NOERRN();
9981002
t->capno = subno;
9991003
t->child = atom;
10001004
atom = t;
@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
10061010
INSIST(type != LACON, REG_ESUBREG);
10071011
INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
10081012
INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
1009-
NOERR();
1013+
NOERRN();
10101014
assert(v->nextvalue > 0);
10111015
atom = subre(v, 'b', BACKR, lp, rp);
1012-
NOERR();
1016+
NOERRN();
10131017
subno = v->nextvalue;
10141018
atom->backno = subno;
10151019
EMPTYARC(lp, rp); /* temporarily, so there's something */
@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
10501054
if (m > n)
10511055
{
10521056
ERR(REG_BADBR);
1053-
return;
1057+
return top;
10541058
}
10551059
/* {m,n} exercises preference, even if it's {m,m} */
10561060
qprefer = (v->nextvalue) ? LONGER : SHORTER;
@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
10641068
if (!SEE('}'))
10651069
{ /* catches errors too */
10661070
ERR(REG_BADBR);
1067-
return;
1071+
return top;
10681072
}
10691073
NEXT();
10701074
break;
@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
10831087
v->subs[subno] = NULL;
10841088
delsub(v->nfa, lp, rp);
10851089
EMPTYARC(lp, rp);
1086-
return;
1090+
return top;
10871091
}
10881092

10891093
/* if not a messy case, avoid hard part */
@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
10961100
if (atom != NULL)
10971101
freesubre(v, atom);
10981102
top->flags = f;
1099-
return;
1103+
return top;
11001104
}
11011105

11021106
/*
@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
11101114
if (atom == NULL)
11111115
{
11121116
atom = subre(v, '=', 0, lp, rp);
1113-
NOERR();
1117+
NOERRN();
11141118
}
11151119

11161120
/*----------
@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
11311135
*/
11321136
s = newstate(v->nfa); /* first, new endpoints for the atom */
11331137
s2 = newstate(v->nfa);
1134-
NOERR();
1138+
NOERRN();
11351139
moveouts(v->nfa, lp, s);
11361140
moveins(v->nfa, rp, s2);
1137-
NOERR();
1141+
NOERRN();
11381142
atom->begin = s;
11391143
atom->end = s2;
11401144
s = newstate(v->nfa); /* set up starting state */
1141-
NOERR();
1145+
NOERRN();
11421146
EMPTYARC(lp, s);
1143-
NOERR();
1147+
NOERRN();
11441148

11451149
/* break remaining subRE into x{...} and what follows */
11461150
t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
1147-
NOERR();
1151+
NOERRN();
11481152
t->child = atom;
11491153
atomp = &t->child;
11501154

@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
11631167
*/
11641168
assert(top->op == '=' && top->child == NULL);
11651169
top->child = subre(v, '=', top->flags, top->begin, lp);
1166-
NOERR();
1170+
NOERRN();
11671171
top->op = '.';
11681172
top->child->sibling = t;
11691173
/* top->flags will get updated later */
@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
11821186
*/
11831187
dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
11841188
atom->begin, atom->end);
1185-
NOERR();
1189+
NOERRN();
11861190

11871191
/* The backref node's NFA should not enforce any constraints */
11881192
removeconstraints(v->nfa, atom->begin, atom->end);
1189-
NOERR();
1193+
NOERRN();
11901194
}
11911195

11921196
/*
@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
12261230
repeat(v, atom->begin, atom->end, m, n);
12271231
f = COMBINE(qprefer, atom->flags);
12281232
t = subre(v, '=', f, atom->begin, atom->end);
1229-
NOERR();
1233+
NOERRN();
12301234
freesubre(v, atom);
12311235
*atomp = t;
12321236
/* rest of branch can be strung starting from t->end */
@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
12471251
repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
12481252
f = COMBINE(qprefer, atom->flags);
12491253
t = subre(v, '.', f, s, atom->end); /* prefix and atom */
1250-
NOERR();
1254+
NOERRN();
12511255
t->child = subre(v, '=', PREF(f), s, atom->begin);
1252-
NOERR();
1256+
NOERRN();
12531257
t->child->sibling = atom;
12541258
*atomp = t;
12551259
/* rest of branch can be strung starting from atom->end */
@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
12591263
{
12601264
/* general case: need an iteration node */
12611265
s2 = newstate(v->nfa);
1262-
NOERR();
1266+
NOERRN();
12631267
moveouts(v->nfa, atom->end, s2);
1264-
NOERR();
1268+
NOERRN();
12651269
dupnfa(v->nfa, atom->begin, atom->end, s, s2);
12661270
repeat(v, s, s2, m, n);
12671271
f = COMBINE(qprefer, atom->flags);
12681272
t = subre(v, '*', f, s, s2);
1269-
NOERR();
1273+
NOERRN();
12701274
t->min = (short) m;
12711275
t->max = (short) n;
12721276
t->child = atom;
@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
12801284
{
12811285
/* parse all the rest of the branch, and insert in t->child->sibling */
12821286
t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1);
1283-
NOERR();
1287+
NOERRN();
12841288
assert(SEE('|') || SEE(stopper) || SEE(EOS));
12851289

12861290
/* here's the promised update of the flags */
@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
12991303
*
13001304
* If the messy atom was the first thing in the branch, then
13011305
* top->child is vacuous and we can get rid of one level of
1302-
* concatenation. Since the caller is holding a pointer to the top
1303-
* node, we can't remove that node; but we're allowed to change its
1304-
* properties.
1306+
* concatenation.
13051307
*/
13061308
assert(top->child->op == '=');
13071309
if (top->child->begin == top->child->end)
@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
13511353
{
13521354
assert(!MESSY(top->child->flags));
13531355
t = top->child->sibling;
1354-
freesubre(v, top->child);
1355-
top->op = t->op;
1356-
top->flags = t->flags;
1357-
top->latype = t->latype;
1358-
top->id = t->id;
1359-
top->capno = t->capno;
1360-
top->backno = t->backno;
1361-
top->min = t->min;
1362-
top->max = t->max;
1363-
top->child = t->child;
1364-
top->begin = t->begin;
1365-
top->end = t->end;
1366-
freesrnode(v, t);
1356+
top->child->sibling = NULL;
1357+
freesubre(v, top);
1358+
top = t;
13671359
}
13681360
}
1361+
1362+
return top;
13691363
}
13701364

13711365
/*
@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */
21092103

21102104
if (!NULLCNFA(sr->cnfa))
21112105
freecnfa(&sr->cnfa);
2112-
sr->flags = 0;
2106+
sr->flags = 0; /* in particular, not INUSE */
2107+
sr->child = sr->sibling = NULL;
2108+
sr->begin = sr->end = NULL;
21132109

21142110
if (v != NULL && v->treechain != NULL)
21152111
{

0 commit comments

Comments
 (0)