@@ -43,7 +43,7 @@ static int freev(struct vars *, int);
43
43
static void makesearch (struct vars * , struct nfa * );
44
44
static struct subre * parse (struct vars * , int , int , struct state * , struct state * );
45
45
static struct subre * parsebranch (struct vars * , int , int , struct state * , struct state * , int );
46
- static void parseqatom (struct vars * , int , int , struct state * , struct state * , struct subre * );
46
+ static struct subre * parseqatom (struct vars * , int , int , struct state * , struct state * , struct subre * );
47
47
static void nonword (struct vars * , int , struct state * , struct state * );
48
48
static void word (struct vars * , int , struct state * , struct state * );
49
49
static void charclass (struct vars * , enum char_classes ,
@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
756
756
seencontent = 1 ;
757
757
758
758
/* NB, recursion in parseqatom() may swallow rest of branch */
759
- parseqatom (v , stopper , type , lp , right , t );
759
+ t = parseqatom (v , stopper , type , lp , right , t );
760
760
NOERRN ();
761
761
}
762
762
@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
777
777
* The bookkeeping near the end cooperates very closely with parsebranch();
778
778
* in particular, it contains a recursion that can involve parsing the rest
779
779
* of the branch, making this function's name somewhat inaccurate.
780
+ *
781
+ * Usually, the return value is just "top", but in some cases where we
782
+ * have parsed the rest of the branch, we may deem "top" redundant and
783
+ * free it, returning some child subre instead.
780
784
*/
781
- static void
785
+ static struct subre *
782
786
parseqatom (struct vars * v ,
783
787
int stopper , /* EOS or ')' */
784
788
int type , /* LACON (lookaround subRE) or PLAIN */
@@ -818,103 +822,103 @@ parseqatom(struct vars *v,
818
822
if (v -> cflags & REG_NLANCH )
819
823
ARCV (BEHIND , v -> nlcolor );
820
824
NEXT ();
821
- return ;
825
+ return top ;
822
826
break ;
823
827
case '$' :
824
828
ARCV ('$' , 1 );
825
829
if (v -> cflags & REG_NLANCH )
826
830
ARCV (AHEAD , v -> nlcolor );
827
831
NEXT ();
828
- return ;
832
+ return top ;
829
833
break ;
830
834
case SBEGIN :
831
835
ARCV ('^' , 1 ); /* BOL */
832
836
ARCV ('^' , 0 ); /* or BOS */
833
837
NEXT ();
834
- return ;
838
+ return top ;
835
839
break ;
836
840
case SEND :
837
841
ARCV ('$' , 1 ); /* EOL */
838
842
ARCV ('$' , 0 ); /* or EOS */
839
843
NEXT ();
840
- return ;
844
+ return top ;
841
845
break ;
842
846
case '<' :
843
847
wordchrs (v );
844
848
s = newstate (v -> nfa );
845
- NOERR ();
849
+ NOERRN ();
846
850
nonword (v , BEHIND , lp , s );
847
851
word (v , AHEAD , s , rp );
848
852
NEXT ();
849
- return ;
853
+ return top ;
850
854
break ;
851
855
case '>' :
852
856
wordchrs (v );
853
857
s = newstate (v -> nfa );
854
- NOERR ();
858
+ NOERRN ();
855
859
word (v , BEHIND , lp , s );
856
860
nonword (v , AHEAD , s , rp );
857
861
NEXT ();
858
- return ;
862
+ return top ;
859
863
break ;
860
864
case WBDRY :
861
865
wordchrs (v );
862
866
s = newstate (v -> nfa );
863
- NOERR ();
867
+ NOERRN ();
864
868
nonword (v , BEHIND , lp , s );
865
869
word (v , AHEAD , s , rp );
866
870
s = newstate (v -> nfa );
867
- NOERR ();
871
+ NOERRN ();
868
872
word (v , BEHIND , lp , s );
869
873
nonword (v , AHEAD , s , rp );
870
874
NEXT ();
871
- return ;
875
+ return top ;
872
876
break ;
873
877
case NWBDRY :
874
878
wordchrs (v );
875
879
s = newstate (v -> nfa );
876
- NOERR ();
880
+ NOERRN ();
877
881
word (v , BEHIND , lp , s );
878
882
word (v , AHEAD , s , rp );
879
883
s = newstate (v -> nfa );
880
- NOERR ();
884
+ NOERRN ();
881
885
nonword (v , BEHIND , lp , s );
882
886
nonword (v , AHEAD , s , rp );
883
887
NEXT ();
884
- return ;
888
+ return top ;
885
889
break ;
886
890
case LACON : /* lookaround constraint */
887
891
latype = v -> nextvalue ;
888
892
NEXT ();
889
893
s = newstate (v -> nfa );
890
894
s2 = newstate (v -> nfa );
891
- NOERR ();
895
+ NOERRN ();
892
896
t = parse (v , ')' , LACON , s , s2 );
893
897
freesubre (v , t ); /* internal structure irrelevant */
894
- NOERR ();
898
+ NOERRN ();
895
899
assert (SEE (')' ));
896
900
NEXT ();
897
901
processlacon (v , s , s2 , latype , lp , rp );
898
- return ;
902
+ return top ;
899
903
break ;
900
904
/* then errors, to get them out of the way */
901
905
case '*' :
902
906
case '+' :
903
907
case '?' :
904
908
case '{' :
905
909
ERR (REG_BADRPT );
906
- return ;
910
+ return top ;
907
911
break ;
908
912
default :
909
913
ERR (REG_ASSERT );
910
- return ;
914
+ return top ;
911
915
break ;
912
916
/* then plain characters, and minor variants on that theme */
913
917
case ')' : /* unbalanced paren */
914
918
if ((v -> cflags & REG_ADVANCED ) != REG_EXTENDED )
915
919
{
916
920
ERR (REG_EPAREN );
917
- return ;
921
+ return top ;
918
922
}
919
923
/* legal in EREs due to specification botch */
920
924
NOTE (REG_UPBOTCH );
@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
923
927
case PLAIN :
924
928
onechr (v , v -> nextvalue , lp , rp );
925
929
okcolors (v -> nfa , v -> cm );
926
- NOERR ();
930
+ NOERRN ();
927
931
NEXT ();
928
932
break ;
929
933
case '[' :
@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
972
976
*/
973
977
s = newstate (v -> nfa );
974
978
s2 = newstate (v -> nfa );
975
- NOERR ();
979
+ NOERRN ();
976
980
EMPTYARC (lp , s );
977
981
EMPTYARC (s2 , rp );
978
- NOERR ();
982
+ NOERRN ();
979
983
atom = parse (v , ')' , type , s , s2 );
980
984
assert (SEE (')' ) || ISERR ());
981
985
NEXT ();
982
- NOERR ();
986
+ NOERRN ();
983
987
if (cap )
984
988
{
985
989
assert (v -> subs [subno ] == NULL );
@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
994
998
{
995
999
/* generate no-op wrapper node to handle "((x))" */
996
1000
t = subre (v , '(' , atom -> flags | CAP , lp , rp );
997
- NOERR ();
1001
+ NOERRN ();
998
1002
t -> capno = subno ;
999
1003
t -> child = atom ;
1000
1004
atom = t ;
@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
1006
1010
INSIST (type != LACON , REG_ESUBREG );
1007
1011
INSIST (v -> nextvalue < v -> nsubs , REG_ESUBREG );
1008
1012
INSIST (v -> subs [v -> nextvalue ] != NULL , REG_ESUBREG );
1009
- NOERR ();
1013
+ NOERRN ();
1010
1014
assert (v -> nextvalue > 0 );
1011
1015
atom = subre (v , 'b' , BACKR , lp , rp );
1012
- NOERR ();
1016
+ NOERRN ();
1013
1017
subno = v -> nextvalue ;
1014
1018
atom -> backno = subno ;
1015
1019
EMPTYARC (lp , rp ); /* temporarily, so there's something */
@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
1050
1054
if (m > n )
1051
1055
{
1052
1056
ERR (REG_BADBR );
1053
- return ;
1057
+ return top ;
1054
1058
}
1055
1059
/* {m,n} exercises preference, even if it's {m,m} */
1056
1060
qprefer = (v -> nextvalue ) ? LONGER : SHORTER ;
@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
1064
1068
if (!SEE ('}' ))
1065
1069
{ /* catches errors too */
1066
1070
ERR (REG_BADBR );
1067
- return ;
1071
+ return top ;
1068
1072
}
1069
1073
NEXT ();
1070
1074
break ;
@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
1083
1087
v -> subs [subno ] = NULL ;
1084
1088
delsub (v -> nfa , lp , rp );
1085
1089
EMPTYARC (lp , rp );
1086
- return ;
1090
+ return top ;
1087
1091
}
1088
1092
1089
1093
/* if not a messy case, avoid hard part */
@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
1096
1100
if (atom != NULL )
1097
1101
freesubre (v , atom );
1098
1102
top -> flags = f ;
1099
- return ;
1103
+ return top ;
1100
1104
}
1101
1105
1102
1106
/*
@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
1110
1114
if (atom == NULL )
1111
1115
{
1112
1116
atom = subre (v , '=' , 0 , lp , rp );
1113
- NOERR ();
1117
+ NOERRN ();
1114
1118
}
1115
1119
1116
1120
/*----------
@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
1131
1135
*/
1132
1136
s = newstate (v -> nfa ); /* first, new endpoints for the atom */
1133
1137
s2 = newstate (v -> nfa );
1134
- NOERR ();
1138
+ NOERRN ();
1135
1139
moveouts (v -> nfa , lp , s );
1136
1140
moveins (v -> nfa , rp , s2 );
1137
- NOERR ();
1141
+ NOERRN ();
1138
1142
atom -> begin = s ;
1139
1143
atom -> end = s2 ;
1140
1144
s = newstate (v -> nfa ); /* set up starting state */
1141
- NOERR ();
1145
+ NOERRN ();
1142
1146
EMPTYARC (lp , s );
1143
- NOERR ();
1147
+ NOERRN ();
1144
1148
1145
1149
/* break remaining subRE into x{...} and what follows */
1146
1150
t = subre (v , '.' , COMBINE (qprefer , atom -> flags ), lp , rp );
1147
- NOERR ();
1151
+ NOERRN ();
1148
1152
t -> child = atom ;
1149
1153
atomp = & t -> child ;
1150
1154
@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
1163
1167
*/
1164
1168
assert (top -> op == '=' && top -> child == NULL );
1165
1169
top -> child = subre (v , '=' , top -> flags , top -> begin , lp );
1166
- NOERR ();
1170
+ NOERRN ();
1167
1171
top -> op = '.' ;
1168
1172
top -> child -> sibling = t ;
1169
1173
/* top->flags will get updated later */
@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
1182
1186
*/
1183
1187
dupnfa (v -> nfa , v -> subs [subno ]-> begin , v -> subs [subno ]-> end ,
1184
1188
atom -> begin , atom -> end );
1185
- NOERR ();
1189
+ NOERRN ();
1186
1190
1187
1191
/* The backref node's NFA should not enforce any constraints */
1188
1192
removeconstraints (v -> nfa , atom -> begin , atom -> end );
1189
- NOERR ();
1193
+ NOERRN ();
1190
1194
}
1191
1195
1192
1196
/*
@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
1226
1230
repeat (v , atom -> begin , atom -> end , m , n );
1227
1231
f = COMBINE (qprefer , atom -> flags );
1228
1232
t = subre (v , '=' , f , atom -> begin , atom -> end );
1229
- NOERR ();
1233
+ NOERRN ();
1230
1234
freesubre (v , atom );
1231
1235
* atomp = t ;
1232
1236
/* rest of branch can be strung starting from t->end */
@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
1247
1251
repeat (v , s , atom -> begin , m - 1 , (n == DUPINF ) ? n : n - 1 );
1248
1252
f = COMBINE (qprefer , atom -> flags );
1249
1253
t = subre (v , '.' , f , s , atom -> end ); /* prefix and atom */
1250
- NOERR ();
1254
+ NOERRN ();
1251
1255
t -> child = subre (v , '=' , PREF (f ), s , atom -> begin );
1252
- NOERR ();
1256
+ NOERRN ();
1253
1257
t -> child -> sibling = atom ;
1254
1258
* atomp = t ;
1255
1259
/* rest of branch can be strung starting from atom->end */
@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
1259
1263
{
1260
1264
/* general case: need an iteration node */
1261
1265
s2 = newstate (v -> nfa );
1262
- NOERR ();
1266
+ NOERRN ();
1263
1267
moveouts (v -> nfa , atom -> end , s2 );
1264
- NOERR ();
1268
+ NOERRN ();
1265
1269
dupnfa (v -> nfa , atom -> begin , atom -> end , s , s2 );
1266
1270
repeat (v , s , s2 , m , n );
1267
1271
f = COMBINE (qprefer , atom -> flags );
1268
1272
t = subre (v , '*' , f , s , s2 );
1269
- NOERR ();
1273
+ NOERRN ();
1270
1274
t -> min = (short ) m ;
1271
1275
t -> max = (short ) n ;
1272
1276
t -> child = atom ;
@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
1280
1284
{
1281
1285
/* parse all the rest of the branch, and insert in t->child->sibling */
1282
1286
t -> child -> sibling = parsebranch (v , stopper , type , s2 , rp , 1 );
1283
- NOERR ();
1287
+ NOERRN ();
1284
1288
assert (SEE ('|' ) || SEE (stopper ) || SEE (EOS ));
1285
1289
1286
1290
/* here's the promised update of the flags */
@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
1299
1303
*
1300
1304
* If the messy atom was the first thing in the branch, then
1301
1305
* top->child is vacuous and we can get rid of one level of
1302
- * concatenation. Since the caller is holding a pointer to the top
1303
- * node, we can't remove that node; but we're allowed to change its
1304
- * properties.
1306
+ * concatenation.
1305
1307
*/
1306
1308
assert (top -> child -> op == '=' );
1307
1309
if (top -> child -> begin == top -> child -> end )
@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
1351
1353
{
1352
1354
assert (!MESSY (top -> child -> flags ));
1353
1355
t = top -> child -> sibling ;
1354
- freesubre (v , top -> child );
1355
- top -> op = t -> op ;
1356
- top -> flags = t -> flags ;
1357
- top -> latype = t -> latype ;
1358
- top -> id = t -> id ;
1359
- top -> capno = t -> capno ;
1360
- top -> backno = t -> backno ;
1361
- top -> min = t -> min ;
1362
- top -> max = t -> max ;
1363
- top -> child = t -> child ;
1364
- top -> begin = t -> begin ;
1365
- top -> end = t -> end ;
1366
- freesrnode (v , t );
1356
+ top -> child -> sibling = NULL ;
1357
+ freesubre (v , top );
1358
+ top = t ;
1367
1359
}
1368
1360
}
1361
+
1362
+ return top ;
1369
1363
}
1370
1364
1371
1365
/*
@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */
2109
2103
2110
2104
if (!NULLCNFA (sr -> cnfa ))
2111
2105
freecnfa (& sr -> cnfa );
2112
- sr -> flags = 0 ;
2106
+ sr -> flags = 0 ; /* in particular, not INUSE */
2107
+ sr -> child = sr -> sibling = NULL ;
2108
+ sr -> begin = sr -> end = NULL ;
2113
2109
2114
2110
if (v != NULL && v -> treechain != NULL )
2115
2111
{
0 commit comments