Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane2015-10-02 18:51:58 +0000
committerTom Lane2015-10-02 18:51:58 +0000
commitb63fc28776c5d2efdb4de326ad0f0b5b88f82220 (patch)
tree74bf395229d6c60aa24463714340ce1e41fdfc14 /src/backend/regex
parentf2c4ffc3307cab6619a28e77da9211416c8b1d83 (diff)
Add recursion depth protections to regular expression matching.
Some of the functions in regex compilation and execution recurse, and therefore could in principle be driven to stack overflow. The Tcl crew has seen this happen in practice in duptraverse(), though their fix was to put in a hard-wired limit on the number of recursive levels, which is not too appetizing --- fortunately, we have enough infrastructure to check the actually available stack. Greg Stark has also seen it in other places while fuzz testing on a machine with limited stack space. Let's put guards in to prevent crashes in all these places. Since the regex code would leak memory if we simply threw elog(ERROR), we have to introduce an API that checks for stack depth without throwing such an error. Fortunately that's not difficult.
Diffstat (limited to 'src/backend/regex')
-rw-r--r--src/backend/regex/regc_nfa.c68
-rw-r--r--src/backend/regex/regcomp.c34
-rw-r--r--src/backend/regex/rege_dfa.c7
-rw-r--r--src/backend/regex/regexec.c3
4 files changed, 102 insertions, 10 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index e474e48c28c..50a762ed7ac 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -683,6 +683,8 @@ delsub(struct nfa * nfa,
rp->tmp = rp; /* mark end */
deltraverse(nfa, lp, lp);
+ if (NISERR())
+ return; /* asserts might not hold after failure */
assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
@@ -702,6 +704,13 @@ deltraverse(struct nfa * nfa,
struct arc *a;
struct state *to;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
if (s->nouts == 0)
return; /* nothing to do */
if (s->tmp != NULL)
@@ -713,6 +722,8 @@ deltraverse(struct nfa * nfa,
{
to = a->to;
deltraverse(nfa, leftend, to);
+ if (NISERR())
+ return; /* asserts might not hold after failure */
assert(to->nouts == 0 || to->tmp != NULL);
freearc(nfa, a);
if (to->nins == 0 && to->tmp == NULL)
@@ -767,6 +778,13 @@ duptraverse(struct nfa * nfa,
{
struct arc *a;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
if (s->tmp != NULL)
return; /* already done */
@@ -796,6 +814,13 @@ cleartraverse(struct nfa * nfa,
{
struct arc *a;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
if (s->tmp == NULL)
return;
s->tmp = NULL;
@@ -1284,7 +1309,7 @@ fixempties(struct nfa * nfa,
*/
for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
{
- for (s2 = emptyreachable(s, s); s2 != s && !NISERR(); s2 = nexts)
+ for (s2 = emptyreachable(nfa, s, s); s2 != s && !NISERR(); s2 = nexts)
{
/*
* If s2 is doomed, we decide that (1) we will always push arcs
@@ -1342,19 +1367,28 @@ fixempties(struct nfa * nfa,
*
* The maximum recursion depth here is equal to the length of the longest
* loop-free chain of EMPTY arcs, which is surely no more than the size of
- * the NFA, and in practice will be a lot less than that.
+ * the NFA ... but that could still be enough to cause trouble.
*/
static struct state *
-emptyreachable(struct state * s, struct state * lastfound)
+emptyreachable(struct nfa * nfa,
+ struct state * s,
+ struct state * lastfound)
{
struct arc *a;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return lastfound;
+ }
+
s->tmp = lastfound;
lastfound = s;
for (a = s->outs; a != NULL; a = a->outchain)
{
if (a->type == EMPTY && a->to->tmp == NULL)
- lastfound = emptyreachable(a->to, lastfound);
+ lastfound = emptyreachable(nfa, a->to, lastfound);
}
return lastfound;
}
@@ -1433,19 +1467,22 @@ cleanup(struct nfa * nfa)
struct state *nexts;
int n;
+ if (NISERR())
+ return;
+
/* clear out unreachable or dead-end states */
/* use pre to mark reachable, then post to mark can-reach-post */
markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
- for (s = nfa->states; s != NULL; s = nexts)
+ for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{
nexts = s->next;
if (s->tmp != nfa->post && !s->flag)
dropstate(nfa, s);
}
- assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
+ assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
cleartraverse(nfa, nfa->pre);
- assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
+ assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
/* the nins==0 (final unreachable) case will be caught later */
/* renumber surviving states */
@@ -1466,6 +1503,13 @@ markreachable(struct nfa * nfa,
{
struct arc *a;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
if (s->tmp != okay)
return;
s->tmp = mark;
@@ -1485,6 +1529,13 @@ markcanreach(struct nfa * nfa,
{
struct arc *a;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
if (s->tmp != okay)
return;
s->tmp = mark;
@@ -1502,6 +1553,9 @@ analyze(struct nfa * nfa)
struct arc *a;
struct arc *aa;
+ if (NISERR())
+ return 0;
+
if (nfa->pre->outs == NULL)
return REG_UIMPOSSIBLE;
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index d137ac0d3d1..62f6359e345 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -34,7 +34,7 @@
#include "regex/regguts.h"
-#include "miscadmin.h" /* needed by rcancelrequested() */
+#include "miscadmin.h" /* needed by rcancelrequested/rstacktoodeep */
/*
* forward declarations, up here so forward datatypes etc. are defined early
@@ -70,6 +70,7 @@ static int newlacon(struct vars *, struct state *, struct state *, int);
static void freelacons(struct subre *, int);
static void rfree(regex_t *);
static int rcancelrequested(void);
+static int rstacktoodeep(void);
#ifdef REG_DEBUG
static void dump(regex_t *, FILE *);
@@ -152,7 +153,7 @@ static int push(struct nfa *, struct arc *);
#define COMPATIBLE 3 /* compatible but not satisfied yet */
static int combine(struct arc *, struct arc *);
static void fixempties(struct nfa *, FILE *);
-static struct state *emptyreachable(struct state *, struct state *);
+static struct state *emptyreachable(struct nfa *, struct state *, struct state *);
static void replaceempty(struct nfa *, struct state *, struct state *);
static void cleanup(struct nfa *);
static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
@@ -279,7 +280,8 @@ struct vars
/* static function list */
static const struct fns functions = {
rfree, /* regfree insides */
- rcancelrequested /* check for cancel request */
+ rcancelrequested, /* check for cancel request */
+ rstacktoodeep /* check for stack getting dangerously deep */
};
@@ -1626,6 +1628,16 @@ subre(struct vars * v,
{
struct subre *ret = v->treefree;
+ /*
+ * Checking for stack overflow here is sufficient to protect parse() and
+ * its recursive subroutines.
+ */
+ if (STACK_TOO_DEEP(v->re))
+ {
+ ERR(REG_ETOOBIG);
+ return NULL;
+ }
+
if (ret != NULL)
v->treefree = ret->left;
else
@@ -1938,6 +1950,22 @@ rcancelrequested(void)
return InterruptPending && (QueryCancelPending || ProcDiePending);
}
+/*
+ * rstacktoodeep - check for stack getting dangerously deep
+ *
+ * Return nonzero to fail the operation with error code REG_ETOOBIG,
+ * zero to keep going
+ *
+ * The current implementation is Postgres-specific. If we ever get around
+ * to splitting the regex code out as a standalone library, there will need
+ * to be some API to let applications define a callback function for this.
+ */
+static int
+rstacktoodeep(void)
+{
+ return stack_is_too_deep();
+}
+
#ifdef REG_DEBUG
/*
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index a9305dd7ccd..a37e4b0ef96 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -627,6 +627,13 @@ lacon(struct vars * v,
struct smalldfa sd;
chr *end;
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(v->re))
+ {
+ ERR(REG_ETOOBIG);
+ return 0;
+ }
+
n = co - pcnfa->ncolors;
assert(n < v->g->nlacons && v->g->lacons != NULL);
FDEBUG(("=== testing lacon %d\n", n));
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index d18672c7c7c..8a21f2cb787 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -624,6 +624,9 @@ cdissect(struct vars * v,
/* handy place to check for operation cancel */
if (CANCEL_REQUESTED(v->re))
return REG_CANCEL;
+ /* ... and stack overrun */
+ if (STACK_TOO_DEEP(v->re))
+ return REG_ETOOBIG;
switch (t->op)
{