Improve performance of fixempties() pass in regular-expression compiler.

tglsfdc · tglsfdc · commit f5b7d103bc4a · 2015-10-16T15:55:59.000-04:00
The previous coding took something like O(N^4) time to fully process a
chain of N EMPTY arcs.  We can't really do much better than O(N^2) because
we have to insert about that many arcs, but we can do lots better than
what's there now.  The win comes partly from using mergeins() to amortize
de-duplication of arcs across multiple source states, and partly from
exploiting knowledge of the ordering of arcs for each state to avoid
looking at arcs we don't need to consider during the scan.  We do have
to be a bit careful of the possible reordering of arcs introduced by
the sort-merge coding of the previous commit, but that's not hard to
deal with.

Back-patch to all supported branches.
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
@@ -593,40 +593,6 @@ hasnonemptyout(struct state * s)
 	return 0;
 }
 
-/*
- * nonemptyouts - count non-EMPTY out arcs of a state
- */
-static int
-nonemptyouts(struct state * s)
-{
-	int			n = 0;
-	struct arc *a;
-
-	for (a = s->outs; a != NULL; a = a->outchain)
-	{
-		if (a->type != EMPTY)
-			n++;
-	}
-	return n;
-}
-
-/*
- * nonemptyins - count non-EMPTY in arcs of a state
- */
-static int
-nonemptyins(struct state * s)
-{
-	int			n = 0;
-	struct arc *a;
-
-	for (a = s->ins; a != NULL; a = a->inchain)
-	{
-		if (a->type != EMPTY)
-			n++;
-	}
-	return n;
-}
-
 /*
  * findarc - find arc, if any, from given source with given type and color
  * If there is more than one such arc, the result is random.
@@ -1856,6 +1822,12 @@ fixempties(struct nfa * nfa,
 	struct state *nexts;
 	struct arc *a;
 	struct arc *nexta;
+	int			totalinarcs;
+	struct arc **inarcsorig;
+	struct arc **arcarray;
+	int			arccount;
+	int			prevnins;
+	int			nskip;
 
 	/*
 	 * First, get rid of any states whose sole out-arc is an EMPTY, since
@@ -1896,41 +1868,131 @@ fixempties(struct nfa * nfa,
 		dropstate(nfa, s);
 	}
 
+	if (NISERR())
+		return;
+
 	/*
-	 * For each remaining NFA state, find all other states that are reachable
-	 * from it by a chain of one or more EMPTY arcs.  Then generate new arcs
+	 * For each remaining NFA state, find all other states from which it is
+	 * reachable by a chain of one or more EMPTY arcs.  Then generate new arcs
 	 * that eliminate the need for each such chain.
 	 *
-	 * If we just do this straightforwardly, the algorithm gets slow in
-	 * complex graphs, because the same arcs get copied to all intermediate
-	 * states of an EMPTY chain, and then uselessly pushed repeatedly to the
-	 * chain's final state; we waste a lot of time in newarc's duplicate
-	 * checking.  To improve matters, we decree that any state with only EMPTY
-	 * out-arcs is "doomed" and will not be part of the final NFA. That can be
-	 * ensured by not adding any new out-arcs to such a state. Having ensured
-	 * that, we need not update the state's in-arcs list either; all arcs that
-	 * might have gotten pushed forward to it will just get pushed directly to
-	 * successor states.  This eliminates most of the useless duplicate arcs.
+	 * We could replace a chain of EMPTY arcs that leads from a "from" state
+	 * to a "to" state either by pushing non-EMPTY arcs forward (linking
+	 * directly from "from"'s predecessors to "to") or by pulling them back
+	 * (linking directly from "from" to "to"'s successors).  We choose to
+	 * always do the former; this choice is somewhat arbitrary, but the
+	 * approach below requires that we uniformly do one or the other.
+	 *
+	 * Suppose we have a chain of N successive EMPTY arcs (where N can easily
+	 * approach the size of the NFA).  All of the intermediate states must
+	 * have additional inarcs and outarcs, else they'd have been removed by
+	 * the steps above.  Assuming their inarcs are mostly not empties, we will
+	 * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
+	 * state in the chain must be duplicated to lead to all its successor
+	 * states as well.  So there is no hope of doing less than O(N^2) work;
+	 * however, we should endeavor to keep the big-O cost from being even
+	 * worse than that, which it can easily become without care.  In
+	 * particular, suppose we were to copy all S1's inarcs forward to S2, and
+	 * then also to S3, and then later we consider pushing S2's inarcs forward
+	 * to S3.  If we include the arcs already copied from S1 in that, we'd be
+	 * doing O(N^3) work.  (The duplicate-arc elimination built into newarc()
+	 * and its cohorts would get rid of the extra arcs, but not without cost.)
+	 *
+	 * We can avoid this cost by treating only arcs that existed at the start
+	 * of this phase as candidates to be pushed forward.  To identify those,
+	 * we remember the first inarc each state had to start with.  We rely on
+	 * the fact that newarc() and friends put new arcs on the front of their
+	 * to-states' inchains, and that this phase never deletes arcs, so that
+	 * the original arcs must be the last arcs in their to-states' inchains.
+	 *
+	 * So the process here is that, for each state in the NFA, we gather up
+	 * all non-EMPTY inarcs of states that can reach the target state via
+	 * EMPTY arcs.  We then sort, de-duplicate, and merge these arcs into the
+	 * target state's inchain.  (We can safely use sort-merge for this as long
+	 * as we update each state's original-arcs pointer after we add arcs to
+	 * it; the sort step of mergeins probably changed the order of the old
+	 * arcs.)
+	 *
+	 * Another refinement worth making is that, because we only add non-EMPTY
+	 * arcs during this phase, and all added arcs have the same from-state as
+	 * the non-EMPTY arc they were cloned from, we know ahead of time that any
+	 * states having only EMPTY outarcs will be useless for lack of outarcs
+	 * after we drop the EMPTY arcs.  (They cannot gain non-EMPTY outarcs if
+	 * they had none to start with.)  So we need not bother to update the
+	 * inchains of such states at all.
+	 */
+
+	/* Remember the states' first original inarcs */
+	/* ... and while at it, count how many old inarcs there are altogether */
+	inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
+	if (inarcsorig == NULL)
+	{
+		NERR(REG_ESPACE);
+		return;
+	}
+	totalinarcs = 0;
+	for (s = nfa->states; s != NULL; s = s->next)
+	{
+		inarcsorig[s->no] = s->ins;
+		totalinarcs += s->nins;
+	}
+
+	/*
+	 * Create a workspace for accumulating the inarcs to be added to the
+	 * current target state.  totalinarcs is probably a considerable
+	 * overestimate of the space needed, but the NFA is unlikely to be large
+	 * enough at this point to make it worth being smarter.
 	 */
+	arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
+	if (arcarray == NULL)
+	{
+		NERR(REG_ESPACE);
+		FREE(inarcsorig);
+		return;
+	}
+
+	/* And iterate over the target states */
 	for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
 	{
-		for (s2 = emptyreachable(nfa, s, s); s2 != s && !NISERR(); s2 = nexts)
+		/* Ignore target states without non-EMPTY outarcs, per note above */
+		if (!s->flag && !hasnonemptyout(s))
+			continue;
+
+		/* Find predecessor states and accumulate their original inarcs */
+		arccount = 0;
+		for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
 		{
-			/*
-			 * If s2 is doomed, we decide that (1) we will always push arcs
-			 * forward to it, not pull them back to s; and (2) we can optimize
-			 * away the push-forward, per comment above.  So do nothing.
-			 */
-			if (s2->flag || hasnonemptyout(s2))
-				replaceempty(nfa, s, s2);
+			/* Add s2's original inarcs to arcarray[], but ignore empties */
+			for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
+			{
+				if (a->type != EMPTY)
+					arcarray[arccount++] = a;
+			}
 
 			/* Reset the tmp fields as we walk back */
 			nexts = s2->tmp;
 			s2->tmp = NULL;
 		}
 		s->tmp = NULL;
+		assert(arccount <= totalinarcs);
+
+		/* Remember how many original inarcs this state has */
+		prevnins = s->nins;
+
+		/* Add non-duplicate inarcs to target state */
+		mergeins(nfa, s, arcarray, arccount);
+
+		/* Now we must update the state's inarcsorig pointer */
+		nskip = s->nins - prevnins;
+		a = s->ins;
+		while (nskip-- > 0)
+			a = a->inchain;
+		inarcsorig[s->no] = a;
 	}
 
+	FREE(arcarray);
+	FREE(inarcsorig);
+
 	if (NISERR())
 		return;
 
@@ -1964,20 +2026,25 @@ fixempties(struct nfa * nfa,
 }
 
 /*
- * emptyreachable - recursively find all states reachable from s by EMPTY arcs
+ * emptyreachable - recursively find all states that can reach s by EMPTY arcs
  *
  * The return value is the last such state found.  Its tmp field links back
  * to the next-to-last such state, and so on back to s, so that all these
  * states can be located without searching the whole NFA.
  *
+ * Since this is only used in fixempties(), we pass in the inarcsorig[] array
+ * maintained by that function.  This lets us skip over all new inarcs, which
+ * are certainly not EMPTY arcs.
+ *
  * The maximum recursion depth here is equal to the length of the longest
  * loop-free chain of EMPTY arcs, which is surely no more than the size of
  * the NFA ... but that could still be enough to cause trouble.
  */
 static struct state *
 emptyreachable(struct nfa * nfa,
 			   struct state * s,
-			   struct state * lastfound)
+			   struct state * lastfound,
+			   struct arc ** inarcsorig)
 {
 	struct arc *a;
 
@@ -1990,78 +2057,14 @@ emptyreachable(struct nfa * nfa,
 
 	s->tmp = lastfound;
 	lastfound = s;
-	for (a = s->outs; a != NULL; a = a->outchain)
+	for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
 	{
-		if (a->type == EMPTY && a->to->tmp == NULL)
-			lastfound = emptyreachable(nfa, a->to, lastfound);
+		if (a->type == EMPTY && a->from->tmp == NULL)
+			lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
 	}
 	return lastfound;
 }
 
-/*
- * replaceempty - replace an EMPTY arc chain with some non-empty arcs
- *
- * The EMPTY arc(s) should be deleted later, but we can't do it here because
- * they may still be needed to identify other arc chains during fixempties().
- */
-static void
-replaceempty(struct nfa * nfa,
-			 struct state * from,
-			 struct state * to)
-{
-	int			fromouts;
-	int			toins;
-
-	assert(from != to);
-
-	/*
-	 * Create replacement arcs that bypass the need for the EMPTY chain.  We
-	 * can do this either by pushing arcs forward (linking directly from
-	 * "from"'s predecessors to "to") or by pulling them back (linking
-	 * directly from "from" to "to"'s successors).  In general, we choose
-	 * whichever way creates greater fan-out or fan-in, so as to improve the
-	 * odds of reducing the other state to zero in-arcs or out-arcs and
-	 * thereby being able to delete it.  However, if "from" is doomed (has no
-	 * non-EMPTY out-arcs), we must keep it so, so always push forward in that
-	 * case.
-	 *
-	 * The fan-out/fan-in comparison should count only non-EMPTY arcs.  If
-	 * "from" is doomed, we can skip counting "to"'s arcs, since we want to
-	 * force taking the copyins path in that case.
-	 */
-	fromouts = nonemptyouts(from);
-	toins = (fromouts == 0) ? 1 : nonemptyins(to);
-
-	if (fromouts > toins)
-	{
-		copyouts(nfa, to, from, 0);
-		return;
-	}
-	if (fromouts < toins)
-	{
-		copyins(nfa, from, to, 0);
-		return;
-	}
-
-	/*
-	 * fromouts == toins.  Decide on secondary issue: copy fewest arcs.
-	 *
-	 * Doesn't seem to be worth the trouble to exclude empties from these
-	 * comparisons; that takes extra time and doesn't seem to improve the
-	 * resulting graph much.
-	 */
-	if (from->nins > to->nouts)
-	{
-		copyouts(nfa, to, from, 0);
-		return;
-	}
-	else
-	{
-		copyins(nfa, from, to, 0);
-		return;
-	}
-}
-
 /*
  * isconstraintarc - detect whether an arc is of a constraint type
  */
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
@@ -129,8 +129,6 @@ static struct arc *allocarc(struct nfa *, struct state *);
 static void freearc(struct nfa *, struct arc *);
 static void changearctarget(struct arc *, struct state *);
 static int	hasnonemptyout(struct state *);
-static int	nonemptyouts(struct state *);
-static int	nonemptyins(struct state *);
 static struct arc *findarc(struct state *, int, pcolor);
 static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
 static void sortins(struct nfa *, struct state *);
@@ -160,8 +158,8 @@ static int	push(struct nfa *, struct arc *);
 #define COMPATIBLE	3			/* compatible but not satisfied yet */
 static int	combine(struct arc *, struct arc *);
 static void fixempties(struct nfa *, FILE *);
-static struct state *emptyreachable(struct nfa *, struct state *, struct state *);
-static void replaceempty(struct nfa *, struct state *, struct state *);
+static struct state *emptyreachable(struct nfa *, struct state *,
+			   struct state *, struct arc **);
 static int	isconstraintarc(struct arc *);
 static int	hasconstraintout(struct state *);
 static void fixconstraintloops(struct nfa *, FILE *);