@@ -318,21 +318,22 @@ typedef struct
318
318
* arcs - outgoing arcs of this state (List of TrgmArc)
319
319
* enterKeys - enter keys reachable from this state without reading any
320
320
* predictable trigram (List of TrgmStateKey)
321
- * fin - flag indicating this state is final
322
- * init - flag indicating this state is initial
321
+ * flags - flag bits (TSTATE_INIT and/or TSTATE_FIN, defined below)
323
322
* parent - parent state, if this state has been merged into another
324
- * children - child states (states that have been merged into this one)
323
+ * tentParent - planned (tentative) parent state, set only while a merge is being considered and reset to NULL afterwards
325
324
* number - number of this state (used at the packaging stage)
326
325
*/
326
+ #define TSTATE_INIT 0x01 /* flag indicating this state is initial */
327
+ #define TSTATE_FIN 0x02 /* flag indicating this state is final */
328
+
327
329
typedef struct TrgmState
328
330
{
329
331
TrgmStateKey stateKey ; /* hashtable key: must be first field */
330
332
List * arcs ;
331
333
List * enterKeys ;
332
- bool fin ;
333
- bool init ;
334
+ int flags ;
334
335
struct TrgmState * parent ;
335
- List * children ;
336
+ struct TrgmState * tentParent ;
336
337
int number ;
337
338
} TrgmState ;
338
339
@@ -599,7 +600,7 @@ createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
599
600
* get from the initial state to the final state without reading any
600
601
* predictable trigram.
601
602
*/
602
- if (trgmNFA .initState -> fin )
603
+ if (trgmNFA .initState -> flags & TSTATE_FIN )
603
604
return NULL ;
604
605
605
606
/*
@@ -925,7 +926,7 @@ transformGraph(TrgmNFA *trgmNFA)
925
926
initkey .nstate = pg_reg_getinitialstate (trgmNFA -> regex );
926
927
927
928
initstate = getState (trgmNFA , & initkey );
928
- initstate -> init = true ;
929
+ initstate -> flags |= TSTATE_INIT ;
929
930
trgmNFA -> initState = initstate ;
930
931
931
932
/*
@@ -943,7 +944,7 @@ transformGraph(TrgmNFA *trgmNFA)
943
944
* actual processing.
944
945
*/
945
946
if (trgmNFA -> overflowed )
946
- state -> fin = true ;
947
+ state -> flags |= TSTATE_FIN ;
947
948
else
948
949
processState (trgmNFA , state );
949
950
@@ -968,7 +969,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
968
969
* queue is empty. But we can quit if the state gets marked final.
969
970
*/
970
971
addKey (trgmNFA , state , & state -> stateKey );
971
- while (trgmNFA -> keysQueue != NIL && !state -> fin )
972
+ while (trgmNFA -> keysQueue != NIL && !( state -> flags & TSTATE_FIN ) )
972
973
{
973
974
TrgmStateKey * key = (TrgmStateKey * ) linitial (trgmNFA -> keysQueue );
974
975
@@ -980,7 +981,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
980
981
* Add outgoing arcs only if state isn't final (we have no interest in
981
982
* outgoing arcs if we already match)
982
983
*/
983
- if (!state -> fin )
984
+ if (!( state -> flags & TSTATE_FIN ) )
984
985
addArcs (trgmNFA , state );
985
986
}
986
987
@@ -989,7 +990,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
989
990
* whether this should result in any further enter keys being added.
990
991
* If so, add those keys to keysQueue so that processState will handle them.
991
992
*
992
- * If the enter key is for the NFA's final state, set state->fin = TRUE .
993
+ * If the enter key is for the NFA's final state, set TSTATE_FIN in state->flags.
993
994
* This situation means that we can reach the final state from this expanded
994
995
* state without reading any predictable trigram, so we must consider this
995
996
* state as an accepting one.
@@ -1059,7 +1060,7 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
1059
1060
/* If state is now known final, mark it and we're done */
1060
1061
if (key -> nstate == pg_reg_getfinalstate (trgmNFA -> regex ))
1061
1062
{
1062
- state -> fin = true ;
1063
+ state -> flags |= TSTATE_FIN ;
1063
1064
return ;
1064
1065
}
1065
1066
@@ -1385,10 +1386,9 @@ getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
1385
1386
/* New state: initialize and queue it */
1386
1387
state -> arcs = NIL ;
1387
1388
state -> enterKeys = NIL ;
1388
- state -> init = false;
1389
- state -> fin = false;
1389
+ state -> flags = 0 ;
1390
1390
state -> parent = NULL ;
1391
- state -> children = NIL ;
1391
+ state -> tentParent = NULL ;
1392
1392
state -> number = -1 ;
1393
1393
1394
1394
trgmNFA -> queue = lappend (trgmNFA -> queue , state );
@@ -1582,20 +1582,60 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1582
1582
TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1583
1583
TrgmState * source = arcInfo -> source ,
1584
1584
* target = arcInfo -> target ;
1585
+ int source_flags ,
1586
+ target_flags ;
1585
1587
1586
1588
/* examine parent states, if any merging has already happened */
1587
1589
while (source -> parent )
1588
1590
source = source -> parent ;
1589
1591
while (target -> parent )
1590
1592
target = target -> parent ;
1591
1593
1592
- if ((source -> init || target -> init ) &&
1593
- (source -> fin || target -> fin ))
1594
+ /* we must also consider merges we are planning right now */
1595
+ source_flags = source -> flags ;
1596
+ while (source -> tentParent )
1597
+ {
1598
+ source = source -> tentParent ;
1599
+ source_flags |= source -> flags ;
1600
+ }
1601
+ target_flags = target -> flags ;
1602
+ while (target -> tentParent )
1603
+ {
1604
+ target = target -> tentParent ;
1605
+ target_flags |= target -> flags ;
1606
+ }
1607
+
1608
+ /* would fully-merged state have both INIT and FIN set? */
1609
+ if (((source_flags | target_flags ) & (TSTATE_INIT | TSTATE_FIN )) ==
1610
+ (TSTATE_INIT | TSTATE_FIN ))
1594
1611
{
1595
1612
canRemove = false;
1596
1613
break ;
1597
1614
}
1615
+
1616
+ /* ok so far, so remember planned merge */
1617
+ if (source != target )
1618
+ target -> tentParent = source ;
1598
1619
}
1620
+
1621
+ /* We must clear all the tentParent fields before continuing */
1622
+ foreach (cell , trgmInfo -> arcs )
1623
+ {
1624
+ TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1625
+ TrgmState * target = arcInfo -> target ;
1626
+ TrgmState * ttarget ;
1627
+
1628
+ while (target -> parent )
1629
+ target = target -> parent ;
1630
+
1631
+ while ((ttarget = target -> tentParent ) != NULL )
1632
+ {
1633
+ target -> tentParent = NULL ;
1634
+ target = ttarget ;
1635
+ }
1636
+ }
1637
+
1638
+ /* Now, move on if we can't drop this trigram */
1599
1639
if (!canRemove )
1600
1640
continue ;
1601
1641
@@ -1611,7 +1651,12 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
1611
1651
while (target -> parent )
1612
1652
target = target -> parent ;
1613
1653
if (source != target )
1654
+ {
1614
1655
mergeStates (source , target );
1656
+ /* Assert we didn't merge initial and final states */
1657
+ Assert ((source -> flags & (TSTATE_INIT | TSTATE_FIN )) !=
1658
+ (TSTATE_INIT | TSTATE_FIN ));
1659
+ }
1615
1660
}
1616
1661
1617
1662
/* Mark trigram unexpanded, and update totals */
@@ -1754,27 +1799,15 @@ fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
1754
1799
static void
1755
1800
mergeStates (TrgmState * state1 , TrgmState * state2 )
1756
1801
{
1757
- ListCell * cell ;
1758
-
1759
1802
Assert (state1 != state2 );
1760
1803
Assert (!state1 -> parent );
1761
1804
Assert (!state2 -> parent );
1762
1805
1763
- /* state1 absorbs state2's init/fin flags */
1764
- state1 -> init |= state2 -> init ;
1765
- state1 -> fin |= state2 -> fin ;
1806
+ /* state1 absorbs state2's flags */
1807
+ state1 -> flags |= state2 -> flags ;
1766
1808
1767
- /* state2, and all its children, become children of state1 */
1768
- foreach (cell , state2 -> children )
1769
- {
1770
- TrgmState * state = (TrgmState * ) lfirst (cell );
1771
-
1772
- state -> parent = state1 ;
1773
- }
1809
+ /* state2 becomes a child of state1; states already merged into state2 now reach state1 by following their parent links */
1774
1810
state2 -> parent = state1 ;
1775
- state1 -> children = list_concat (state1 -> children , state2 -> children );
1776
- state1 -> children = lappend (state1 -> children , state2 );
1777
- state2 -> children = NIL ;
1778
1811
}
1779
1812
1780
1813
/*
@@ -1843,9 +1876,9 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
1843
1876
1844
1877
if (state -> number < 0 )
1845
1878
{
1846
- if (state -> init )
1879
+ if (state -> flags & TSTATE_INIT )
1847
1880
state -> number = 0 ;
1848
- else if (state -> fin )
1881
+ else if (state -> flags & TSTATE_FIN )
1849
1882
state -> number = 1 ;
1850
1883
else
1851
1884
{
@@ -2109,9 +2142,9 @@ printTrgmNFA(TrgmNFA *trgmNFA)
2109
2142
ListCell * cell ;
2110
2143
2111
2144
appendStringInfo (& buf , "s%p" , (void * ) state );
2112
- if (state -> fin )
2145
+ if (state -> flags & TSTATE_FIN )
2113
2146
appendStringInfoString (& buf , " [shape = doublecircle]" );
2114
- if (state -> init )
2147
+ if (state -> flags & TSTATE_INIT )
2115
2148
initstate = state ;
2116
2149
appendStringInfo (& buf , " [label = \"%d\"]" , state -> stateKey .nstate );
2117
2150
appendStringInfoString (& buf , ";\n" );
0 commit comments