You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/13 21:20:06 UTC

svn commit: r1102875 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/automaton/ test-framework/org/apache/lucene/util/automaton/ test/org/apache/lucene/util/automaton/

Author: rmuir
Date: Fri May 13 19:20:05 2011
New Revision: 1102875

URL: http://svn.apache.org/viewvc?rev=1102875&view=rev
Log:
LUCENE-3094: optimize lev automata construction, don't keep around detached states

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java?rev=1102875&r1=1102874&r2=1102875&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java Fri May 13 19:20:05 2011
@@ -143,13 +143,16 @@ public class LevenshteinAutomata {
       if (dest >= 0)
         for (int r = 0; r < numRanges; r++)
           states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));      
-      // reduce the state: this doesn't appear to help anything
-      //states[k].reduce();
     }
 
     Automaton a = new Automaton(states[0]);
     a.setDeterministic(true);
-    a.setNumberedStates(states);
+    // we create some useless unconnected states, and its a net-win overall to remove these,
+    // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
+    // so, while we could set our numberedStates here, its actually best not to, and instead to
+    // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
+    //a.setNumberedStates(states);
+    a.reduce();
     // we need not trim transitions to dead states, as they are not created.
     //a.restoreInvariant();
     return a;

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1102875&r1=1102874&r2=1102875&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/automaton/AutomatonTestUtil.java Fri May 13 19:20:05 2011
@@ -397,4 +397,15 @@ public class AutomatonTestUtil {
     path.remove(s);
     return true;
   }
+  
+  
+  /**
+   * Checks that an automaton has no detached states that are unreachable
+   * from the initial state.
+   */
+  public static void assertNoDetachedStates(Automaton a) {
+    int numStates = a.getNumberOfStates();
+    a.clearNumberedStates(); // force recomputation of cached numbered states
+    assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
+  }
 }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1102875&r1=1102874&r2=1102875&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java Fri May 13 19:20:05 2011
@@ -39,6 +39,11 @@ public class TestLevenshteinAutomata ext
     assertCharVectors(2);
   }
   
+  // LUCENE-3094
+  public void testNoWastedStates() throws Exception {
+    AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
+  }
+  
   /** 
    * Tests all possible characteristic vectors for some n
    * This exhaustively tests the parametric transitions tables.
@@ -66,6 +71,7 @@ public class TestLevenshteinAutomata ext
       assertNotNull(automata[n]);
       assertTrue(automata[n].isDeterministic());
       assertTrue(SpecialOperations.isFinite(automata[n]));
+      AutomatonTestUtil.assertNoDetachedStates(automata[n]);
       // check that the dfa for n-1 accepts a subset of the dfa for n
       if (n > 0) {
         assertTrue(automata[n-1].subsetOf(automata[n]));