You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/07/03 22:04:05 UTC

svn commit: r1689081 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/automaton/ lucene/core/src/test/org/apache/lucene/util/automaton/ lucene/suggest/ lucene/suggest/src/test/org/apache/lucene/se...

Author: mikemccand
Date: Fri Jul  3 20:04:05 2015
New Revision: 1689081

URL: http://svn.apache.org/r1689081
Log:
LUCENE-6365: fix buggy Operations.topoSort; add test

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java Fri Jul  3 20:04:05 2015
@@ -150,7 +150,7 @@ final public class Operations {
 
   /**
    * Returns an automaton that accepts the union of the empty string and the
-   * language of the given automaton.
+   * language of the given automaton.  This may create a dead state.
    * <p>
    * Complexity: linear in number of states.
    */
@@ -1130,7 +1130,6 @@ final public class Operations {
     IntsRefBuilder builder = new IntsRefBuilder();
     HashSet<Integer> visited = new HashSet<>();
     int s = 0;
-    boolean done;
     Transition t = new Transition();
     while (true) {
       visited.add(s);
@@ -1421,32 +1420,49 @@ final public class Operations {
     return result;
   }
 
-  /** Returns the topological sort of all states.  Behavior is undefined if this
-   *  automaton has cycles.  CPU cost is O(numTransitions). */
+  /** Returns the topological sort of all states reachable from
+   *  the initial state.  Behavior is undefined if this
+   *  automaton has cycles.  CPU cost is O(numTransitions),
+   *  and the implementation is recursive so an automaton
+   *  matching long strings may exhaust the java stack. */
   public static int[] topoSortStates(Automaton a) {
+    if (a.getNumStates() == 0) {
+      return new int[0];
+    }
     int numStates = a.getNumStates();
     int[] states = new int[numStates];
     final BitSet visited = new BitSet(numStates);
-    final LinkedList<Integer> worklist = new LinkedList<>();
-    worklist.add(0);
-    visited.set(0);
-    int upto = 0;
-    states[upto] = 0;
-    upto++;
-    Transition t = new Transition();
-    while (worklist.size() > 0) {
-      int s = worklist.removeFirst();
-      int count = a.initTransition(s, t);
-      for (int i=0;i<count;i++) {
-        a.getNextTransition(t);
-        if (!visited.get(t.dest)) {
-          visited.set(t.dest);
-          worklist.add(t.dest);
-          states[upto++] = t.dest;
-        }
-      }
+    int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
+
+    if (upto < states.length) {
+      // There were dead states
+      int[] newStates = new int[upto];
+      System.arraycopy(states, 0, newStates, 0, upto);
+      states = newStates;
+    }
+
+    // Reverse the order:
+    for(int i=0;i<states.length/2;i++) {
+      int s = states[i];
+      states[i] = states[states.length-1-i];
+      states[states.length-1-i] = s;
     }
 
     return states;
   }
+
+  private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
+    Transition t = new Transition();
+    int count = a.initTransition(state, t);
+    for (int i=0;i<count;i++) {
+      a.getNextTransition(t);
+      if (!visited.get(t.dest)) {
+        visited.set(t.dest);
+        upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
+      }
+    }
+    states[upto] = state;
+    upto++;
+    return upto;
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java Fri Jul  3 20:04:05 2015
@@ -783,6 +783,7 @@ public class TestAutomaton extends Lucen
           if (VERBOSE) {
             System.out.println("  op=optional");
           }
+          // NOTE: This can add a dead state:
           a = Operations.optional(a);
           terms.add(new BytesRef());
         }
@@ -1032,11 +1033,48 @@ public class TestAutomaton extends Lucen
 
       assertSame(terms, a);
       assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());
+
+      if (random().nextInt(10) == 7) {
+        a = verifyTopoSort(a);
+      }
     }
 
     assertSame(terms, a);
   }
 
+  /** Runs topo sort, verifies transitions then only "go forwards", and
+   *  builds and returns new automaton with those remapped toposorted states. */
+  private Automaton verifyTopoSort(Automaton a) {
+    int[] sorted = Operations.topoSortStates(a);
+    // This can be < if we removed dead states:
+    assertTrue(sorted.length <= a.getNumStates());
+    Automaton a2 = new Automaton();
+    int[] stateMap = new int[a.getNumStates()];
+    Arrays.fill(stateMap, -1);
+    Transition transition = new Transition();
+    for(int state : sorted) {
+      int newState = a2.createState();
+      a2.setAccept(newState, a.isAccept(state));
+
+      // Each state should only appear once in the sort:
+      assertEquals(-1, stateMap[state]);
+      stateMap[state] = newState;
+    }
+
+    // 2nd pass: add new transitions
+    for(int state : sorted) {
+      int count = a.initTransition(state, transition);
+      for(int i=0;i<count;i++) {
+        a.getNextTransition(transition);
+        assert stateMap[transition.dest] > stateMap[state];
+        a2.addTransition(stateMap[state], stateMap[transition.dest], transition.min, transition.max);
+      }
+    }
+
+    a2.finishState();
+    return a2;
+  }
+
   private void assertSame(Collection<BytesRef> terms, Automaton a) {
 
     try {

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Fri Jul  3 20:04:05 2015
@@ -1233,6 +1233,8 @@ public class AnalyzingSuggesterTest exte
       suggester.build(new InputArrayIterator(new Input[] {
             new Input(bigString, 7)}));
       fail("did not hit expected exception");
+    } catch (StackOverflowError soe) {
+      // OK
     } catch (IllegalArgumentException iae) {
       // expected
     }