You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/07/03 22:04:05 UTC
svn commit: r1689081 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/automaton/
lucene/core/src/test/org/apache/lucene/util/automaton/ lucene/suggest/
lucene/suggest/src/test/org/apache/lucene/se...
Author: mikemccand
Date: Fri Jul 3 20:04:05 2015
New Revision: 1689081
URL: http://svn.apache.org/r1689081
Log:
LUCENE-6365: fix buggy Operations.topoSort; add test
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
lucene/dev/branches/branch_5x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java Fri Jul 3 20:04:05 2015
@@ -150,7 +150,7 @@ final public class Operations {
/**
* Returns an automaton that accepts the union of the empty string and the
- * language of the given automaton.
+ * language of the given automaton. This may create a dead state.
* <p>
* Complexity: linear in number of states.
*/
@@ -1130,7 +1130,6 @@ final public class Operations {
IntsRefBuilder builder = new IntsRefBuilder();
HashSet<Integer> visited = new HashSet<>();
int s = 0;
- boolean done;
Transition t = new Transition();
while (true) {
visited.add(s);
@@ -1421,32 +1420,49 @@ final public class Operations {
return result;
}
- /** Returns the topological sort of all states. Behavior is undefined if this
- * automaton has cycles. CPU cost is O(numTransitions). */
+ /** Returns the topological sort of all states reachable from
+ * the initial state. Behavior is undefined if this
+ * automaton has cycles. CPU cost is O(numTransitions),
+ * and the implementation is recursive so an automaton
+ * matching long strings may exhaust the java stack. */
public static int[] topoSortStates(Automaton a) {
+ if (a.getNumStates() == 0) {
+ return new int[0];
+ }
int numStates = a.getNumStates();
int[] states = new int[numStates];
final BitSet visited = new BitSet(numStates);
- final LinkedList<Integer> worklist = new LinkedList<>();
- worklist.add(0);
- visited.set(0);
- int upto = 0;
- states[upto] = 0;
- upto++;
- Transition t = new Transition();
- while (worklist.size() > 0) {
- int s = worklist.removeFirst();
- int count = a.initTransition(s, t);
- for (int i=0;i<count;i++) {
- a.getNextTransition(t);
- if (!visited.get(t.dest)) {
- visited.set(t.dest);
- worklist.add(t.dest);
- states[upto++] = t.dest;
- }
- }
+ int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
+
+ if (upto < states.length) {
+ // There were dead states
+ int[] newStates = new int[upto];
+ System.arraycopy(states, 0, newStates, 0, upto);
+ states = newStates;
+ }
+
+ // Reverse the order:
+ for(int i=0;i<states.length/2;i++) {
+ int s = states[i];
+ states[i] = states[states.length-1-i];
+ states[states.length-1-i] = s;
}
return states;
}
+
+ private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
+ Transition t = new Transition();
+ int count = a.initTransition(state, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (!visited.get(t.dest)) {
+ visited.set(t.dest);
+ upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
+ }
+ }
+ states[upto] = state;
+ upto++;
+ return upto;
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java Fri Jul 3 20:04:05 2015
@@ -783,6 +783,7 @@ public class TestAutomaton extends Lucen
if (VERBOSE) {
System.out.println(" op=optional");
}
+ // NOTE: This can add a dead state:
a = Operations.optional(a);
terms.add(new BytesRef());
}
@@ -1032,11 +1033,48 @@ public class TestAutomaton extends Lucen
assertSame(terms, a);
assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());
+
+ if (random().nextInt(10) == 7) {
+ a = verifyTopoSort(a);
+ }
}
assertSame(terms, a);
}
+ /** Runs topo sort, verifies transitions then only "go forwards", and
+ * builds and returns new automaton with those remapped toposorted states. */
+ private Automaton verifyTopoSort(Automaton a) {
+ int[] sorted = Operations.topoSortStates(a);
+ // This can be < if we removed dead states:
+ assertTrue(sorted.length <= a.getNumStates());
+ Automaton a2 = new Automaton();
+ int[] stateMap = new int[a.getNumStates()];
+ Arrays.fill(stateMap, -1);
+ Transition transition = new Transition();
+ for(int state : sorted) {
+ int newState = a2.createState();
+ a2.setAccept(newState, a.isAccept(state));
+
+ // Each state should only appear once in the sort:
+ assertEquals(-1, stateMap[state]);
+ stateMap[state] = newState;
+ }
+
+ // 2nd pass: add new transitions
+ for(int state : sorted) {
+ int count = a.initTransition(state, transition);
+ for(int i=0;i<count;i++) {
+ a.getNextTransition(transition);
+ assert stateMap[transition.dest] > stateMap[state];
+ a2.addTransition(stateMap[state], stateMap[transition.dest], transition.min, transition.max);
+ }
+ }
+
+ a2.finishState();
+ return a2;
+ }
+
private void assertSame(Collection<BytesRef> terms, Automaton a) {
try {
Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1689081&r1=1689080&r2=1689081&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Fri Jul 3 20:04:05 2015
@@ -1233,6 +1233,8 @@ public class AnalyzingSuggesterTest exte
suggester.build(new InputArrayIterator(new Input[] {
new Input(bigString, 7)}));
fail("did not hit expected exception");
+ } catch (StackOverflowError soe) {
+ // OK
} catch (IllegalArgumentException iae) {
// expected
}