You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/11 23:56:22 UTC
svn commit: r1602031 - in /lucene/dev/branches/lucene5752/lucene: core/src/java/org/apache/lucene/analysis/ core/src/java/org/apache/lucene/util/automaton/ core/src/test/org/apache/lucene/analysis/ suggest/src/java/org/apache/lucene/search/suggest/anal...

Author: mikemccand
Date: Wed Jun 11 21:56:21 2014
New Revision: 1602031

URL: http://svn.apache.org/r1602031
Log:
LUCENE-5752: cutover suggesters

Modified:
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java
    lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java
    lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
    lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
    lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
    lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
    lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java Wed Jun 11 21:56:21 2014
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RollingBuffer;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.State;
 import org.apache.lucene.util.automaton.Transition;
 
@@ -61,15 +62,15 @@ public class TokenStreamToAutomaton {
 
   private static class Position implements RollingBuffer.Resettable {
     // Any tokens that ended at our position arrive to this state:
-    State arriving;
+    int arriving = -1;
 
     // Any tokens that start at our position leave from this state:
-    State leaving;
+    int leaving = -1;
 
     @Override
     public void reset() {
-      arriving = null;
-      leaving = null;
+      arriving = -1;
+      leaving = -1;
     }
   }
 
@@ -98,9 +99,9 @@ public class TokenStreamToAutomaton {
    *  TokenStream}, and creates the corresponding
    *  automaton where arcs are bytes (or Unicode code points 
    *  if unicodeArcs = true) from each term. */
-  public Automaton toAutomaton(TokenStream in) throws IOException {
-    final Automaton a = new Automaton();
-    boolean deterministic = true;
+  public LightAutomaton toAutomaton(TokenStream in) throws IOException {
+    final LightAutomaton.Builder builder = new LightAutomaton.Builder();
+    builder.createState();
 
     final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
     final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
@@ -132,34 +133,29 @@ public class TokenStreamToAutomaton {
         pos += posInc;
 
         posData = positions.get(pos);
-        assert posData.leaving == null;
+        assert posData.leaving == -1;
 
-        if (posData.arriving == null) {
+        if (posData.arriving == -1) {
           // No token ever arrived to this position
           if (pos == 0) {
             // OK: this is the first token
-            posData.leaving = a.getInitialState();
+            posData.leaving = 0;
           } else {
             // This means there's a hole (eg, StopFilter
             // does this):
-            posData.leaving = new State();
-            addHoles(a.getInitialState(), positions, pos);
+            posData.leaving = builder.createState();
+            addHoles(builder, positions, pos);
           }
         } else {
-          posData.leaving = new State();
-          posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
+          posData.leaving = builder.createState();
+          builder.addTransition(posData.arriving, posData.leaving, POS_SEP);
           if (posInc > 1) {
             // A token spanned over a hole; add holes
             // "under" it:
-            addHoles(a.getInitialState(), positions, pos);
+            addHoles(builder, positions, pos);
           }
         }
         positions.freeBefore(pos);
-      } else {
-        // note: this isn't necessarily true. its just that we aren't surely det.
-        // we could optimize this further (e.g. buffer and sort synonyms at a position)
-        // but thats probably overkill. this is cheap and dirty
-        deterministic = false;
       }
 
       final int endPos = pos + posLengthAtt.getPositionLength();
@@ -168,31 +164,33 @@ public class TokenStreamToAutomaton {
       final BytesRef termUTF8 = changeToken(term);
       int[] termUnicode = null;
       final Position endPosData = positions.get(endPos);
-      if (endPosData.arriving == null) {
-        endPosData.arriving = new State();
+      if (endPosData.arriving == -1) {
+        endPosData.arriving = builder.createState();
       }
 
-      State state = posData.leaving;
       int termLen;
       if (unicodeArcs) {
         final String utf16 = termUTF8.utf8ToString();
         termUnicode = new int[utf16.codePointCount(0, utf16.length())];
         termLen = termUnicode.length;
-        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
+        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
           termUnicode[j++] = cp = utf16.codePointAt(i);
+        }
       } else {
         termLen = termUTF8.length;
       }
 
+      int state = posData.leaving;
+
       for(int byteIDX=0;byteIDX<termLen;byteIDX++) {
-        final State nextState = byteIDX == termLen-1 ? endPosData.arriving : new State();
+        final int nextState = byteIDX == termLen-1 ? endPosData.arriving : builder.createState();
         int c;
         if (unicodeArcs) {
           c = termUnicode[byteIDX];
         } else {
           c = termUTF8.bytes[termUTF8.offset + byteIDX] & 0xff;
         }
-        state.addTransition(new Transition(c, nextState));
+        builder.addTransition(state, nextState, c);
         state = nextState;
       }
 
@@ -200,28 +198,26 @@ public class TokenStreamToAutomaton {
     }
 
     in.end();
-    State endState = null;
+    int endState = -1;
     if (offsetAtt.endOffset() > maxOffset) {
-      endState = new State();
-      endState.setAccept(true);
+      endState = builder.createState();
+      builder.setAccept(endState, true);
     }
 
     pos++;
     while (pos <= positions.getMaxPos()) {
       posData = positions.get(pos);
-      if (posData.arriving != null) {
-        if (endState != null) {
-          posData.arriving.addTransition(new Transition(POS_SEP, endState));
+      if (posData.arriving != -1) {
+        if (endState != -1) {
+          builder.addTransition(posData.arriving, endState, POS_SEP);
         } else {
-          posData.arriving.setAccept(true);
+          builder.setAccept(posData.arriving, true);
         }
       }
       pos++;
     }
 
-    //toDot(a);
-    a.setDeterministic(deterministic);
-    return a;
+    return builder.finish();
   }
 
   // for debugging!
@@ -235,26 +231,26 @@ public class TokenStreamToAutomaton {
   }
   */
 
-  private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
+  private static void addHoles(LightAutomaton.Builder builder, RollingBuffer<Position> positions, int pos) {
     Position posData = positions.get(pos);
     Position prevPosData = positions.get(pos-1);
 
-    while(posData.arriving == null || prevPosData.leaving == null) {
-      if (posData.arriving == null) {
-        posData.arriving = new State();
-        posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
+    while(posData.arriving == -1 || prevPosData.leaving == -1) {
+      if (posData.arriving == -1) {
+        posData.arriving = builder.createState();
+        builder.addTransition(posData.arriving, posData.leaving, POS_SEP);
       }
-      if (prevPosData.leaving == null) {
+      if (prevPosData.leaving == -1) {
         if (pos == 1) {
-          prevPosData.leaving = startState;
+          prevPosData.leaving = 0;
         } else {
-          prevPosData.leaving = new State();
+          prevPosData.leaving = builder.createState();
         }
-        if (prevPosData.arriving != null) {
-          prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
+        if (prevPosData.arriving != -1) {
+          builder.addTransition(prevPosData.arriving, prevPosData.leaving, POS_SEP);
         }
       }
-      prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
+      builder.addTransition(prevPosData.leaving, posData.arriving, HOLE);
       pos--;
       if (pos <= 0) {
         break;

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java Wed Jun 11 21:56:21 2014
@@ -483,6 +483,21 @@ final public class BasicAutomata {
     return a;
   }
 
+  public static LightAutomaton makeStringLight(int[] word, int offset, int length) {
+    LightAutomaton a = new LightAutomaton();
+    a.createState();
+    int s = 0;
+    for (int i = offset; i < offset+length; i++) {
+      int s2 = a.createState();
+      a.addTransition(s, s2, word[i]);
+      s = s2;
+    }
+    a.setAccept(s, true);
+    a.finish();
+
+    return a;
+  }
+
   /**
    * Returns a new (deterministic and minimal) automaton that accepts the union
    * of the given collection of {@link BytesRef}s representing UTF-8 encoded

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java Wed Jun 11 21:56:21 2014
@@ -35,7 +35,7 @@ import org.apache.lucene.util.Sorter;
 //   - could use packed int arrays instead
 //   - could encode dest w/ delta from to?
 
-// nocommit should we keep determinized bit?
+// nocommit should we keep determinized bit?  it could be entirely privately computed now?
 
 /** Uses only int[]s to represent the automaton, but requires that all
  *  transitions for each state are added at once.  If this is too restrictive,

Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java Wed Jun 11 21:56:21 2014
@@ -274,6 +274,9 @@ public final class UTF32ToUTF8Light {
     //if (utf32.isSingleton()) {
     //utf32 = utf32.cloneExpanded();
     //}
+    if (utf32.getNumStates() == 0) {
+      return utf32;
+    }
 
     int[] map = new int[utf32.getNumStates()];
     Arrays.fill(map, -1);

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java Wed Jun 11 21:56:21 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis;
  */
 
 import java.io.IOException;
-import java.io.StringWriter;
 import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.LightAutomaton;
 
 public class TestGraphTokenizers extends BaseTokenStreamTestCase {
 
@@ -409,9 +410,10 @@ public class TestGraphTokenizers extends
       new Token[] {
         token("abc", 1, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected = BasicAutomata.makeString("abc");
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected = s2a("abc");
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testMultipleHoles() throws Exception {
@@ -420,9 +422,10 @@ public class TestGraphTokenizers extends
         token("a", 1, 1),
         token("b", 3, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b")); 
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b")); 
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testSynOverMultipleHoles() throws Exception {
@@ -432,11 +435,12 @@ public class TestGraphTokenizers extends
         token("x", 0, 3),
         token("b", 3, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton a1 = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b")); 
-    final Automaton a2 = join(s2a("x"), SEP_A, s2a("b")); 
-    final Automaton expected = BasicOperations.union(a1, a2);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton a1 = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b")); 
+    final LightAutomaton a2 = join(s2a("x"), SEP_A, s2a("b")); 
+    final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   // for debugging!
@@ -450,25 +454,25 @@ public class TestGraphTokenizers extends
   }
   */
 
-  private static final Automaton SEP_A = BasicAutomata.makeChar(TokenStreamToAutomaton.POS_SEP);
-  private static final Automaton HOLE_A = BasicAutomata.makeChar(TokenStreamToAutomaton.HOLE);
+  private static final LightAutomaton SEP_A = BasicAutomata.makeCharLight(TokenStreamToAutomaton.POS_SEP);
+  private static final LightAutomaton HOLE_A = BasicAutomata.makeCharLight(TokenStreamToAutomaton.HOLE);
 
-  private Automaton join(String ... strings) {
-    List<Automaton> as = new ArrayList<>();
+  private LightAutomaton join(String ... strings) {
+    List<LightAutomaton> as = new ArrayList<>();
     for(String s : strings) {
-      as.add(BasicAutomata.makeString(s));
+      as.add(s2a(s));
       as.add(SEP_A);
     }
     as.remove(as.size()-1);
-    return BasicOperations.concatenate(as);
+    return BasicOperations.concatenateLight(as);
   }
 
-  private Automaton join(Automaton ... as) {
-    return BasicOperations.concatenate(Arrays.asList(as));
+  private LightAutomaton join(LightAutomaton ... as) {
+    return BasicOperations.concatenateLight(Arrays.asList(as));
   }
 
-  private Automaton s2a(String s) {
-    return BasicAutomata.makeString(s);
+  private LightAutomaton s2a(String s) {
+    return BasicAutomata.makeStringLight(s);
   }
 
   public void testTwoTokens() throws Exception {
@@ -478,11 +482,12 @@ public class TestGraphTokenizers extends
         token("abc", 1, 1),
         token("def", 1, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected =  join("abc", "def");
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected =  join("abc", "def");
 
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testHole() throws Exception {
@@ -492,12 +497,13 @@ public class TestGraphTokenizers extends
         token("abc", 1, 1),
         token("def", 2, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
 
-    final Automaton expected = join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"));
+    final LightAutomaton expected = join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"));
 
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testOverlappedTokensSausage() throws Exception {
@@ -508,11 +514,12 @@ public class TestGraphTokenizers extends
         token("abc", 1, 1),
         token("xyz", 0, 1)
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton a1 = BasicAutomata.makeString("abc");
-    final Automaton a2 = BasicAutomata.makeString("xyz");
-    final Automaton expected = BasicOperations.union(a1, a2);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton a1 = s2a("abc");
+    final LightAutomaton a2 = s2a("xyz");
+    final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testOverlappedTokensLattice() throws Exception {
@@ -523,13 +530,14 @@ public class TestGraphTokenizers extends
         token("xyz", 0, 2),
         token("def", 1, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton a1 = BasicAutomata.makeString("xyz");
-    final Automaton a2 = join("abc", "def");
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton a1 = s2a("xyz");
+    final LightAutomaton a2 = join("abc", "def");
                                                                    
-    final Automaton expected = BasicOperations.union(a1, a2);
+    final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testSynOverHole() throws Exception {
@@ -540,14 +548,15 @@ public class TestGraphTokenizers extends
         token("X", 0, 2),
         token("b", 2, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton a1 = BasicOperations.union(
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton a1 = BasicOperations.unionLight(
                                                join(s2a("a"), SEP_A, HOLE_A),
-                                               BasicAutomata.makeString("X"));
-    final Automaton expected = BasicOperations.concatenate(a1,
+                                               s2a("X"));
+    final LightAutomaton expected = BasicOperations.concatenateLight(a1,
                                                            join(SEP_A, s2a("b")));
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testSynOverHole2() throws Exception {
@@ -558,11 +567,12 @@ public class TestGraphTokenizers extends
         token("abc", 0, 3),
         token("def", 2, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected = BasicOperations.union(
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected = BasicOperations.unionLight(
                                                      join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")),
-                                                     BasicAutomata.makeString("abc"));
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+                                                     s2a("abc"));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testOverlappedTokensLattice2() throws Exception {
@@ -574,12 +584,13 @@ public class TestGraphTokenizers extends
         token("def", 1, 1),
         token("ghi", 1, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton a1 = BasicAutomata.makeString("xyz");
-    final Automaton a2 = join("abc", "def", "ghi");
-    final Automaton expected = BasicOperations.union(a1, a2);
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton a1 = s2a("xyz");
+    final LightAutomaton a2 = join("abc", "def", "ghi");
+    final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   public void testToDot() throws Exception {
@@ -594,10 +605,11 @@ public class TestGraphTokenizers extends
       new Token[] {
         token("abc", 2, 1),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected = join(HOLE_A, SEP_A, s2a("abc"));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected = join(HOLE_A, SEP_A, s2a("abc"));
     //toDot(actual);
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 
   // TODO: testEndsWithHole... but we need posInc to set in TS.end()
@@ -608,9 +620,10 @@ public class TestGraphTokenizers extends
         token("a", 1, 1),
         token("X", 0, 10),
       });
-    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
-    final Automaton expected = BasicOperations.union(BasicAutomata.makeString("a"),
-                                                     BasicAutomata.makeString("X"));
-    assertTrue(BasicOperations.sameLanguage(expected, actual));
+    final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
+    final LightAutomaton expected = BasicOperations.unionLight(s2a("a"),
+                                                               s2a("X"));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(expected),
+                                            BasicOperations.determinize(actual)));
   }
 }

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Wed Jun 11 21:56:21 2014
@@ -17,6 +17,16 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
@@ -35,28 +45,20 @@ import org.apache.lucene.util.OfflineSor
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.lucene.util.automaton.State;
 import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.fst.Util.Result;
 import org.apache.lucene.util.fst.Util.TopResults;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import org.apache.lucene.util.fst.Util;
 
 /**
  * Suggester that first analyzes the surface form, adds the
@@ -255,37 +257,65 @@ public class AnalyzingSuggester extends 
     return fst == null ? 0 : fst.ramBytesUsed();
   }
 
-  private void copyDestTransitions(State from, State to, List<Transition> transitions) {
-    if (to.isAccept()) {
-      from.setAccept(true);
-    }
-    for(Transition t : to.getTransitions()) {
-      transitions.add(t);
+  private int[] topoSortStates(LightAutomaton a) {
+    int[] states = new int[a.getNumStates()];
+    final Set<Integer> visited = new HashSet<>();
+    final LinkedList<Integer> worklist = new LinkedList<>();
+    worklist.add(0);
+    visited.add(0);
+    int upto = 0;
+    states[upto] = 0;
+    upto++;
+    LightAutomaton.Transition t = new LightAutomaton.Transition();
+    while (worklist.size() > 0) {
+      int s = worklist.removeFirst();
+      int count = a.initTransition(s, t);
+      for (int i=0;i<count;i++) {
+        a.getNextTransition(t);
+        if (!visited.contains(t.dest)) {
+          visited.add(t.dest);
+          worklist.add(t.dest);
+          states[upto++] = t.dest;
+        }
+      }
     }
+    return states;
   }
 
+
   // Replaces SEP with epsilon or remaps them if
   // we were asked to preserve them:
-  private void replaceSep(Automaton a) {
+  private LightAutomaton replaceSep(LightAutomaton a) {
 
-    State[] states = a.getNumberedStates();
+    LightAutomaton result = new LightAutomaton();
+
+    // Copy all states over
+    int numStates = a.getNumStates();
+    for(int s=0;s<numStates;s++) {
+      result.createState();
+      result.setAccept(s, a.isAccept(s));
+    }
 
     // Go in reverse topo sort so we know we only have to
     // make one pass:
-    for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) {
-      final State state = states[stateNumber];
+    LightAutomaton.Transition t = new LightAutomaton.Transition();
+    int[] topoSortStates = topoSortStates(a);
+    for(int i=0;i<topoSortStates.length;i++) {
+      int state = topoSortStates[topoSortStates.length-1-i];
       List<Transition> newTransitions = new ArrayList<>();
-      for(Transition t : state.getTransitions()) {
-        assert t.getMin() == t.getMax();
-        if (t.getMin() == TokenStreamToAutomaton.POS_SEP) {
+      int count = a.initTransition(state, t);
+      for(int j=0;j<count;j++) {
+        a.getNextTransition(t);
+        if (t.min == TokenStreamToAutomaton.POS_SEP) {
+          assert t.max == TokenStreamToAutomaton.POS_SEP;
           if (preserveSep) {
             // Remap to SEP_LABEL:
-            newTransitions.add(new Transition(SEP_LABEL, t.getDest()));
+            result.addTransition(state, t.dest, SEP_LABEL);
           } else {
-            copyDestTransitions(state, t.getDest(), newTransitions);
-            a.setDeterministic(false);
+            result.addEpsilon(state, t.dest);
           }
-        } else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
+        } else if (t.min == TokenStreamToAutomaton.HOLE) {
+          assert t.max == TokenStreamToAutomaton.HOLE;
 
           // Just remove the hole: there will then be two
           // SEP tokens next to each other, which will only
@@ -294,19 +324,21 @@ public class AnalyzingSuggester extends 
           // that's somehow a problem we can always map HOLE
           // to a dedicated byte (and escape it in the
           // input).
-          copyDestTransitions(state, t.getDest(), newTransitions);
-          a.setDeterministic(false);
+          result.addEpsilon(state, t.dest);
         } else {
-          newTransitions.add(t);
+          result.addTransition(state, t.dest, t.min, t.max);
         }
       }
-      state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()]));
     }
+
+    result.finish();
+
+    return result;
   }
 
   /** Used by subclass to change the lookup automaton, if
    *  necessary. */
-  protected Automaton convertAutomaton(Automaton a) {
+  protected LightAutomaton convertAutomaton(LightAutomaton a) {
     return a;
   }
   
@@ -665,8 +697,7 @@ public class AnalyzingSuggester extends 
     }
     final BytesRef utf8Key = new BytesRef(key);
     try {
-
-      Automaton lookupAutomaton = toLookupAutomaton(key);
+      LightAutomaton lookupAutomaton = toLookupAutomaton(key);
 
       final CharsRef spare = new CharsRef();
 
@@ -818,7 +849,7 @@ public class AnalyzingSuggester extends 
 
   /** Returns all prefix paths to initialize the search. */
   protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
-                                                                       Automaton lookupAutomaton,
+                                                                       LightAutomaton lookupAutomaton,
                                                                        FST<Pair<Long,BytesRef>> fst)
     throws IOException {
     return prefixPaths;
@@ -826,7 +857,7 @@ public class AnalyzingSuggester extends 
   
   final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
     // Analyze surface form:
-    Automaton automaton = null;
+    LightAutomaton automaton = null;
     try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
 
       // Create corresponding automaton: labels are bytes
@@ -835,7 +866,7 @@ public class AnalyzingSuggester extends 
       automaton = ts2a.toAutomaton(ts);
     }
 
-    replaceSep(automaton);
+    automaton = replaceSep(automaton);
     automaton = convertAutomaton(automaton);
 
     // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
@@ -848,32 +879,25 @@ public class AnalyzingSuggester extends 
     // TODO: we could walk & add simultaneously, so we
     // don't have to alloc [possibly biggish]
     // intermediate HashSet in RAM:
+
     return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
   }
 
-  final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
+  final LightAutomaton toLookupAutomaton(final CharSequence key) throws IOException {
     // TODO: is there a Reader from a CharSequence?
     // Turn tokenstream into automaton:
-    Automaton automaton = null;
+    LightAutomaton automaton = null;
     try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
-      automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
+        automaton = getTokenStreamToAutomaton().toAutomaton(ts);
     }
 
-    // TODO: we could use the end offset to "guess"
-    // whether the final token was a partial token; this
-    // would only be a heuristic ... but maybe an OK one.
-    // This way we could eg differentiate "net" from "net ",
-    // which we can't today...
-
-    replaceSep(automaton);
+    automaton = replaceSep(automaton);
 
     // TODO: we can optimize this somewhat by determinizing
     // while we convert
-    BasicOperations.determinize(automaton);
+    automaton = BasicOperations.determinize(automaton);
     return automaton;
   }
-  
-  
 
   /**
    * Returns the weight associated with an input string,

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java Wed Jun 11 21:56:21 2014
@@ -17,12 +17,14 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-import java.io.IOException;
 
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.State;
 import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.FST;
@@ -43,7 +45,7 @@ public class FSTUtil {
   public static final class Path<T> {
 
     /** Node in the automaton where path ends: */
-    public final State state;
+    public final int state;
 
     /** Node in the FST where path ends: */
     public final FST.Arc<T> fstNode;
@@ -55,7 +57,7 @@ public class FSTUtil {
     public final IntsRef input;
 
     /** Sole constructor. */
-    public Path(State state, FST.Arc<T> fstNode, T output, IntsRef input) {
+    public Path(int state, FST.Arc<T> fstNode, T output, IntsRef input) {
       this.state = state;
       this.fstNode = fstNode;
       this.output = output;
@@ -67,21 +69,23 @@ public class FSTUtil {
    * Enumerates all minimal prefix paths in the automaton that also intersect the FST,
    * accumulating the FST end node and output for each path.
    */
-  public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst)
+  public static <T> List<Path<T>> intersectPrefixPaths(LightAutomaton a, FST<T> fst)
       throws IOException {
-    assert a.isDeterministic();
+    assert BasicOperations.isDeterministic(a);
     final List<Path<T>> queue = new ArrayList<>();
     final List<Path<T>> endNodes = new ArrayList<>();
-    queue.add(new Path<>(a.getInitialState(), fst
+    queue.add(new Path<>(0, fst
         .getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
         new IntsRef()));
     
     final FST.Arc<T> scratchArc = new FST.Arc<>();
     final FST.BytesReader fstReader = fst.getBytesReader();
-    
+
+    LightAutomaton.Transition t = new LightAutomaton.Transition();
+
     while (queue.size() != 0) {
       final Path<T> path = queue.remove(queue.size() - 1);
-      if (path.state.isAccept()) {
+      if (a.isAccept(path.state)) {
         endNodes.add(path);
         // we can stop here if we accept this path,
         // we accept all further paths too
@@ -89,18 +93,20 @@ public class FSTUtil {
       }
       
       IntsRef currentInput = path.input;
-      for (Transition t : path.state.getTransitions()) {
-        final int min = t.getMin();
-        final int max = t.getMax();
+      int count = a.initTransition(path.state, t);
+      for (int i=0;i<count;i++) {
+        a.getNextTransition(t);
+        final int min = t.min;
+        final int max = t.max;
         if (min == max) {
-          final FST.Arc<T> nextArc = fst.findTargetArc(t.getMin(),
+          final FST.Arc<T> nextArc = fst.findTargetArc(t.min,
               path.fstNode, scratchArc, fstReader);
           if (nextArc != null) {
             final IntsRef newInput = new IntsRef(currentInput.length + 1);
             newInput.copyInts(currentInput);
-            newInput.ints[currentInput.length] = t.getMin();
+            newInput.ints[currentInput.length] = t.min;
             newInput.length = currentInput.length + 1;
-            queue.add(new Path<>(t.getDest(), new FST.Arc<T>()
+            queue.add(new Path<>(t.dest, new FST.Arc<T>()
                 .copyFrom(nextArc), fst.outputs
                 .add(path.output, nextArc.output), newInput));
           }
@@ -122,7 +128,7 @@ public class FSTUtil {
             newInput.copyInts(currentInput);
             newInput.ints[currentInput.length] = nextArc.label;
             newInput.length = currentInput.length + 1;
-            queue.add(new Path<>(t.getDest(), new FST.Arc<T>()
+            queue.add(new Path<>(t.dest, new FST.Arc<T>()
                 .copyFrom(nextArc), fst.outputs
                 .add(path.output, nextArc.output), newInput));
             final int label = nextArc.label; // used in assert

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java Wed Jun 11 21:56:21 2014
@@ -32,8 +32,10 @@ import org.apache.lucene.util.automaton.
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.lucene.util.automaton.UTF32ToUTF8;
+import org.apache.lucene.util.automaton.UTF32ToUTF8Light;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
 
@@ -177,7 +179,7 @@ public final class FuzzySuggester extend
   
   @Override
   protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
-                                                                       Automaton lookupAutomaton,
+                                                                       LightAutomaton lookupAutomaton,
                                                                        FST<Pair<Long,BytesRef>> fst)
     throws IOException {
 
@@ -191,7 +193,7 @@ public final class FuzzySuggester extend
     // "compete") ... in which case I think the wFST needs
     // to be log weights or something ...
 
-    Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
+    LightAutomaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
     /*
       Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
       w.write(levA.toDot());
@@ -202,10 +204,10 @@ public final class FuzzySuggester extend
   }
 
   @Override
-  protected Automaton convertAutomaton(Automaton a) {
+  protected LightAutomaton convertAutomaton(LightAutomaton a) {
     if (unicodeAware) {
-      Automaton utf8automaton = new UTF32ToUTF8().convert(a);
-      BasicOperations.determinize(utf8automaton);
+      LightAutomaton utf8automaton = new UTF32ToUTF8Light().convert(a);
+      utf8automaton = BasicOperations.determinize(utf8automaton);
       return utf8automaton;
     } else {
       return a;
@@ -219,16 +221,16 @@ public final class FuzzySuggester extend
     return tsta;
   }
 
-  Automaton toLevenshteinAutomata(Automaton automaton) {
+  LightAutomaton toLevenshteinAutomata(LightAutomaton automaton) {
     final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
-    Automaton subs[] = new Automaton[ref.size()];
+    LightAutomaton subs[] = new LightAutomaton[ref.size()];
     int upto = 0;
     for (IntsRef path : ref) {
       if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
-        subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length);
+        subs[upto] = BasicAutomata.makeStringLight(path.ints, path.offset, path.length);
         upto++;
       } else {
-        Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix);
+        LightAutomaton prefix = BasicAutomata.makeStringLight(path.ints, path.offset, nonFuzzyPrefix);
         int ints[] = new int[path.length-nonFuzzyPrefix];
         System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
         // TODO: maybe add alphaMin to LevenshteinAutomata,
@@ -237,9 +239,8 @@ public final class FuzzySuggester extend
         // edited... but then 0 byte is "in general" allowed
         // on input (but not in UTF8).
         LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
-        Automaton levAutomaton = lev.toAutomaton(maxEdits);
-        Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton));
-        combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already
+        LightAutomaton levAutomaton = lev.toLightAutomaton(maxEdits);
+        LightAutomaton combined = BasicOperations.concatenateLight(prefix, levAutomaton);
         subs[upto] = combined;
         upto++;
       }
@@ -247,19 +248,17 @@ public final class FuzzySuggester extend
 
     if (subs.length == 0) {
       // automaton is empty, there is no accepted paths through it
-      return BasicAutomata.makeEmpty(); // matches nothing
+      return BasicAutomata.makeEmptyLight(); // matches nothing
     } else if (subs.length == 1) {
       // no synonyms or anything: just a single path through the tokenstream
       return subs[0];
     } else {
       // multiple paths: this is really scary! is it slow?
       // maybe we should not do this and throw UOE?
-      Automaton a = BasicOperations.union(Arrays.asList(subs));
+      LightAutomaton a = BasicOperations.unionLight(Arrays.asList(subs));
       // TODO: we could call toLevenshteinAutomata() before det? 
       // this only happens if you have multiple paths anyway (e.g. synonyms)
-      BasicOperations.determinize(a);
-
-      return a;
+      return BasicOperations.determinize(a);
     }
   }
 }

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java Wed Jun 11 21:56:21 2014
@@ -92,4 +92,4 @@ public final class InputArrayIterator im
   public boolean hasContexts() {
     return hasContexts;
   }
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1602031&r1=1602030&r2=1602031&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Wed Jun 11 21:56:21 2014
@@ -40,14 +40,16 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.Input;
 import org.apache.lucene.search.suggest.InputArrayIterator;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.State;
 import org.apache.lucene.util.fst.Util;
 
@@ -752,29 +754,30 @@ public class FuzzySuggesterTest extends 
       // us the "answer key" (ie maybe we have a bug in
       // suggester.toLevA ...) ... but testRandom2() fixes
       // this:
-      Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
-      assertTrue(automaton.isDeterministic());
+      LightAutomaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
+      assertTrue(BasicOperations.isDeterministic(automaton));
+
       // TODO: could be faster... but its slowCompletor for a reason
       BytesRef spare = new BytesRef();
       for (TermFreqPayload2 e : slowCompletor) {
         spare.copyChars(e.analyzedForm);
         Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
         for (IntsRef intsRef : finiteStrings) {
-          State p = automaton.getInitialState();
+          int p = 0;
           BytesRef ref = Util.toBytesRef(intsRef, spare);
           boolean added = false;
           for (int i = ref.offset; i < ref.length; i++) {
-            State q = p.step(ref.bytes[i] & 0xff);
-            if (q == null) {
+            int q = automaton.step(p, ref.bytes[i] & 0xff);
+            if (q == -1) {
               break;
-            } else if (q.isAccept()) {
+            } else if (automaton.isAccept(q)) {
               matches.add(new LookupResult(e.surfaceForm, e.weight));
               added = true;
               break;
             }
             p = q;
           }
-          if (!added && p.isAccept()) {
+          if (!added && automaton.isAccept(p)) {
             matches.add(new LookupResult(e.surfaceForm, e.weight));
           } 
         }