You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/19 10:22:11 UTC

svn commit: r1603752 [1/3] - in /lucene/dev/trunk: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/memory/ lu...

Author: mikemccand
Date: Thu Jun 19 08:22:08 2014
New Revision: 1603752

URL: http://svn.apache.org/r1603752
Log:
LUCENE-5752: switch to simpler Automaton implementation

Added:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/SortedIntSet.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SortedIntSet.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/StatePair.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
      - copied unchanged from r1603645, lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
Removed:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/State.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestBasicOperations.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestSpecialOperations.java
Modified:
    lucene/dev/trunk/   (props changed)
    lucene/dev/trunk/dev-tools/   (props changed)
    lucene/dev/trunk/lucene/   (props changed)
    lucene/dev/trunk/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/trunk/lucene/analysis/   (props changed)
    lucene/dev/trunk/lucene/analysis/common/   (props changed)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
    lucene/dev/trunk/lucene/build.xml   (contents, props changed)
    lucene/dev/trunk/lucene/codecs/   (props changed)
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
    lucene/dev/trunk/lucene/common-build.xml   (props changed)
    lucene/dev/trunk/lucene/core/   (props changed)
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestGraphTokenizers.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDocTermOrdsRewriteMethod.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/trunk/lucene/expressions/   (props changed)
    lucene/dev/trunk/lucene/facet/   (props changed)
    lucene/dev/trunk/lucene/highlighter/   (props changed)
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/trunk/lucene/join/   (props changed)
    lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
    lucene/dev/trunk/lucene/memory/   (props changed)
    lucene/dev/trunk/lucene/misc/   (props changed)
    lucene/dev/trunk/lucene/queries/   (props changed)
    lucene/dev/trunk/lucene/queryparser/   (props changed)
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
    lucene/dev/trunk/lucene/sandbox/   (props changed)
    lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
    lucene/dev/trunk/lucene/spatial/   (props changed)
    lucene/dev/trunk/lucene/suggest/   (props changed)
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
    lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
    lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
    lucene/dev/trunk/lucene/test-framework/   (props changed)
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
    lucene/dev/trunk/lucene/tools/   (props changed)
    lucene/dev/trunk/solr/   (props changed)
    lucene/dev/trunk/solr/CHANGES.txt   (props changed)
    lucene/dev/trunk/solr/contrib/   (props changed)
    lucene/dev/trunk/solr/core/   (props changed)
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
    lucene/dev/trunk/solr/solrj/   (props changed)
    lucene/dev/trunk/solr/test-framework/   (props changed)
    lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenFilterFactory.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jun 19 08:22:08 2014
@@ -98,6 +98,10 @@ Other
 
 (No Changes)
 
+API Changes
+
+* LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
+
 ======================= Lucene 4.9.0 =======================
 
 Changes in Runtime Behavior

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java Thu Jun 19 08:22:08 2014
@@ -31,11 +31,9 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.State;
-import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.Automaton;
 
 /**
  * Compares MockTokenizer (which is simple with no optimizations) with equivalent 
@@ -50,18 +48,18 @@ public class TestDuelingAnalyzers extend
   @Override
   public void setUp() throws Exception {
     super.setUp();
+    Automaton single = new Automaton();
+    int initial = single.createState();
+    int accept = single.createState();
+    single.setAccept(accept, true);
+
     // build an automaton matching this jvm's letter definition
-    State initial = new State();
-    State accept = new State();
-    accept.setAccept(true);
     for (int i = 0; i <= 0x10FFFF; i++) {
       if (Character.isLetter(i)) {
-        initial.addTransition(new Transition(i, i, accept));
+        single.addTransition(initial, accept, i);
       }
     }
-    Automaton single = new Automaton(initial);
-    single.reduce();
-    Automaton repeat = BasicOperations.repeat(single);
+    Automaton repeat = Operations.repeat(single);
     jvmLetter = new CharacterRunAutomaton(repeat);
   }
   

Modified: lucene/dev/trunk/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/build.xml?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/build.xml (original)
+++ lucene/dev/trunk/lucene/build.xml Thu Jun 19 08:22:08 2014
@@ -258,6 +258,7 @@
     <!-- test-framework: problems -->
 
     <!-- too much to fix core/ for now, but enforce full javadocs for key packages -->
+    <check-missing-javadocs dir="build/docs/core/org/apache/lucene/util/automaton" level="method"/>
     <check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
     <check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
     <check-missing-javadocs dir="build/docs/core/org/apache/lucene/search/similarities" level="method"/>

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Thu Jun 19 08:22:08 2014
@@ -29,8 +29,8 @@ import org.apache.lucene.codecs.Postings
 import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.SegmentReadState;
@@ -940,10 +940,11 @@ public final class DirectPostingsFormat 
       private final class State {
         int changeOrd;
         int state;
-        Transition[] transitions;
         int transitionUpto;
+        int transitionCount;
         int transitionMax;
         int transitionMin;
+        final Transition transition = new Transition();
       }
 
       private State[] states;
@@ -957,7 +958,8 @@ public final class DirectPostingsFormat 
         states[0] = new State();
         states[0].changeOrd = terms.length;
         states[0].state = runAutomaton.getInitialState();
-        states[0].transitions = compiledAutomaton.sortedTransitions[states[0].state];
+        states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
+        compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
         states[0].transitionUpto = -1;
         states[0].transitionMax = -1;
 
@@ -978,9 +980,10 @@ public final class DirectPostingsFormat 
 
               while (label > states[i].transitionMax) {
                 states[i].transitionUpto++;
-                assert states[i].transitionUpto < states[i].transitions.length;
-                states[i].transitionMin = states[i].transitions[states[i].transitionUpto].getMin();
-                states[i].transitionMax = states[i].transitions[states[i].transitionUpto].getMax();
+                assert states[i].transitionUpto < states[i].transitionCount;
+                compiledAutomaton.automaton.getNextTransition(states[i].transition);
+                states[i].transitionMin = states[i].transition.min;
+                states[i].transitionMax = states[i].transition.max;
                 assert states[i].transitionMin >= 0;
                 assert states[i].transitionMin <= 255;
                 assert states[i].transitionMax >= 0;
@@ -1037,7 +1040,8 @@ public final class DirectPostingsFormat 
                     stateUpto++;
                     states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
                     states[stateUpto].state = nextState;
-                    states[stateUpto].transitions = compiledAutomaton.sortedTransitions[nextState];
+                    states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
+                    compiledAutomaton.automaton.initTransition(states[stateUpto].state, states[stateUpto].transition);
                     states[stateUpto].transitionUpto = -1;
                     states[stateUpto].transitionMax = -1;
                     //System.out.println("  push " + states[stateUpto].transitions.length + " trans");
@@ -1191,7 +1195,7 @@ public final class DirectPostingsFormat 
           while (label > state.transitionMax) {
             //System.out.println("  label=" + label + " vs max=" + state.transitionMax + " transUpto=" + state.transitionUpto + " vs " + state.transitions.length);
             state.transitionUpto++;
-            if (state.transitionUpto == state.transitions.length) {
+            if (state.transitionUpto == state.transitionCount) {
               // We've exhausted transitions leaving this
               // state; force pop+next/skip now:
               //System.out.println("forcepop: stateUpto=" + stateUpto);
@@ -1210,9 +1214,10 @@ public final class DirectPostingsFormat 
               }
               continue nextTerm;
             }
-            assert state.transitionUpto < state.transitions.length: " state.transitionUpto=" + state.transitionUpto + " vs " + state.transitions.length;
-            state.transitionMin = state.transitions[state.transitionUpto].getMin();
-            state.transitionMax = state.transitions[state.transitionUpto].getMax();
+            compiledAutomaton.automaton.getNextTransition(state.transition);
+            assert state.transitionUpto < state.transitionCount: " state.transitionUpto=" + state.transitionUpto + " vs " + state.transitionCount;
+            state.transitionMin = state.transition.min;
+            state.transitionMax = state.transition.max;
             assert state.transitionMin >= 0;
             assert state.transitionMin <= 255;
             assert state.transitionMax >= 0;
@@ -1310,7 +1315,8 @@ public final class DirectPostingsFormat 
             stateUpto++;
             states[stateUpto].state = nextState;
             states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
-            states[stateUpto].transitions = compiledAutomaton.sortedTransitions[nextState];
+            states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
+            compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
             states[stateUpto].transitionUpto = -1;
             states[stateUpto].transitionMax = -1;
             

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java Thu Jun 19 08:22:08 2014
@@ -26,8 +26,6 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RollingBuffer;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.State;
-import org.apache.lucene.util.automaton.Transition;
 
 // TODO: maybe also toFST?  then we can translate atts into FST outputs/weights
 
@@ -61,15 +59,15 @@ public class TokenStreamToAutomaton {
 
   private static class Position implements RollingBuffer.Resettable {
     // Any tokens that ended at our position arrive to this state:
-    State arriving;
+    int arriving = -1;
 
     // Any tokens that start at our position leave from this state:
-    State leaving;
+    int leaving = -1;
 
     @Override
     public void reset() {
-      arriving = null;
-      leaving = null;
+      arriving = -1;
+      leaving = -1;
     }
   }
 
@@ -99,8 +97,8 @@ public class TokenStreamToAutomaton {
    *  automaton where arcs are bytes (or Unicode code points 
    *  if unicodeArcs = true) from each term. */
   public Automaton toAutomaton(TokenStream in) throws IOException {
-    final Automaton a = new Automaton();
-    boolean deterministic = true;
+    final Automaton.Builder builder = new Automaton.Builder();
+    builder.createState();
 
     final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
     final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
@@ -132,34 +130,29 @@ public class TokenStreamToAutomaton {
         pos += posInc;
 
         posData = positions.get(pos);
-        assert posData.leaving == null;
+        assert posData.leaving == -1;
 
-        if (posData.arriving == null) {
+        if (posData.arriving == -1) {
           // No token ever arrived to this position
           if (pos == 0) {
             // OK: this is the first token
-            posData.leaving = a.getInitialState();
+            posData.leaving = 0;
           } else {
             // This means there's a hole (eg, StopFilter
             // does this):
-            posData.leaving = new State();
-            addHoles(a.getInitialState(), positions, pos);
+            posData.leaving = builder.createState();
+            addHoles(builder, positions, pos);
           }
         } else {
-          posData.leaving = new State();
-          posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
+          posData.leaving = builder.createState();
+          builder.addTransition(posData.arriving, posData.leaving, POS_SEP);
           if (posInc > 1) {
             // A token spanned over a hole; add holes
             // "under" it:
-            addHoles(a.getInitialState(), positions, pos);
+            addHoles(builder, positions, pos);
           }
         }
         positions.freeBefore(pos);
-      } else {
-        // note: this isn't necessarily true. its just that we aren't surely det.
-        // we could optimize this further (e.g. buffer and sort synonyms at a position)
-        // but thats probably overkill. this is cheap and dirty
-        deterministic = false;
       }
 
       final int endPos = pos + posLengthAtt.getPositionLength();
@@ -168,31 +161,33 @@ public class TokenStreamToAutomaton {
       final BytesRef termUTF8 = changeToken(term);
       int[] termUnicode = null;
       final Position endPosData = positions.get(endPos);
-      if (endPosData.arriving == null) {
-        endPosData.arriving = new State();
+      if (endPosData.arriving == -1) {
+        endPosData.arriving = builder.createState();
       }
 
-      State state = posData.leaving;
       int termLen;
       if (unicodeArcs) {
         final String utf16 = termUTF8.utf8ToString();
         termUnicode = new int[utf16.codePointCount(0, utf16.length())];
         termLen = termUnicode.length;
-        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
+        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
           termUnicode[j++] = cp = utf16.codePointAt(i);
+        }
       } else {
         termLen = termUTF8.length;
       }
 
+      int state = posData.leaving;
+
       for(int byteIDX=0;byteIDX<termLen;byteIDX++) {
-        final State nextState = byteIDX == termLen-1 ? endPosData.arriving : new State();
+        final int nextState = byteIDX == termLen-1 ? endPosData.arriving : builder.createState();
         int c;
         if (unicodeArcs) {
           c = termUnicode[byteIDX];
         } else {
           c = termUTF8.bytes[termUTF8.offset + byteIDX] & 0xff;
         }
-        state.addTransition(new Transition(c, nextState));
+        builder.addTransition(state, nextState, c);
         state = nextState;
       }
 
@@ -200,28 +195,26 @@ public class TokenStreamToAutomaton {
     }
 
     in.end();
-    State endState = null;
+    int endState = -1;
     if (offsetAtt.endOffset() > maxOffset) {
-      endState = new State();
-      endState.setAccept(true);
+      endState = builder.createState();
+      builder.setAccept(endState, true);
     }
 
     pos++;
     while (pos <= positions.getMaxPos()) {
       posData = positions.get(pos);
-      if (posData.arriving != null) {
-        if (endState != null) {
-          posData.arriving.addTransition(new Transition(POS_SEP, endState));
+      if (posData.arriving != -1) {
+        if (endState != -1) {
+          builder.addTransition(posData.arriving, endState, POS_SEP);
         } else {
-          posData.arriving.setAccept(true);
+          builder.setAccept(posData.arriving, true);
         }
       }
       pos++;
     }
 
-    //toDot(a);
-    a.setDeterministic(deterministic);
-    return a;
+    return builder.finish();
   }
 
   // for debugging!
@@ -235,26 +228,26 @@ public class TokenStreamToAutomaton {
   }
   */
 
-  private static void addHoles(State startState, RollingBuffer<Position> positions, int pos) {
+  private static void addHoles(Automaton.Builder builder, RollingBuffer<Position> positions, int pos) {
     Position posData = positions.get(pos);
     Position prevPosData = positions.get(pos-1);
 
-    while(posData.arriving == null || prevPosData.leaving == null) {
-      if (posData.arriving == null) {
-        posData.arriving = new State();
-        posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
+    while(posData.arriving == -1 || prevPosData.leaving == -1) {
+      if (posData.arriving == -1) {
+        posData.arriving = builder.createState();
+        builder.addTransition(posData.arriving, posData.leaving, POS_SEP);
       }
-      if (prevPosData.leaving == null) {
+      if (prevPosData.leaving == -1) {
         if (pos == 1) {
-          prevPosData.leaving = startState;
+          prevPosData.leaving = 0;
         } else {
-          prevPosData.leaving = new State();
+          prevPosData.leaving = builder.createState();
         }
-        if (prevPosData.arriving != null) {
-          prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
+        if (prevPosData.arriving != -1) {
+          builder.addTransition(prevPosData.arriving, prevPosData.leaving, POS_SEP);
         }
       }
-      prevPosData.leaving.addTransition(new Transition(HOLE, posData.arriving));
+      builder.addTransition(prevPosData.leaving, posData.arriving, HOLE);
       pos--;
       if (pos <= 0) {
         break;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Thu Jun 19 08:22:08 2014
@@ -18,43 +18,25 @@ package org.apache.lucene.codecs.blocktr
  */
 
 import java.io.IOException;
-import java.io.PrintStream;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.TreeMap;
 
-import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
-import org.apache.lucene.util.automaton.Transition;
-import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.Outputs;
-import org.apache.lucene.util.fst.Util;
 
 /** A block-based terms index and dictionary that assigns
  *  terms to variable length blocks according to how they

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java Thu Jun 19 08:22:08 2014
@@ -347,7 +347,7 @@ final class IntersectTermsEnum extends T
       if (currentFrame.suffix != 0) {
         final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
         while (label > currentFrame.curTransitionMax) {
-          if (currentFrame.transitionIndex >= currentFrame.transitions.length-1) {
+          if (currentFrame.transitionIndex >= currentFrame.transitionCount-1) {
             // Stop processing this frame -- no further
             // matches are possible because we've moved
             // beyond what the max transition will allow
@@ -359,7 +359,8 @@ final class IntersectTermsEnum extends T
             continue nextTerm;
           }
           currentFrame.transitionIndex++;
-          currentFrame.curTransitionMax = currentFrame.transitions[currentFrame.transitionIndex].getMax();
+          compiledAutomaton.automaton.getNextTransition(currentFrame.transition);
+          currentFrame.curTransitionMax = currentFrame.transition.max;
           //if (DEBUG) System.out.println("      next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
         }
       }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java Thu Jun 19 08:22:08 2014
@@ -68,9 +68,10 @@ final class IntersectTermsEnumFrame {
   int numFollowFloorBlocks;
   int nextFloorLabel;
         
-  Transition[] transitions;
+  Transition transition = new Transition();
   int curTransitionMax;
   int transitionIndex;
+  int transitionCount;
 
   FST.Arc<BytesRef> arc;
 
@@ -112,7 +113,7 @@ final class IntersectTermsEnumFrame {
         nextFloorLabel = 256;
       }
       // if (DEBUG) System.out.println("    nextFloorLabel=" + (char) nextFloorLabel);
-    } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[transitionIndex].getMin());
+    } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min);
 
     load(null);
   }
@@ -120,9 +121,11 @@ final class IntersectTermsEnumFrame {
   public void setState(int state) {
     this.state = state;
     transitionIndex = 0;
-    transitions = ite.compiledAutomaton.sortedTransitions[state];
-    if (transitions.length != 0) {
-      curTransitionMax = transitions[0].getMax();
+    transitionCount = ite.compiledAutomaton.automaton.getNumTransitions(state);
+    if (transitionCount != 0) {
+      ite.compiledAutomaton.automaton.initTransition(state, transition);
+      ite.compiledAutomaton.automaton.getNextTransition(transition);
+      curTransitionMax = transition.max;
     } else {
       curTransitionMax = -1;
     }
@@ -132,7 +135,7 @@ final class IntersectTermsEnumFrame {
 
     // if (DEBUG) System.out.println("    load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
 
-    if (frameIndexData != null && transitions.length != 0) {
+    if (frameIndexData != null && transitionCount != 0) {
       // Floor frame
       if (floorData.length < frameIndexData.length) {
         this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
@@ -151,7 +154,8 @@ final class IntersectTermsEnumFrame {
         // first block in case it has empty suffix:
         if (!ite.runAutomaton.isAccept(state)) {
           // Maybe skip floor blocks:
-          while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[0].getMin()) {
+          assert transitionIndex == 0: "transitionIndex=" + transitionIndex;
+          while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
             fp = fpOrig + (floorDataReader.readVLong() >>> 1);
             numFollowFloorBlocks--;
             // if (DEBUG) System.out.println("    skip floor block!  nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java Thu Jun 19 08:22:08 2014
@@ -24,6 +24,7 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Transition;
 
 /**
@@ -51,7 +52,7 @@ class AutomatonTermsEnum extends Filtere
   // true if the automaton accepts a finite language
   private final boolean finite;
   // array of sorted transitions for each state, indexed by state number
-  private final Transition[][] allTransitions;
+  private final Automaton automaton;
   // for path tracking: each long records gen when we last
   // visited the state; we use gens to avoid having to clear
   private final long[] visited;
@@ -79,7 +80,7 @@ class AutomatonTermsEnum extends Filtere
     this.runAutomaton = compiled.runAutomaton;
     assert this.runAutomaton != null;
     this.commonSuffixRef = compiled.commonSuffixRef;
-    this.allTransitions = compiled.sortedTransitions;
+    this.automaton = compiled.automaton;
 
     // used for path tracking, where each bit is a numbered state.
     visited = new long[runAutomaton.getSize()];
@@ -124,6 +125,8 @@ class AutomatonTermsEnum extends Filtere
     }
   }
 
+  private Transition transition = new Transition();
+
   /**
    * Sets the enum to operate in linear fashion, as we have found
    * a looping transition at position: we set an upper bound and 
@@ -133,16 +136,20 @@ class AutomatonTermsEnum extends Filtere
     assert linear == false;
     
     int state = runAutomaton.getInitialState();
+    assert state == 0;
     int maxInterval = 0xff;
+    //System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
     for (int i = 0; i < position; i++) {
       state = runAutomaton.step(state, seekBytesRef.bytes[i] & 0xff);
       assert state >= 0: "state=" + state;
     }
-    for (int i = 0; i < allTransitions[state].length; i++) {
-      Transition t = allTransitions[state][i];
-      if (t.getMin() <= (seekBytesRef.bytes[position] & 0xff) && 
-          (seekBytesRef.bytes[position] & 0xff) <= t.getMax()) {
-        maxInterval = t.getMax();
+    final int numTransitions = automaton.getNumTransitions(state);
+    automaton.initTransition(state, transition);
+    for (int i = 0; i < numTransitions; i++) {
+      automaton.getNextTransition(transition);
+      if (transition.min <= (seekBytesRef.bytes[position] & 0xff) && 
+          (seekBytesRef.bytes[position] & 0xff) <= transition.max) {
+        maxInterval = transition.max;
         break;
       }
     }
@@ -250,19 +257,19 @@ class AutomatonTermsEnum extends Filtere
     seekBytesRef.length = position;
     visited[state] = curGen;
 
-    Transition transitions[] = allTransitions[state];
-
+    final int numTransitions = automaton.getNumTransitions(state);
+    automaton.initTransition(state, transition);
     // find the minimal path (lexicographic order) that is >= c
     
-    for (int i = 0; i < transitions.length; i++) {
-      Transition transition = transitions[i];
-      if (transition.getMax() >= c) {
-        int nextChar = Math.max(c, transition.getMin());
+    for (int i = 0; i < numTransitions; i++) {
+      automaton.getNextTransition(transition);
+      if (transition.max >= c) {
+        int nextChar = Math.max(c, transition.min);
         // append either the next sequential char, or the minimum transition
         seekBytesRef.grow(seekBytesRef.length + 1);
         seekBytesRef.length++;
         seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) nextChar;
-        state = transition.getDest().getNumber();
+        state = transition.dest;
         /* 
          * as long as is possible, continue down the minimal path in
          * lexicographic order. if a loop or accept state is encountered, stop.
@@ -274,13 +281,14 @@ class AutomatonTermsEnum extends Filtere
            * so the below is ok, if it is not an accept state,
            * then there MUST be at least one transition.
            */
-          transition = allTransitions[state][0];
-          state = transition.getDest().getNumber();
+          automaton.initTransition(state, transition);
+          automaton.getNextTransition(transition);
+          state = transition.dest;
           
           // append the minimum transition
           seekBytesRef.grow(seekBytesRef.length + 1);
           seekBytesRef.length++;
-          seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.getMin();
+          seekBytesRef.bytes[seekBytesRef.length - 1] = (byte) transition.min;
           
           // we found a loop, record it for faster enumeration
           if (!finite && !linear && visited[state] == curGen) {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Thu Jun 19 08:22:08 2014
@@ -24,11 +24,11 @@ import java.util.List;
 
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
@@ -36,8 +36,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
@@ -152,7 +150,7 @@ public class FuzzyTermsEnum extends Term
       throws IOException {
     final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
     if (editDistance < runAutomata.size()) {
-      //if (BlockTreeTermsWriter.DEBUG) System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
+      //System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
       final CompiledAutomaton compiled = runAutomata.get(editDistance);
       return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRef())),
                                          runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
@@ -165,20 +163,15 @@ public class FuzzyTermsEnum extends Term
   private List<CompiledAutomaton> initAutomata(int maxDistance) {
     final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
     //System.out.println("cached automata size: " + runAutomata.size());
-    if (runAutomata.size() <= maxDistance && 
+    if (runAutomata.size() <= maxDistance &&
         maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
       LevenshteinAutomata builder = 
         new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
 
+      String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
       for (int i = runAutomata.size(); i <= maxDistance; i++) {
-        Automaton a = builder.toAutomaton(i);
+        Automaton a = builder.toAutomaton(i, prefix);
         //System.out.println("compute automaton n=" + i);
-        // constant prefix
-        if (realPrefixLength > 0) {
-          Automaton prefix = BasicAutomata.makeString(
-            UnicodeUtil.newString(termText, 0, realPrefixLength));
-          a = BasicOperations.concatenate(prefix, a);
-        }
         runAutomata.add(new CompiledAutomaton(a, true, false));
       }
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java Thu Jun 19 08:22:08 2014
@@ -1,7 +1,6 @@
 package org.apache.lucene.search;
 
 import org.apache.lucene.index.Term;
-
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonProvider;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java Thu Jun 19 08:22:08 2014
@@ -17,14 +17,14 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
-
-import java.util.ArrayList;
-import java.util.List;
 
 /** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
  * matches any character sequence (including the empty one), and <code>?</code>,
@@ -72,26 +72,26 @@ public class WildcardQuery extends Autom
       int length = Character.charCount(c);
       switch(c) {
         case WILDCARD_STRING: 
-          automata.add(BasicAutomata.makeAnyString());
+          automata.add(Automata.makeAnyString());
           break;
         case WILDCARD_CHAR:
-          automata.add(BasicAutomata.makeAnyChar());
+          automata.add(Automata.makeAnyChar());
           break;
         case WILDCARD_ESCAPE:
           // add the next codepoint instead, if it exists
           if (i + length < wildcardText.length()) {
             final int nextChar = wildcardText.codePointAt(i + length);
             length += Character.charCount(nextChar);
-            automata.add(BasicAutomata.makeChar(nextChar));
+            automata.add(Automata.makeChar(nextChar));
             break;
           } // else fallthru, lenient parsing with a trailing \
         default:
-          automata.add(BasicAutomata.makeChar(c));
+          automata.add(Automata.makeChar(c));
       }
       i += length;
     }
     
-    return BasicOperations.concatenate(automata);
+    return Operations.concatenate(automata);
   }
   
   /**

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java Thu Jun 19 08:22:08 2014
@@ -21,7 +21,8 @@ package org.apache.lucene.util.automaton
  * Automaton representation for matching UTF-8 byte[].
  */
 public class ByteRunAutomaton extends RunAutomaton {
-  
+
+  /** Converts incoming automaton to byte-based (UTF32ToUTF8) first */
   public ByteRunAutomaton(Automaton a) {
     this(a, false);
   }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java Thu Jun 19 08:22:08 2014
@@ -22,6 +22,7 @@ package org.apache.lucene.util.automaton
  */
 public class CharacterRunAutomaton extends RunAutomaton {
 
+  /** Sole constructor. */
   public CharacterRunAutomaton(Automaton a) {
     super(a, Character.MAX_CODE_POINT, false);
   }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java Thu Jun 19 08:22:08 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.util.automaton
   
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.lucene.index.Terms;
@@ -52,6 +51,8 @@ public class CompiledAutomaton {
     /** Catch-all for any other automata. */
     NORMAL
   };
+
+  /** If simplify is true this will be the "simplified" type; else, this is NORMAL */
   public final AUTOMATON_TYPE type;
 
   /** 
@@ -65,21 +66,22 @@ public class CompiledAutomaton {
    * only valid for {@link AUTOMATON_TYPE#NORMAL}.
    */
   public final ByteRunAutomaton runAutomaton;
-  // TODO: would be nice if these sortedTransitions had "int
-  // to;" instead of "State to;" somehow:
+
   /**
    * Two dimensional array of transitions, indexed by state
    * number for traversal. The state numbering is consistent with
    * {@link #runAutomaton}. 
    * Only valid for {@link AUTOMATON_TYPE#NORMAL}.
    */
-  public final Transition[][] sortedTransitions;
+  public final Automaton automaton;
+
   /**
    * Shared common suffix accepted by the automaton. Only valid
    * for {@link AUTOMATON_TYPE#NORMAL}, and only when the
    * automaton accepts an infinite language.
    */
   public final BytesRef commonSuffixRef;
+
   /**
    * Indicates if the automaton accepts a finite set of strings.
    * Null if this was not computed.
@@ -87,125 +89,157 @@ public class CompiledAutomaton {
    */
   public final Boolean finite;
 
+  /** Create this, passing simplify=true and finite=null, so that we try
+   *  to simplify the automaton and determine if it is finite. */
   public CompiledAutomaton(Automaton automaton) {
     this(automaton, null, true);
   }
 
+  /** Create this.  If finite is null, we use {@link Operations#isFinite}
+   *  to determine whether it is finite.  If simplify is true, we run
+   *  possibly expensive operations to determine if the automaton is one
+   *  the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}. */
   public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
 
+    if (automaton.getNumStates() == 0) {
+      automaton = new Automaton();
+      automaton.createState();
+    }
+
     if (simplify) {
+
       // Test whether the automaton is a "simple" form and
       // if so, don't create a runAutomaton.  Note that on a
       // large automaton these tests could be costly:
-      if (BasicOperations.isEmpty(automaton)) {
+
+      if (Operations.isEmpty(automaton)) {
         // matches nothing
         type = AUTOMATON_TYPE.NONE;
         term = null;
         commonSuffixRef = null;
         runAutomaton = null;
-        sortedTransitions = null;
+        this.automaton = null;
         this.finite = null;
         return;
-      } else if (BasicOperations.isTotal(automaton)) {
+      // NOTE: only approximate, because automaton may not be minimal:
+      } else if (Operations.isTotal(automaton)) {
         // matches all possible strings
         type = AUTOMATON_TYPE.ALL;
         term = null;
         commonSuffixRef = null;
         runAutomaton = null;
-        sortedTransitions = null;
+        this.automaton = null;
         this.finite = null;
         return;
       } else {
-        final String commonPrefix;
+
+        automaton = Operations.determinize(automaton);
+
+        final String commonPrefix = Operations.getCommonPrefix(automaton);
         final String singleton;
-        if (automaton.getSingleton() == null) {
-          commonPrefix = SpecialOperations.getCommonPrefix(automaton);
-          if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
-            singleton = commonPrefix;
-          } else {
-            singleton = null;
-          }
+
+        if (commonPrefix.length() > 0 && Operations.sameLanguage(automaton, Automata.makeString(commonPrefix))) {
+          singleton = commonPrefix;
         } else {
-          commonPrefix = null;
-          singleton = automaton.getSingleton();
+          singleton = null;
         }
-      
+
         if (singleton != null) {
-          // matches a fixed string in singleton or expanded
-          // representation
+          // matches a fixed string
           type = AUTOMATON_TYPE.SINGLE;
           term = new BytesRef(singleton);
           commonSuffixRef = null;
           runAutomaton = null;
-          sortedTransitions = null;
-          this.finite = null;
-          return;
-        } else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate(
-                                                                                       BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) {
-          // matches a constant prefix
-          type = AUTOMATON_TYPE.PREFIX;
-          term = new BytesRef(commonPrefix);
-          commonSuffixRef = null;
-          runAutomaton = null;
-          sortedTransitions = null;
+          this.automaton = null;
           this.finite = null;
           return;
+        } else if (commonPrefix.length() > 0) {
+          Automaton other = Operations.concatenate(Automata.makeString(commonPrefix), Automata.makeAnyString());
+          other = Operations.determinize(other);
+          assert Operations.hasDeadStates(other) == false;
+          if (Operations.sameLanguage(automaton, other)) {
+            // matches a constant prefix
+            type = AUTOMATON_TYPE.PREFIX;
+            term = new BytesRef(commonPrefix);
+            commonSuffixRef = null;
+            runAutomaton = null;
+            this.automaton = null;
+            this.finite = null;
+            return;
+          }
         }
       }
     }
 
     type = AUTOMATON_TYPE.NORMAL;
     term = null;
+
     if (finite == null) {
-      this.finite = SpecialOperations.isFinite(automaton);
+      this.finite = Operations.isFinite(automaton);
     } else {
       this.finite = finite;
     }
+
     Automaton utf8 = new UTF32ToUTF8().convert(automaton);
     if (this.finite) {
       commonSuffixRef = null;
     } else {
-      commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(utf8);
+      commonSuffixRef = Operations.getCommonSuffixBytesRef(utf8);
     }
     runAutomaton = new ByteRunAutomaton(utf8, true);
-    sortedTransitions = utf8.getSortedTransitions();
+
+    this.automaton = runAutomaton.automaton;
   }
+
+  private Transition transition = new Transition();
   
   //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
 
   private BytesRef addTail(int state, BytesRef term, int idx, int leadLabel) {
-
+    //System.out.println("addTail state=" + state + " term=" + term.utf8ToString() + " idx=" + idx + " leadLabel=" + (char) leadLabel);
+    //System.out.println(automaton.toDot());
     // Find biggest transition that's < label
     // TODO: use binary search here
-    Transition maxTransition = null;
-    for (Transition transition : sortedTransitions[state]) {
+    int maxIndex = -1;
+    int numTransitions = automaton.initTransition(state, transition);
+    for(int i=0;i<numTransitions;i++) {
+      automaton.getNextTransition(transition);
       if (transition.min < leadLabel) {
-        maxTransition = transition;
+        maxIndex = i;
+      } else {
+        // Transitions are alway sorted
+        break;
       }
     }
 
-    assert maxTransition != null;
+    //System.out.println("  maxIndex=" + maxIndex);
+
+    assert maxIndex != -1;
+    automaton.getTransition(state, maxIndex, transition);
 
     // Append floorLabel
     final int floorLabel;
-    if (maxTransition.max > leadLabel-1) {
+    if (transition.max > leadLabel-1) {
       floorLabel = leadLabel-1;
     } else {
-      floorLabel = maxTransition.max;
+      floorLabel = transition.max;
     }
+    //System.out.println("  floorLabel=" + (char) floorLabel);
     if (idx >= term.bytes.length) {
       term.grow(1+idx);
     }
     //if (DEBUG) System.out.println("  add floorLabel=" + (char) floorLabel + " idx=" + idx);
     term.bytes[idx] = (byte) floorLabel;
 
-    state = maxTransition.to.getNumber();
+    state = transition.dest;
+    //System.out.println("  dest: " + state);
     idx++;
 
     // Push down to last accept state
     while (true) {
-      Transition[] transitions = sortedTransitions[state];
-      if (transitions.length == 0) {
+      numTransitions = automaton.getNumTransitions(state);
+      if (numTransitions == 0) {
+        //System.out.println("state=" + state + " 0 trans");
         assert runAutomaton.isAccept(state);
         term.length = idx;
         //if (DEBUG) System.out.println("  return " + term.utf8ToString());
@@ -213,14 +247,15 @@ public class CompiledAutomaton {
       } else {
         // We are pushing "top" -- so get last label of
         // last transition:
-        assert transitions.length != 0;
-        Transition lastTransition = transitions[transitions.length-1];
+        //System.out.println("get state=" + state + " numTrans=" + numTransitions);
+        automaton.getTransition(state, numTransitions-1, transition);
         if (idx >= term.bytes.length) {
           term.grow(1+idx);
         }
         //if (DEBUG) System.out.println("  push maxLabel=" + (char) lastTransition.max + " idx=" + idx);
-        term.bytes[idx] = (byte) lastTransition.max;
-        state = lastTransition.to.getNumber();
+        //System.out.println("  add trans dest=" + scratch.dest + " label=" + (char) scratch.max);
+        term.bytes[idx] = (byte) transition.max;
+        state = transition.dest;
         idx++;
       }
     }
@@ -229,6 +264,8 @@ public class CompiledAutomaton {
   // TODO: should this take startTerm too?  This way
   // Terms.intersect could forward to this method if type !=
   // NORMAL:
+  /** Return a {@link TermsEnum} intersecting the provided {@link Terms}
+   *  with the terms accepted by this automaton. */
   public TermsEnum getTermsEnum(Terms terms) throws IOException {
     switch(type) {
     case NONE:
@@ -301,33 +338,36 @@ public class CompiledAutomaton {
         // Pop back to a state that has a transition
         // <= our label:
         while (true) {
-          Transition[] transitions = sortedTransitions[state];
-          if (transitions.length == 0) {
+          int numTransitions = automaton.getNumTransitions(state);
+          if (numTransitions == 0) {
             assert runAutomaton.isAccept(state);
             output.length = idx;
             //if (DEBUG) System.out.println("  return " + output.utf8ToString());
             return output;
-          } else if (label-1 < transitions[0].min) {
+          } else {
+            automaton.getTransition(state, 0, transition);
 
-            if (runAutomaton.isAccept(state)) {
-              output.length = idx;
-              //if (DEBUG) System.out.println("  return " + output.utf8ToString());
-              return output;
-            }
-            // pop
-            if (stack.size() == 0) {
-              //if (DEBUG) System.out.println("  pop ord=" + idx + " return null");
-              return null;
+            if (label-1 < transition.min) {
+
+              if (runAutomaton.isAccept(state)) {
+                output.length = idx;
+                //if (DEBUG) System.out.println("  return " + output.utf8ToString());
+                return output;
+              }
+              // pop
+              if (stack.size() == 0) {
+                //if (DEBUG) System.out.println("  pop ord=" + idx + " return null");
+                return null;
+              } else {
+                state = stack.remove(stack.size()-1);
+                idx--;
+                //if (DEBUG) System.out.println("  pop ord=" + (idx+1) + " label=" + (char) label + " first trans.min=" + (char) transitions[0].min);
+                label = input.bytes[input.offset + idx] & 0xff;
+              }
             } else {
-              state = stack.remove(stack.size()-1);
-              idx--;
-              //if (DEBUG) System.out.println("  pop ord=" + (idx+1) + " label=" + (char) label + " first trans.min=" + (char) transitions[0].min);
-              label = input.bytes[input.offset + idx] & 0xff;
+              //if (DEBUG) System.out.println("  stop pop ord=" + idx + " first trans.min=" + (char) transitions[0].min);
+              break;
             }
-
-          } else {
-            //if (DEBUG) System.out.println("  stop pop ord=" + idx + " first trans.min=" + (char) transitions[0].min);
-            break;
           }
         }
 
@@ -346,26 +386,6 @@ public class CompiledAutomaton {
       }
     }
   }
-  
-  public String toDot() {
-    StringBuilder b = new StringBuilder("digraph CompiledAutomaton {\n");
-    b.append("  rankdir = LR;\n");
-    int initial = runAutomaton.getInitialState();
-    for (int i = 0; i < sortedTransitions.length; i++) {
-      b.append("  ").append(i);
-      if (runAutomaton.isAccept(i)) b.append(" [shape=doublecircle,label=\"\"];\n");
-      else b.append(" [shape=circle,label=\"\"];\n");
-      if (i == initial) {
-        b.append("  initial [shape=plaintext,label=\"\"];\n");
-        b.append("  initial -> ").append(i).append("\n");
-      }
-      for (int j = 0; j < sortedTransitions[i].length; j++) {
-        b.append("  ").append(i);
-        sortedTransitions[i][j].appendDot(b);
-      }
-    }
-    return b.append("}\n").toString();
-  }
 
   @Override
   public int hashCode() {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/LevenshteinAutomata.java Thu Jun 19 08:22:08 2014
@@ -21,6 +21,8 @@ import java.util.Iterator;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.lucene.util.UnicodeUtil;
+
 /**
  * Class to construct DFAs that match a word within some edit distance.
  * <p>
@@ -30,7 +32,8 @@ import java.util.TreeSet;
  * @lucene.experimental
  */
 public class LevenshteinAutomata {
-  /** @lucene.internal */
+  /** Maximum edit distance this class can generate an automaton for.
+   *  @lucene.internal */
   public static final int MAXIMUM_SUPPORTED_DISTANCE = 2;
   /* input word */
   final int word[];
@@ -112,7 +115,7 @@ public class LevenshteinAutomata {
     }
     return word;
   }
-  
+
   /**
    * Compute a DFA that accepts all strings within an edit distance of <code>n</code>.
    * <p>
@@ -125,8 +128,25 @@ public class LevenshteinAutomata {
    * </p>
    */
   public Automaton toAutomaton(int n) {
+    return toAutomaton(n, "");
+  }
+
+  /**
+   * Compute a DFA that accepts all strings within an edit distance of <code>n</code>,
+   * matching the specified exact prefix.
+   * <p>
+   * All automata have the following properties:
+   * <ul>
+   * <li>They are deterministic (DFA).
+   * <li>There are no transitions to dead states.
+   * <li>They are not minimal (some transitions could be combined).
+   * </ul>
+   * </p>
+   */
+  public Automaton toAutomaton(int n, String prefix) {
+    assert prefix != null;
     if (n == 0) {
-      return BasicAutomata.makeString(word, 0, word.length);
+      return Automata.makeString(prefix + UnicodeUtil.newString(word, 0, word.length));
     }
     
     if (n >= descriptions.length)
@@ -135,15 +155,36 @@ public class LevenshteinAutomata {
     final int range = 2*n+1;
     ParametricDescription description = descriptions[n];
     // the number of states is based on the length of the word and n
-    State states[] = new State[description.size()];
+    int numStates = description.size();
+
+    Automaton a = new Automaton();
+    int lastState;
+    if (prefix != null) {
+      // Insert prefix
+      lastState = a.createState();
+      for (int i = 0, cp = 0; i < prefix.length(); i += Character.charCount(cp)) {
+        int state = a.createState();
+        cp = prefix.codePointAt(i);
+        a.addTransition(lastState, state, cp, cp);
+        lastState = state;
+      }
+    } else {
+      lastState = a.createState();
+    }
+
+    int stateOffset = lastState;
+    a.setAccept(lastState, description.isAccept(0));
+
     // create all states, and mark as accept states if appropriate
-    for (int i = 0; i < states.length; i++) {
-      states[i] = new State();
-      states[i].number = i;
-      states[i].setAccept(description.isAccept(i));
+    for (int i = 1; i < numStates; i++) {
+      int state = a.createState();
+      a.setAccept(state, description.isAccept(i));
     }
+
+    // TODO: this creates bogus states/transitions (states are final, have self loops, and can't be reached from an init state)
+
     // create transitions from state to state
-    for (int k = 0; k < states.length; k++) {
+    for (int k = 0; k < numStates; k++) {
       final int xpos = description.getPosition(k);
       if (xpos < 0)
         continue;
@@ -154,31 +195,26 @@ public class LevenshteinAutomata {
         // get the characteristic vector at this position wrt ch
         final int cvec = getVector(ch, xpos, end);
         int dest = description.transition(k, xpos, cvec);
-        if (dest >= 0)
-          states[k].addTransition(new Transition(ch, states[dest]));
+        if (dest >= 0) {
+          a.addTransition(stateOffset+k, stateOffset+dest, ch);
+        }
       }
       // add transitions for all other chars in unicode
       // by definition, their characteristic vectors are always 0,
       // because they do not exist in the input string.
       int dest = description.transition(k, xpos, 0); // by definition
-      if (dest >= 0)
-        for (int r = 0; r < numRanges; r++)
-          states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));      
+      if (dest >= 0) {
+        for (int r = 0; r < numRanges; r++) {
+          a.addTransition(stateOffset+k, stateOffset+dest, rangeLower[r], rangeUpper[r]);
+        }
+      }
     }
 
-    Automaton a = new Automaton(states[0]);
-    a.setDeterministic(true);
-    // we create some useless unconnected states, and its a net-win overall to remove these,
-    // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
-    // so, while we could set our numberedStates here, its actually best not to, and instead to
-    // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
-    //a.setNumberedStates(states);
-    a.reduce();
-    // we need not trim transitions to dead states, as they are not created.
-    //a.restoreInvariant();
+    a.finishState();
+    assert a.isDeterministic();
     return a;
   }
-  
+
   /**
    * Get the characteristic vector <code>X(x, V)</code> 
    * where V is <code>substring(pos, end)</code>

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java Thu Jun 19 08:22:08 2014
@@ -361,8 +361,6 @@ public class RegExp {
    */
   public static final int NONE = 0x0000;
   
-  private static boolean allow_mutation = false;
-  
   Kind kind;
   RegExp exp1, exp2;
   String s;
@@ -419,13 +417,13 @@ public class RegExp {
     to = e.to;
     b = null;
   }
-  
+
   /**
    * Constructs new <code>Automaton</code> from this <code>RegExp</code>. Same
    * as <code>toAutomaton(null)</code> (empty automaton map).
    */
   public Automaton toAutomaton() {
-    return toAutomatonAllowMutate(null, null);
+    return toAutomaton(null, null);
   }
   
   /**
@@ -439,7 +437,7 @@ public class RegExp {
    */
   public Automaton toAutomaton(AutomatonProvider automaton_provider)
       throws IllegalArgumentException {
-    return toAutomatonAllowMutate(null, automaton_provider);
+    return toAutomaton(null, automaton_provider);
   }
   
   /**
@@ -454,32 +452,9 @@ public class RegExp {
    */
   public Automaton toAutomaton(Map<String,Automaton> automata)
       throws IllegalArgumentException {
-    return toAutomatonAllowMutate(automata, null);
-  }
-  
-  /**
-   * Sets or resets allow mutate flag. If this flag is set, then automata
-   * construction uses mutable automata, which is slightly faster but not thread
-   * safe. By default, the flag is not set.
-   * 
-   * @param flag if true, the flag is set
-   * @return previous value of the flag
-   */
-  public boolean setAllowMutate(boolean flag) {
-    boolean b = allow_mutation;
-    allow_mutation = flag;
-    return b;
-  }
-  
-  private Automaton toAutomatonAllowMutate(Map<String,Automaton> automata,
-      AutomatonProvider automaton_provider) throws IllegalArgumentException {
-    boolean b = false;
-    if (allow_mutation) b = Automaton.setAllowMutate(true); // thread unsafe
-    Automaton a = toAutomaton(automata, automaton_provider);
-    if (allow_mutation) Automaton.setAllowMutate(b);
-    return a;
+    return toAutomaton(automata, null);
   }
-  
+
   private Automaton toAutomaton(Map<String,Automaton> automata,
       AutomatonProvider automaton_provider) throws IllegalArgumentException {
     List<Automaton> list;
@@ -489,8 +464,8 @@ public class RegExp {
         list = new ArrayList<>();
         findLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
         findLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
-        a = BasicOperations.union(list);
-        MinimizationOperations.minimize(a);
+        a = Operations.union(list);
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_CONCATENATION:
         list = new ArrayList<>();
@@ -498,66 +473,72 @@ public class RegExp {
             automaton_provider);
         findLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata,
             automaton_provider);
-        a = BasicOperations.concatenate(list);
-        MinimizationOperations.minimize(a);
+        a = Operations.concatenate(list);
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_INTERSECTION:
-        a = exp1.toAutomaton(automata, automaton_provider).intersection(
+        a = Operations.intersection(
+            exp1.toAutomaton(automata, automaton_provider),
             exp2.toAutomaton(automata, automaton_provider));
-        MinimizationOperations.minimize(a);
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_OPTIONAL:
-        a = exp1.toAutomaton(automata, automaton_provider).optional();
-        MinimizationOperations.minimize(a);
+        a = Operations.optional(exp1.toAutomaton(automata, automaton_provider));
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_REPEAT:
-        a = exp1.toAutomaton(automata, automaton_provider).repeat();
-        MinimizationOperations.minimize(a);
+        a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider));
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_REPEAT_MIN:
-        a = exp1.toAutomaton(automata, automaton_provider).repeat(min);
-        MinimizationOperations.minimize(a);
+        a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider), min);
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_REPEAT_MINMAX:
-        a = exp1.toAutomaton(automata, automaton_provider).repeat(min, max);
-        MinimizationOperations.minimize(a);
+        a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider), min, max);
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_COMPLEMENT:
-        a = exp1.toAutomaton(automata, automaton_provider).complement();
-        MinimizationOperations.minimize(a);
+        a = Operations.complement(exp1.toAutomaton(automata, automaton_provider));
+        a = MinimizationOperations.minimize(a);
         break;
       case REGEXP_CHAR:
-        a = BasicAutomata.makeChar(c);
+        a = Automata.makeChar(c);
         break;
       case REGEXP_CHAR_RANGE:
-        a = BasicAutomata.makeCharRange(from, to);
+        a = Automata.makeCharRange(from, to);
         break;
       case REGEXP_ANYCHAR:
-        a = BasicAutomata.makeAnyChar();
+        a = Automata.makeAnyChar();
         break;
       case REGEXP_EMPTY:
-        a = BasicAutomata.makeEmpty();
+        a = Automata.makeEmpty();
         break;
       case REGEXP_STRING:
-        a = BasicAutomata.makeString(s);
+        a = Automata.makeString(s);
         break;
       case REGEXP_ANYSTRING:
-        a = BasicAutomata.makeAnyString();
+        a = Automata.makeAnyString();
         break;
       case REGEXP_AUTOMATON:
         Automaton aa = null;
-        if (automata != null) aa = automata.get(s);
-        if (aa == null && automaton_provider != null) try {
-          aa = automaton_provider.getAutomaton(s);
-        } catch (IOException e) {
-          throw new IllegalArgumentException(e);
+        if (automata != null) {
+          aa = automata.get(s);
         }
-        if (aa == null) throw new IllegalArgumentException("'" + s
-            + "' not found");
-        a = aa.clone(); // always clone here (ignore allow_mutate)
+        if (aa == null && automaton_provider != null) {
+          try {
+            aa = automaton_provider.getAutomaton(s);
+          } catch (IOException e) {
+            throw new IllegalArgumentException(e);
+          }
+        }
+        if (aa == null) {
+          throw new IllegalArgumentException("'" + s + "' not found");
+        }
+        a = aa;
         break;
       case REGEXP_INTERVAL:
-        a = BasicAutomata.makeInterval(min, max, digits);
+        a = Automata.makeInterval(min, max, digits);
         break;
     }
     return a;
@@ -568,7 +549,9 @@ public class RegExp {
     if (exp.kind == kind) {
       findLeaves(exp.exp1, kind, list, automata, automaton_provider);
       findLeaves(exp.exp2, kind, list, automata, automaton_provider);
-    } else list.add(exp.toAutomaton(automata, automaton_provider));
+    } else {
+      list.add(exp.toAutomaton(automata, automaton_provider));
+    }
   }
   
   /**

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java Thu Jun 19 08:22:08 2014
@@ -37,6 +37,7 @@ import java.util.Arrays;
  * @lucene.experimental
  */
 public abstract class RunAutomaton {
+  final Automaton automaton;
   final int maxInterval;
   final int size;
   final boolean[] accept;
@@ -65,10 +66,10 @@ public abstract class RunAutomaton {
           if (j + 1 < points.length) max = (points[j + 1] - 1);
           else max = maxInterval;
           b.append(" ");
-          Transition.appendCharString(min, b);
+          Automaton.appendCharString(min, b);
           if (min != max) {
             b.append("-");
-            Transition.appendCharString(max, b);
+            Automaton.appendCharString(max, b);
           }
           b.append(" -> ").append(k).append("\n");
         }
@@ -110,7 +111,7 @@ public abstract class RunAutomaton {
    * Gets character class of given codepoint
    */
   final int getCharClass(int c) {
-    return SpecialOperations.findIndex(c, points);
+    return Operations.findIndex(c, points);
   }
 
   /**
@@ -121,23 +122,23 @@ public abstract class RunAutomaton {
    */
   public RunAutomaton(Automaton a, int maxInterval, boolean tableize) {
     this.maxInterval = maxInterval;
-    a.determinize();
+    a = Operations.determinize(a);
+    this.automaton = a;
     points = a.getStartPoints();
-    final State[] states = a.getNumberedStates();
-    initial = a.initial.number;
-    size = states.length;
+    initial = 0;
+    size = Math.max(1,a.getNumStates());
     accept = new boolean[size];
     transitions = new int[size * points.length];
-    for (int n = 0; n < size * points.length; n++)
-      transitions[n] = -1;
-    for (State s : states) {
-      int n = s.number;
-      accept[n] = s.accept;
+    Arrays.fill(transitions, -1);
+    for (int n=0;n<size;n++) {
+      accept[n] = a.isAccept(n);
       for (int c = 0; c < points.length; c++) {
-        State q = s.step(points[c]);
-        if (q != null) transitions[n * points.length + c] = q.number;
+        int dest = a.step(n, points[c]);
+        assert dest == -1 || dest < size;
+        transitions[n * points.length + c] = dest;
       }
     }
+
     /*
      * Set alphabet table for optimal run performance.
      */
@@ -145,8 +146,9 @@ public abstract class RunAutomaton {
       classmap = new int[maxInterval + 1];
       int i = 0;
       for (int j = 0; j <= maxInterval; j++) {
-        if (i + 1 < points.length && j == points[i + 1])
+        if (i + 1 < points.length && j == points[i + 1]) {
           i++;
+        }
         classmap[j] = i;
       }
     } else {
@@ -162,10 +164,11 @@ public abstract class RunAutomaton {
    * transition function.)
    */
   public final int step(int state, int c) {
-    if (classmap == null)
+    if (classmap == null) {
       return transitions[state * points.length + getCharClass(c)];
-    else
+    } else {
       return transitions[state * points.length + classmap[c]];
+    }
   }
 
   @Override