You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by pa...@apache.org on 2021/12/20 22:52:15 UTC
[lucene] branch main updated: LUCENE-10010 Introduce NFARunAutomaton to run NFA directly (#225)

This is an automated email from the ASF dual-hosted git repository.

patrickz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new c972c6a  LUCENE-10010 Introduce NFARunAutomaton to run NFA directly (#225)
c972c6a is described below

commit c972c6a7754e9ff1f838ed72ff09e7def00be5f8
Author: Patrick Zhai <zh...@users.noreply.github.com>
AuthorDate: Mon Dec 20 14:52:09 2021 -0800

    LUCENE-10010 Introduce NFARunAutomaton to run NFA directly (#225)
    
    LUCENE-10010 Introduce NFARunAutomaton to run NFA directly
    
    Created 2 new interfaces ByteRunnable and TransisitonAccessor to represent function necessary for performing automaton run. Introduced NFARunAutomaton and incorporated into CompiledAutomaton and enables directly running AutomatonQuery based on NFA for all codecs.
---
 lucene/CHANGES.txt                                 |   6 +-
 .../lucene40/blocktree/FieldReader.java            |   6 +-
 .../lucene40/blocktree/IntersectTermsEnum.java     |  12 +-
 .../blocktreeords/OrdsIntersectTermsEnum.java      |  38 +-
 .../blocktreeords/OrdsIntersectTermsEnumFrame.java |   8 +-
 .../lucene/codecs/memory/DirectPostingsFormat.java |  56 +--
 .../lucene/codecs/memory/FSTTermsReader.java       |   6 +-
 .../codecs/uniformsplit/IntersectBlockReader.java  |  69 ++--
 .../codecs/lucene90/blocktree/FieldReader.java     |   6 +-
 .../lucene90/blocktree/IntersectTermsEnum.java     |  12 +-
 .../apache/lucene/index/AutomatonTermsEnum.java    |  52 +--
 .../org/apache/lucene/search/AutomatonQuery.java   |   6 +-
 .../apache/lucene/util/automaton/Automaton.java    |  16 +-
 .../lucene/util/automaton/ByteRunAutomaton.java    |  13 +-
 .../{ByteRunAutomaton.java => ByteRunnable.java}   |  42 +-
 .../lucene/util/automaton/CompiledAutomaton.java   |  82 +++-
 .../lucene/util/automaton/NFARunAutomaton.java     | 429 +++++++++++++++++++++
 .../apache/lucene/util/automaton/Operations.java   |   6 +-
 .../apache/lucene/util/automaton/RunAutomaton.java |   7 +-
 .../org/apache/lucene/util/automaton/StateSet.java |   5 +
 .../lucene/util/automaton/TransitionAccessor.java  |  39 ++
 .../apache/lucene/search/TestAutomatonQuery.java   |  12 -
 .../util/automaton/MinimizationOperations.java     |   1 +
 .../lucene/util/automaton/TestNFARunAutomaton.java | 176 +++++++++
 24 files changed, 896 insertions(+), 209 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 974197f..79a2fdd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,13 +41,15 @@ API Changes
 
 * LUCENE-10244: MultiCollector::getCollectors is now public, allowing users to access the wrapped
   collectors. (Andriy Redko)
-  
+
 * LUCENE-10197: UnifiedHighlighter now has a Builder to construct it.  The UH's setters are now
   deprecated.  (Animesh Pandey, David Smiley)
 
 New Features
 ---------------------
 
+* LUCENE-10010 Introduce NFARunAutomaton to run NFA directly. (Patrick Zhai)
+
 * LUCENE-10255: Lucene JARs are now proper modules, with module descriptors and dependency information.
   (Chris Hegarty, Uwe Schindler, Tomoko Uchida, Dawid Weiss)
 
@@ -112,7 +114,7 @@ Bug Fixes
 
 * LUCENE-10287: Fix Luke startup script to add jdk.unsupported module so
   MMapDirectory is used when opening indexes. (Uwe Schindler)
-  
+
 * LUCENE-10236: Stop duplicating norms when scoring in CombinedFieldQuery.
   (Zach Chen, Jim Ferenczi, Julie Tibshirani)
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java
index 284b090..0ede117 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java
@@ -188,7 +188,11 @@ public final class FieldReader extends Terms {
       throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     return new IntersectTermsEnum(
-        this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
+        this,
+        compiled.getTransitionAccessor(),
+        compiled.getByteRunnable(),
+        compiled.commonSuffixRef,
+        startTerm);
   }
 
   @Override
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java
index 12a40d7..c61795b 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java
@@ -27,9 +27,9 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
@@ -53,8 +53,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
   @SuppressWarnings({"rawtypes", "unchecked"})
   private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
 
-  final RunAutomaton runAutomaton;
-  final Automaton automaton;
+  final ByteRunnable runAutomaton;
+  final TransitionAccessor automaton;
   final BytesRef commonSuffix;
 
   private IntersectTermsEnumFrame currentFrame;
@@ -72,8 +72,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
   // regexp foo*bar must be at least length 6 bytes
   public IntersectTermsEnum(
       FieldReader fr,
-      Automaton automaton,
-      RunAutomaton runAutomaton,
+      TransitionAccessor automaton,
+      ByteRunnable runAutomaton,
       BytesRef commonSuffix,
       BytesRef startTerm)
       throws IOException {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
index 2a95da5..a4752ee 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
@@ -27,8 +27,9 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 import org.apache.lucene.util.fst.FST;
 
 // NOTE: cannot seek!
@@ -40,8 +41,9 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
   @SuppressWarnings({"rawtypes", "unchecked"})
   private FST.Arc<Output>[] arcs = new FST.Arc[5];
 
-  final RunAutomaton runAutomaton;
-  final CompiledAutomaton compiledAutomaton;
+  final ByteRunnable byteRunnable;
+  protected final TransitionAccessor transitionAccessor;
+  private final BytesRef commonSuffixRef;
 
   private OrdsIntersectTermsEnumFrame currentFrame;
 
@@ -62,8 +64,9 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
     // brToString(compiled.commonSuffixRef));
     // }
     this.fr = fr;
-    runAutomaton = compiled.runAutomaton;
-    compiledAutomaton = compiled;
+    this.byteRunnable = compiled.getByteRunnable();
+    this.transitionAccessor = compiled.getTransitionAccessor();
+    commonSuffixRef = compiled.commonSuffixRef;
     in = fr.parent.in.clone();
     stack = new OrdsIntersectTermsEnumFrame[5];
     for (int idx = 0; idx < stack.length; idx++) {
@@ -220,7 +223,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
     int state = currentFrame.state;
     for (int idx = 0; idx < currentFrame.suffix; idx++) {
       state =
-          runAutomaton.step(
+          byteRunnable.step(
               state, currentFrame.suffixBytes[currentFrame.startBytePos + idx] & 0xff);
       assert state != -1;
     }
@@ -390,7 +393,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
             continue nextTerm;
           }
           currentFrame.transitionIndex++;
-          compiledAutomaton.automaton.getNextTransition(currentFrame.transition);
+          transitionAccessor.getNextTransition(currentFrame.transition);
           currentFrame.curTransitionMax = currentFrame.transition.max;
           // if (DEBUG) System.out.println("      next trans=" +
           // currentFrame.transitions[currentFrame.transitionIndex]);
@@ -398,9 +401,9 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
       }
 
       // First test the common suffix, if set:
-      if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
+      if (commonSuffixRef != null && !isSubBlock) {
         final int termLen = currentFrame.prefix + currentFrame.suffix;
-        if (termLen < compiledAutomaton.commonSuffixRef.length) {
+        if (termLen < commonSuffixRef.length) {
           // No match
           // if (DEBUG) {
           //   System.out.println("      skip: common suffix length");
@@ -409,10 +412,10 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
         }
 
         final byte[] suffixBytes = currentFrame.suffixBytes;
-        final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
+        final byte[] commonSuffixBytes = commonSuffixRef.bytes;
 
-        final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
-        assert compiledAutomaton.commonSuffixRef.offset == 0;
+        final int lenInPrefix = commonSuffixRef.length - currentFrame.suffix;
+        assert commonSuffixRef.offset == 0;
         int suffixBytesPos;
         int commonSuffixBytesPos = 0;
 
@@ -434,14 +437,11 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
           }
           suffixBytesPos = currentFrame.startBytePos;
         } else {
-          suffixBytesPos =
-              currentFrame.startBytePos
-                  + currentFrame.suffix
-                  - compiledAutomaton.commonSuffixRef.length;
+          suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffixRef.length;
         }
 
         // Test overlapping suffix part:
-        final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
+        final int commonSuffixBytesPosEnd = commonSuffixRef.length;
         while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
           if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
             // if (DEBUG) {
@@ -462,7 +462,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
       int state = currentFrame.state;
       for (int idx = 0; idx < currentFrame.suffix; idx++) {
         state =
-            runAutomaton.step(
+            byteRunnable.step(
                 state, currentFrame.suffixBytes[currentFrame.startBytePos + idx] & 0xff);
         if (state == -1) {
           // No match
@@ -485,7 +485,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
         // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" :
         // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" +
         // currentFrame.outputPrefix);
-      } else if (runAutomaton.isAccept(state)) {
+      } else if (byteRunnable.isAccept(state)) {
         copyTerm();
         // if (DEBUG) System.out.println("      term match to state=" + state + "; return term=" +
         // brToString(term));
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java
index d2d48f9..88e5536 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java
@@ -126,10 +126,10 @@ final class OrdsIntersectTermsEnumFrame {
   public void setState(int state) {
     this.state = state;
     transitionIndex = 0;
-    transitionCount = ite.compiledAutomaton.automaton.getNumTransitions(state);
+    transitionCount = ite.transitionAccessor.getNumTransitions(state);
     if (transitionCount != 0) {
-      ite.compiledAutomaton.automaton.initTransition(state, transition);
-      ite.compiledAutomaton.automaton.getNextTransition(transition);
+      ite.transitionAccessor.initTransition(state, transition);
+      ite.transitionAccessor.getNextTransition(transition);
       curTransitionMax = transition.max;
     } else {
       curTransitionMax = -1;
@@ -164,7 +164,7 @@ final class OrdsIntersectTermsEnumFrame {
 
         // If current state is accept, we must process
         // first block in case it has empty suffix:
-        if (!ite.runAutomaton.isAccept(state)) {
+        if (!ite.byteRunnable.isAccept(state)) {
           // Maybe skip floor blocks:
           assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
           while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index f66ca45..c243dd0 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -44,9 +44,10 @@ import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 
 // TODO:
 //   - build depth-N prefix hash?
@@ -943,8 +944,9 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
 
     private final class DirectIntersectTermsEnum extends BaseTermsEnum {
-      private final RunAutomaton runAutomaton;
-      private final CompiledAutomaton compiledAutomaton;
+      private final ByteRunnable byteRunnable;
+      protected final TransitionAccessor transitionAccessor;
+      private final BytesRef commonSuffixRef;
       private int termOrd;
       private final BytesRef scratch = new BytesRef();
 
@@ -962,15 +964,16 @@ public final class DirectPostingsFormat extends PostingsFormat {
       private int stateUpto;
 
       public DirectIntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) {
-        runAutomaton = compiled.runAutomaton;
-        compiledAutomaton = compiled;
+        this.byteRunnable = compiled.getByteRunnable();
+        this.transitionAccessor = compiled.getTransitionAccessor();
+        commonSuffixRef = compiled.commonSuffixRef;
         termOrd = -1;
         states = new State[1];
         states[0] = new State();
         states[0].changeOrd = terms.length;
         states[0].state = 0;
-        states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
-        compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
+        states[0].transitionCount = transitionAccessor.getNumTransitions(states[0].state);
+        transitionAccessor.initTransition(states[0].state, states[0].transition);
         states[0].transitionUpto = -1;
         states[0].transitionMax = -1;
 
@@ -992,7 +995,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
               while (label > states[i].transitionMax) {
                 states[i].transitionUpto++;
                 assert states[i].transitionUpto < states[i].transitionCount;
-                compiledAutomaton.automaton.getNextTransition(states[i].transition);
+                transitionAccessor.getNextTransition(states[i].transition);
                 states[i].transitionMin = states[i].transition.min;
                 states[i].transitionMax = states[i].transition.max;
                 assert states[i].transitionMin >= 0;
@@ -1045,7 +1048,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
                   if (skipUpto < numSkips) {
                     grow();
 
-                    final int nextState = runAutomaton.step(states[stateUpto].state, label);
+                    final int nextState = byteRunnable.step(states[stateUpto].state, label);
 
                     // Automaton is required to accept startTerm:
                     assert nextState != -1;
@@ -1054,8 +1057,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
                     states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
                     states[stateUpto].state = nextState;
                     states[stateUpto].transitionCount =
-                        compiledAutomaton.automaton.getNumTransitions(nextState);
-                    compiledAutomaton.automaton.initTransition(
+                        transitionAccessor.getNumTransitions(nextState);
+                    transitionAccessor.initTransition(
                         states[stateUpto].state, states[stateUpto].transition);
                     states[stateUpto].transitionUpto = -1;
                     states[stateUpto].transitionMax = -1;
@@ -1154,7 +1157,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
           // if (DEBUG) {
           //   System.out.println("  visit empty string");
           // }
-          if (runAutomaton.isAccept(states[0].state)) {
+          if (byteRunnable.isAccept(states[0].state)) {
             scratch.bytes = termBytes;
             scratch.offset = 0;
             scratch.length = 0;
@@ -1236,7 +1239,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
               }
               continue nextTerm;
             }
-            compiledAutomaton.automaton.getNextTransition(state.transition);
+            transitionAccessor.getNextTransition(state.transition);
             assert state.transitionUpto < state.transitionCount
                 : " state.transitionUpto=" + state.transitionUpto + " vs " + state.transitionCount;
             state.transitionMin = state.transition.min;
@@ -1304,7 +1307,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
             }
           }
 
-          int nextState = runAutomaton.step(states[stateUpto].state, label);
+          int nextState = byteRunnable.step(states[stateUpto].state, label);
 
           if (nextState == -1) {
             // Skip
@@ -1340,9 +1343,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
             stateUpto++;
             states[stateUpto].state = nextState;
             states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
-            states[stateUpto].transitionCount =
-                compiledAutomaton.automaton.getNumTransitions(nextState);
-            compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
+            states[stateUpto].transitionCount = transitionAccessor.getNumTransitions(nextState);
+            transitionAccessor.initTransition(nextState, states[stateUpto].transition);
             states[stateUpto].transitionUpto = -1;
             states[stateUpto].transitionMax = -1;
 
@@ -1350,7 +1352,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
               // if (DEBUG) {
               //   System.out.println("  term ends after push");
               // }
-              if (runAutomaton.isAccept(nextState)) {
+              if (byteRunnable.isAccept(nextState)) {
                 // if (DEBUG) {
                 //   System.out.println("  automaton accepts: return");
                 // }
@@ -1374,17 +1376,17 @@ public final class DirectPostingsFormat extends PostingsFormat {
 
             // TODO: add assert that we don't inc too many times
 
-            if (compiledAutomaton.commonSuffixRef != null) {
-              // System.out.println("suffix " + compiledAutomaton.commonSuffixRef.utf8ToString());
-              assert compiledAutomaton.commonSuffixRef.offset == 0;
-              if (termLength < compiledAutomaton.commonSuffixRef.length) {
+            if (commonSuffixRef != null) {
+              // System.out.println("suffix " + commonSuffixRef.utf8ToString());
+              assert commonSuffixRef.offset == 0;
+              if (termLength < commonSuffixRef.length) {
                 termOrd++;
                 skipUpto = 0;
                 continue nextTerm;
               }
-              int offset = termOffset + termLength - compiledAutomaton.commonSuffixRef.length;
-              for (int suffix = 0; suffix < compiledAutomaton.commonSuffixRef.length; suffix++) {
-                if (termBytes[offset + suffix] != compiledAutomaton.commonSuffixRef.bytes[suffix]) {
+              int offset = termOffset + termLength - commonSuffixRef.length;
+              for (int suffix = 0; suffix < commonSuffixRef.length; suffix++) {
+                if (termBytes[offset + suffix] != commonSuffixRef.bytes[suffix]) {
                   termOrd++;
                   skipUpto = 0;
                   continue nextTerm;
@@ -1394,7 +1396,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
 
             int upto = stateUpto + 1;
             while (upto < termLength) {
-              nextState = runAutomaton.step(nextState, termBytes[termOffset + upto] & 0xFF);
+              nextState = byteRunnable.step(nextState, termBytes[termOffset + upto] & 0xFF);
               if (nextState == -1) {
                 termOrd++;
                 skipUpto = 0;
@@ -1406,7 +1408,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
               upto++;
             }
 
-            if (runAutomaton.isAccept(nextState)) {
+            if (byteRunnable.isAccept(nextState)) {
               scratch.bytes = termBytes;
               scratch.offset = termOffsets[termOrd];
               scratch.length = termOffsets[1 + termOrd] - scratch.offset;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
index 4f3791e..8a16de8 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@@ -45,7 +45,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
@@ -436,7 +436,7 @@ public class FSTTermsReader extends FieldsProducer {
       final Outputs<FSTTermOutputs.TermData> fstOutputs;
 
       /* query automaton to intersect with */
-      final ByteRunAutomaton fsa;
+      final ByteRunnable fsa;
 
       private final class Frame {
         /* fst stats */
@@ -464,7 +464,7 @@ public class FSTTermsReader extends FieldsProducer {
         this.fst = dict;
         this.fstReader = fst.getBytesReader();
         this.fstOutputs = dict.outputs;
-        this.fsa = compiled.runAutomaton;
+        this.fsa = compiled.getByteRunnable();
         this.level = -1;
         this.stack = new Frame[16];
         for (int i = 0; i < stack.length; i++) {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/IntersectBlockReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/IntersectBlockReader.java
index d003807..22559af 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/IntersectBlockReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/IntersectBlockReader.java
@@ -27,10 +27,10 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 
 /**
  * The "intersect" {@link TermsEnum} response to {@link
@@ -72,8 +72,8 @@ public class IntersectBlockReader extends BlockReader {
    */
   protected final int NUM_CONSECUTIVELY_REJECTED_TERMS_THRESHOLD = 4;
 
-  protected final Automaton automaton;
-  protected final ByteRunAutomaton runAutomaton;
+  protected final TransitionAccessor transitionAccessor;
+  protected final ByteRunnable byteRunnable;
   protected final boolean finite;
   protected final BytesRef commonSuffix; // maybe null
   protected final int minTermLength;
@@ -106,8 +106,8 @@ public class IntersectBlockReader extends BlockReader {
       BlockDecoder blockDecoder)
       throws IOException {
     super(dictionaryBrowserSupplier, blockInput, postingsReader, fieldMetadata, blockDecoder);
-    automaton = compiled.automaton;
-    runAutomaton = compiled.runAutomaton;
+    this.byteRunnable = compiled.getByteRunnable();
+    this.transitionAccessor = compiled.getTransitionAccessor();
     finite = compiled.finite;
     commonSuffix = compiled.commonSuffixRef;
     minTermLength = getMinTermLength();
@@ -136,16 +136,16 @@ public class IntersectBlockReader extends BlockReader {
     int state = 0;
     Transition t = null;
     while (true) {
-      if (runAutomaton.isAccept(state)) {
+      if (byteRunnable.isAccept(state)) {
         // The common prefix reaches a final state. So common prefix and common suffix overlap.
         // Min term length is the max between common prefix and common suffix lengths.
         return Math.max(commonPrefixLength, commonSuffixLength);
       }
-      if (automaton.getNumTransitions(state) == 1) {
+      if (transitionAccessor.getNumTransitions(state) == 1) {
         if (t == null) {
           t = new Transition();
         }
-        automaton.getTransition(state, 0, t);
+        transitionAccessor.getTransition(state, 0, t);
         if (t.min == t.max) {
           state = t.dest;
           commonPrefixLength++;
@@ -230,10 +230,10 @@ public class IntersectBlockReader extends BlockReader {
         int state = states[index];
         while (true) {
           if (index == lineTerm.length) {
-            if (runAutomaton.isAccept(state)) {
+            if (byteRunnable.isAccept(state)) {
               // The automaton accepts the current term. Record the number of matched bytes and
               // return the term.
-              assert runAutomaton.run(lineTerm.bytes, 0, lineTerm.length);
+              assert byteRunnable.run(lineTerm.bytes, 0, lineTerm.length);
               numMatchedBytes = index;
               if (numConsecutivelyRejectedTerms > 0) {
                 numConsecutivelyRejectedTerms = 0;
@@ -243,7 +243,7 @@ public class IntersectBlockReader extends BlockReader {
             }
             break;
           }
-          state = runAutomaton.step(state, lineTerm.bytes[index] & 0xff);
+          state = byteRunnable.step(state, lineTerm.bytes[index] & 0xff);
           if (state == -1) {
             // The automaton rejects the current term.
             break;
@@ -254,7 +254,7 @@ public class IntersectBlockReader extends BlockReader {
       }
       // The current term is not accepted by the automaton.
       // Still record the reached automaton state to start the next term steps from there.
-      assert !runAutomaton.run(lineTerm.bytes, 0, lineTerm.length);
+      assert !byteRunnable.run(lineTerm.bytes, 0, lineTerm.length);
       numMatchedBytes = index;
       // If the number of consecutively rejected terms reaches the threshold,
       // then determine whether it is worthwhile to jump to a block away.
@@ -366,7 +366,7 @@ public class IntersectBlockReader extends BlockReader {
   protected class AutomatonNextTermCalculator {
     // for path tracking: each short records gen when we last
     // visited the state; we use gens to avoid having to clear
-    protected final short[] visited;
+    protected short[] visited;
     protected short curGen;
     // the reference used for seeking forwards through the term dictionary
     protected final BytesRefBuilder seekBytesRef = new BytesRefBuilder();
@@ -380,19 +380,22 @@ public class IntersectBlockReader extends BlockReader {
     protected final IntsRefBuilder savedStates = new IntsRefBuilder();
 
     protected AutomatonNextTermCalculator(CompiledAutomaton compiled) {
-      visited = compiled.finite ? null : new short[runAutomaton.getSize()];
+      visited = compiled.finite ? null : new short[byteRunnable.getSize()];
     }
 
     /** Records the given state has been visited. */
-    protected void setVisited(int state) {
-      if (!finite) {
+    private void setVisited(int state) {
+      if (finite == false) {
+        if (state >= visited.length) {
+          visited = ArrayUtil.grow(visited, state + 1);
+        }
         visited[state] = curGen;
       }
     }
 
     /** Indicates whether the given state has been visited. */
-    protected boolean isVisited(int state) {
-      return !finite && visited[state] == curGen;
+    private boolean isVisited(int state) {
+      return !finite && state < visited.length && visited[state] == curGen;
     }
 
     /** True if the current state of the automata is best iterated linearly (without seeking). */
@@ -406,7 +409,7 @@ public class IntersectBlockReader extends BlockReader {
       if (term == null) {
         assert seekBytesRef.length() == 0;
         // return the empty term, as it's valid
-        if (runAutomaton.isAccept(0)) {
+        if (byteRunnable.isAccept(0)) {
           return seekBytesRef.get();
         }
       } else {
@@ -433,13 +436,13 @@ public class IntersectBlockReader extends BlockReader {
       int maxInterval = 0xff;
       // System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
       for (int i = 0; i < position; i++) {
-        state = runAutomaton.step(state, seekBytesRef.byteAt(i) & 0xff);
+        state = byteRunnable.step(state, seekBytesRef.byteAt(i) & 0xff);
         assert state >= 0 : "state=" + state;
       }
-      final int numTransitions = automaton.getNumTransitions(state);
-      automaton.initTransition(state, transition);
+      final int numTransitions = transitionAccessor.getNumTransitions(state);
+      transitionAccessor.initTransition(state, transition);
       for (int i = 0; i < numTransitions; i++) {
-        automaton.getNextTransition(transition);
+        transitionAccessor.getNextTransition(transition);
         if (transition.min <= (seekBytesRef.byteAt(position) & 0xff)
             && (seekBytesRef.byteAt(position) & 0xff) <= transition.max) {
           maxInterval = transition.max;
@@ -484,7 +487,7 @@ public class IntersectBlockReader extends BlockReader {
         // walk the automaton until a character is rejected.
         for (state = savedStates.intAt(pos); pos < seekBytesRef.length(); pos++) {
           setVisited(state);
-          int nextState = runAutomaton.step(state, seekBytesRef.byteAt(pos) & 0xff);
+          int nextState = byteRunnable.step(state, seekBytesRef.byteAt(pos) & 0xff);
           if (nextState == -1) break;
           savedStates.setIntAt(pos + 1, nextState);
           // we found a loop, record it for faster enumeration
@@ -502,8 +505,8 @@ public class IntersectBlockReader extends BlockReader {
           /* no more solutions exist from this useful portion, backtrack */
           if ((pos = backtrack(pos)) < 0) /* no more solutions at all */ return false;
           final int newState =
-              runAutomaton.step(savedStates.intAt(pos), seekBytesRef.byteAt(pos) & 0xff);
-          if (newState >= 0 && runAutomaton.isAccept(newState))
+              byteRunnable.step(savedStates.intAt(pos), seekBytesRef.byteAt(pos) & 0xff);
+          if (newState >= 0 && byteRunnable.isAccept(newState))
             /* String is good to go as-is */
             return true;
           /* else advance further */
@@ -548,12 +551,12 @@ public class IntersectBlockReader extends BlockReader {
       seekBytesRef.setLength(position);
       setVisited(state);
 
-      final int numTransitions = automaton.getNumTransitions(state);
-      automaton.initTransition(state, transition);
+      final int numTransitions = transitionAccessor.getNumTransitions(state);
+      transitionAccessor.initTransition(state, transition);
       // find the minimal path (lexicographic order) that is >= c
 
       for (int i = 0; i < numTransitions; i++) {
-        automaton.getNextTransition(transition);
+        transitionAccessor.getNextTransition(transition);
         if (transition.max >= c) {
           int nextChar = Math.max(c, transition.min);
           // append either the next sequential char, or the minimum transition
@@ -564,15 +567,15 @@ public class IntersectBlockReader extends BlockReader {
            * as long as is possible, continue down the minimal path in
            * lexicographic order. if a loop or accept state is encountered, stop.
            */
-          while (!isVisited(state) && !runAutomaton.isAccept(state)) {
+          while (!isVisited(state) && !byteRunnable.isAccept(state)) {
             setVisited(state);
             /*
              * Note: we work with a DFA with no transitions to dead states.
              * so the below is ok, if it is not an accept state,
              * then there MUST be at least one transition.
              */
-            automaton.initTransition(state, transition);
-            automaton.getNextTransition(transition);
+            transitionAccessor.initTransition(state, transition);
+            transitionAccessor.getNextTransition(transition);
             state = transition.dest;
 
             // append the minimum transition
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java
index 78556f4..7aba781 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java
@@ -184,7 +184,11 @@ public final class FieldReader extends Terms {
       throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     return new IntersectTermsEnum(
-        this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
+        this,
+        compiled.getTransitionAccessor(),
+        compiled.getByteRunnable(),
+        compiled.commonSuffixRef,
+        startTerm);
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java
index 7c35f51..5773e4a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java
@@ -27,9 +27,9 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
@@ -53,8 +53,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
   @SuppressWarnings({"rawtypes", "unchecked"})
   private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
 
-  final RunAutomaton runAutomaton;
-  final Automaton automaton;
+  final ByteRunnable runAutomaton;
+  final TransitionAccessor automaton;
   final BytesRef commonSuffix;
 
   private IntersectTermsEnumFrame currentFrame;
@@ -72,8 +72,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
   // regexp foo*bar must be at least length 6 bytes
   public IntersectTermsEnum(
       FieldReader fr,
-      Automaton automaton,
-      RunAutomaton runAutomaton,
+      TransitionAccessor automaton,
+      ByteRunnable runAutomaton,
       BytesRef commonSuffix,
       BytesRef startTerm)
       throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
index d6884dd..ac16477 100644
--- a/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
@@ -23,10 +23,10 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.ByteRunnable;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.automaton.TransitionAccessor;
 
 /**
  * A FilteredTermsEnum that enumerates terms based upon what is accepted by a DFA.
@@ -47,16 +47,16 @@ import org.apache.lucene.util.automaton.Transition;
  */
 public class AutomatonTermsEnum extends FilteredTermsEnum {
   // a tableized array-based form of the DFA
-  private final ByteRunAutomaton runAutomaton;
+  private final ByteRunnable byteRunnable;
   // common suffix of the automaton
   private final BytesRef commonSuffixRef;
   // true if the automaton accepts a finite language
   private final boolean finite;
   // array of sorted transitions for each state, indexed by state number
-  private final Automaton automaton;
+  private final TransitionAccessor transitionAccessor;
   // Used for visited state tracking: each short records gen when we last
   // visited the state; we use gens to avoid having to clear
-  private final short[] visited;
+  private short[] visited;
   private short curGen;
   // the reference used for seeking forwards through the term dictionary
   private final BytesRefBuilder seekBytesRef = new BytesRefBuilder();
@@ -82,25 +82,27 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
       throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     this.finite = compiled.finite;
-    this.runAutomaton = compiled.runAutomaton;
-    assert this.runAutomaton != null;
+    this.byteRunnable = compiled.getByteRunnable();
+    this.transitionAccessor = compiled.getTransitionAccessor();
     this.commonSuffixRef = compiled.commonSuffixRef;
-    this.automaton = compiled.automaton;
 
     // No need to track visited states for a finite language without loops.
-    visited = finite ? null : new short[runAutomaton.getSize()];
+    visited = finite ? null : new short[byteRunnable.getSize()];
   }
 
   /** Records the given state has been visited. */
   private void setVisited(int state) {
     if (!finite) {
+      if (state >= visited.length) {
+        visited = ArrayUtil.grow(visited, state + 1);
+      }
       visited[state] = curGen;
     }
   }
 
   /** Indicates whether the given state has been visited. */
   private boolean isVisited(int state) {
-    return !finite && visited[state] == curGen;
+    return !finite && state < visited.length && visited[state] == curGen;
   }
 
   /**
@@ -110,7 +112,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
   @Override
   protected AcceptStatus accept(final BytesRef term) {
     if (commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef)) {
-      if (runAutomaton.run(term.bytes, term.offset, term.length))
+      if (byteRunnable.run(term.bytes, term.offset, term.length))
         return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
       else
         return (linear && term.compareTo(linearUpperBound) < 0)
@@ -129,7 +131,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
     if (term == null) {
       assert seekBytesRef.length() == 0;
       // return the empty term, as it's valid
-      if (runAutomaton.isAccept(0)) {
+      if (byteRunnable.isAccept(0)) {
         return seekBytesRef.get();
       }
     } else {
@@ -155,13 +157,13 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
     int maxInterval = 0xff;
     // System.out.println("setLinear pos=" + position + " seekbytesRef=" + seekBytesRef);
     for (int i = 0; i < position; i++) {
-      state = runAutomaton.step(state, seekBytesRef.byteAt(i) & 0xff);
+      state = byteRunnable.step(state, seekBytesRef.byteAt(i) & 0xff);
       assert state >= 0 : "state=" + state;
     }
-    final int numTransitions = automaton.getNumTransitions(state);
-    automaton.initTransition(state, transition);
+    final int numTransitions = transitionAccessor.getNumTransitions(state);
+    transitionAccessor.initTransition(state, transition);
     for (int i = 0; i < numTransitions; i++) {
-      automaton.getNextTransition(transition);
+      transitionAccessor.getNextTransition(transition);
       if (transition.min <= (seekBytesRef.byteAt(position) & 0xff)
           && (seekBytesRef.byteAt(position) & 0xff) <= transition.max) {
         maxInterval = transition.max;
@@ -206,7 +208,7 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
       // walk the automaton until a character is rejected.
       for (state = savedStates.intAt(pos); pos < seekBytesRef.length(); pos++) {
         setVisited(state);
-        int nextState = runAutomaton.step(state, seekBytesRef.byteAt(pos) & 0xff);
+        int nextState = byteRunnable.step(state, seekBytesRef.byteAt(pos) & 0xff);
         if (nextState == -1) break;
         savedStates.setIntAt(pos + 1, nextState);
         // we found a loop, record it for faster enumeration
@@ -227,8 +229,8 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
           return false;
         }
         final int newState =
-            runAutomaton.step(savedStates.intAt(pos), seekBytesRef.byteAt(pos) & 0xff);
-        if (newState >= 0 && runAutomaton.isAccept(newState)) {
+            byteRunnable.step(savedStates.intAt(pos), seekBytesRef.byteAt(pos) & 0xff);
+        if (newState >= 0 && byteRunnable.isAccept(newState)) {
           /* String is good to go as-is */
           return true;
         }
@@ -274,12 +276,12 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
     seekBytesRef.setLength(position);
     setVisited(state);
 
-    final int numTransitions = automaton.getNumTransitions(state);
-    automaton.initTransition(state, transition);
+    final int numTransitions = transitionAccessor.getNumTransitions(state);
+    transitionAccessor.initTransition(state, transition);
     // find the minimal path (lexicographic order) that is >= c
 
     for (int i = 0; i < numTransitions; i++) {
-      automaton.getNextTransition(transition);
+      transitionAccessor.getNextTransition(transition);
       if (transition.max >= c) {
         int nextChar = Math.max(c, transition.min);
         // append either the next sequential char, or the minimum transition
@@ -290,15 +292,15 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
          * as long as is possible, continue down the minimal path in
          * lexicographic order. if a loop or accept state is encountered, stop.
          */
-        while (!isVisited(state) && !runAutomaton.isAccept(state)) {
+        while (!isVisited(state) && !byteRunnable.isAccept(state)) {
           setVisited(state);
           /*
            * Note: we work with a DFA with no transitions to dead states.
            * so the below is ok, if it is not an accept state,
            * then there MUST be at least one transition.
            */
-          automaton.initTransition(state, transition);
-          automaton.getNextTransition(transition);
+          transitionAccessor.initTransition(state, transition);
+          transitionAccessor.getNextTransition(transition);
           state = transition.dest;
 
           // append the minimum transition
diff --git a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
index 327960f..b3f056c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
@@ -61,7 +61,6 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
    * @param term Term containing field and possibly some pattern structure. The term text is
    *     ignored.
    * @param automaton Automaton to run, terms that are accepted are considered a match.
-   * @throws IllegalArgumentException if automaton is not determinized
    */
   public AutomatonQuery(final Term term, Automaton automaton) {
     this(term, automaton, false);
@@ -75,7 +74,6 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
    * @param automaton Automaton to run, terms that are accepted are considered a match.
    * @param isBinary if true, this automaton is already binary and will not go through the
    *     UTF32ToUTF8 conversion
-   * @throws IllegalArgumentException if automaton is not determinized
    */
   public AutomatonQuery(final Term term, Automaton automaton, boolean isBinary) {
     super(term.field());
@@ -143,6 +141,10 @@ public class AutomatonQuery extends MultiTermQuery implements Accountable {
     return automaton;
   }
 
+  public CompiledAutomaton getCompiled() {
+    return compiled;
+  }
+
   /** Is this a binary (byte) oriented automaton. See the constructor. */
   public boolean isAutomatonBinary() {
     return automatonIsBinary;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
index f461046..8c0ae65 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
@@ -46,7 +46,7 @@ import org.apache.lucene.util.Sorter;
  *
  * @lucene.experimental
  */
-public class Automaton implements Accountable {
+public class Automaton implements Accountable, TransitionAccessor {
 
   /**
    * Where we next write to the int[] states; this increments by 2 for each added state because we
@@ -340,7 +340,7 @@ public class Automaton implements Accountable {
     return nextTransition / 3;
   }
 
-  /** How many transitions this state has. */
+  @Override
   public int getNumTransitions(int state) {
     assert state >= 0;
     int count = states[2 * state + 1];
@@ -475,11 +475,7 @@ public class Automaton implements Accountable {
         }
       };
 
-  /**
-   * Initialize the provided Transition to iterate through all transitions leaving the specified
-   * state. You must call {@link #getNextTransition} to get each transition. Returns the number of
-   * transitions leaving this state.
-   */
+  @Override
   public int initTransition(int state, Transition t) {
     assert state < nextState / 2 : "state=" + state + " nextState=" + nextState;
     t.source = state;
@@ -487,7 +483,7 @@ public class Automaton implements Accountable {
     return getNumTransitions(state);
   }
 
-  /** Iterate to the next transition after the provided one */
+  @Override
   public void getNextTransition(Transition t) {
     // Make sure there is still a transition left:
     assert (t.transitionUpto + 3 - states[2 * t.source]) <= 3 * states[2 * t.source + 1];
@@ -535,9 +531,7 @@ public class Automaton implements Accountable {
     return false;
   }
 
-  /**
-   * Fill the provided {@link Transition} with the index'th transition leaving the specified state.
-   */
+  @Override
   public void getTransition(int state, int index, Transition t) {
     int i = states[2 * state] + 3 * index;
     t.source = state;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
index 5f65445..d26dcd5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
@@ -17,7 +17,7 @@
 package org.apache.lucene.util.automaton;
 
 /** Automaton representation for matching UTF-8 byte[]. */
-public class ByteRunAutomaton extends RunAutomaton {
+public class ByteRunAutomaton extends RunAutomaton implements ByteRunnable {
 
   /**
    * Converts incoming automaton to byte-based (UTF32ToUTF8) first
@@ -44,15 +44,4 @@ public class ByteRunAutomaton extends RunAutomaton {
     // we checked the input is a DFA, according to mike this determinization is contained :)
     return Operations.determinize(new UTF32ToUTF8().convert(a), Integer.MAX_VALUE);
   }
-
-  /** Returns true if the given byte array is accepted by this automaton */
-  public boolean run(byte[] s, int offset, int length) {
-    int p = 0;
-    int l = offset + length;
-    for (int i = offset; i < l; i++) {
-      p = step(p, s[i] & 0xFF);
-      if (p == -1) return false;
-    }
-    return accept.get(p);
-  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunnable.java
similarity index 53%
copy from lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
copy to lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunnable.java
index 5f65445..e355b65 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunnable.java
@@ -16,43 +16,43 @@
  */
 package org.apache.lucene.util.automaton;
 
-/** Automaton representation for matching UTF-8 byte[]. */
-public class ByteRunAutomaton extends RunAutomaton {
+/** A runnable automaton accepting byte array as input */
+public interface ByteRunnable {
 
   /**
-   * Converts incoming automaton to byte-based (UTF32ToUTF8) first
+   * Returns the state obtained by reading the given char from the given state. Returns -1 if not
+   * obtaining any such state.
    *
-   * @throws IllegalArgumentException if the automaton is not deterministic
+   * @param state the last state
+   * @param c the input codepoint
+   * @return the next state, -1 if no such transaction
    */
-  public ByteRunAutomaton(Automaton a) {
-    this(a, false);
-  }
+  int step(int state, int c);
 
   /**
-   * expert: if isBinary is true, the input is already byte-based
+   * Returns acceptance status for given state.
    *
-   * @throws IllegalArgumentException if the automaton is not deterministic
+   * @param state the state
+   * @return whether the state is accepted
    */
-  public ByteRunAutomaton(Automaton a, boolean isBinary) {
-    super(isBinary ? a : convert(a), 256);
-  }
+  boolean isAccept(int state);
 
-  static Automaton convert(Automaton a) {
-    if (!a.isDeterministic()) {
-      throw new IllegalArgumentException("Automaton must be deterministic");
-    }
-    // we checked the input is a DFA, according to mike this determinization is contained :)
-    return Operations.determinize(new UTF32ToUTF8().convert(a), Integer.MAX_VALUE);
-  }
+  /**
+   * Returns number of states this automaton has, note this may not be an accurate number in case of
+   * NFA
+   *
+   * @return number of states
+   */
+  int getSize();
 
   /** Returns true if the given byte array is accepted by this automaton */
-  public boolean run(byte[] s, int offset, int length) {
+  default boolean run(byte[] s, int offset, int length) {
     int p = 0;
     int l = offset + length;
     for (int i = offset; i < l; i++) {
       p = step(p, s[i] & 0xFF);
       if (p == -1) return false;
     }
-    return accept.get(p);
+    return isAccept(p);
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
index 1627bb6..c842271 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
@@ -19,6 +19,7 @@ package org.apache.lucene.util.automaton;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import org.apache.lucene.index.SingleTermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
@@ -34,8 +35,10 @@ import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
 
 /**
- * Immutable class holding compiled details for a given Automaton. The Automaton must be
- * deterministic, must not have dead states but is not necessarily minimal.
+ * Immutable class holding compiled details for a given Automaton. The Automaton could either be
+ * deterministic or non-deterministic, For deterministic automaton, it must not have dead states but
+ * is not necessarily minimal. And will be executed using {@link ByteRunAutomaton} For
+ * non-deterministic automaton, it will be executed using {@link NFARunAutomaton}
  *
  * @lucene.experimental
  */
@@ -78,6 +81,14 @@ public class CompiledAutomaton implements Accountable {
   public final Automaton automaton;
 
   /**
+   * Matcher directly run on a NFA, it will determinize the state on need and caches it, note that
+   * this field and {@link #runAutomaton} will not be non-null at the same time
+   *
+   * <p>TODO: merge this with runAutomaton
+   */
+  final NFARunAutomaton nfaRunAutomaton;
+
+  /**
    * Shared common suffix accepted by the automaton. Only valid for {@link AUTOMATON_TYPE#NORMAL},
    * and only when the automaton accepts an infinite language. This will be null if the common
    * prefix is length 0.
@@ -131,8 +142,6 @@ public class CompiledAutomaton implements Accountable {
    * Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
    * finite. If simplify is true, we run possibly expensive operations to determine if the automaton
    * is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
-   *
-   * @throws IllegalArgumentException if the automaton is not deterministic
    */
   public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
     this(automaton, finite, simplify, false);
@@ -142,22 +151,17 @@ public class CompiledAutomaton implements Accountable {
    * Create this. If finite is null, we use {@link Operations#isFinite} to determine whether it is
    * finite. If simplify is true, we run possibly expensive operations to determine if the automaton
    * is one the cases in {@link CompiledAutomaton.AUTOMATON_TYPE}.
-   *
-   * @throws IllegalArgumentException if the automaton is not deterministic
    */
   public CompiledAutomaton(
       Automaton automaton, Boolean finite, boolean simplify, boolean isBinary) {
-    // require DFA for input, as this class currently can't handle NFAs.
-    if (!automaton.isDeterministic()) {
-      throw new IllegalArgumentException("Automaton must be deterministic");
-    }
 
     if (automaton.getNumStates() == 0) {
       automaton = new Automaton();
       automaton.createState();
     }
 
-    if (simplify) {
+    // simplify requires a DFA
+    if (simplify && automaton.isDeterministic()) {
 
       // Test whether the automaton is a "simple" form and
       // if so, don't create a runAutomaton.  Note that on a
@@ -172,6 +176,7 @@ public class CompiledAutomaton implements Accountable {
         this.automaton = null;
         this.finite = null;
         sinkState = -1;
+        nfaRunAutomaton = null;
         return;
       }
 
@@ -193,6 +198,7 @@ public class CompiledAutomaton implements Accountable {
         this.automaton = null;
         this.finite = null;
         sinkState = -1;
+        nfaRunAutomaton = null;
         return;
       }
 
@@ -214,6 +220,7 @@ public class CompiledAutomaton implements Accountable {
                   UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
         }
         sinkState = -1;
+        nfaRunAutomaton = null;
         return;
       }
     }
@@ -251,16 +258,24 @@ public class CompiledAutomaton implements Accountable {
       }
     }
 
-    // We already had a DFA (or threw exception), according to mike UTF32toUTF8 won't "blow up"
-    binary = Operations.determinize(binary, Integer.MAX_VALUE);
-    runAutomaton = new ByteRunAutomaton(binary, true);
+    if (automaton.isDeterministic() == false && binary.isDeterministic() == false) {
+      this.automaton = null;
+      this.runAutomaton = null;
+      this.sinkState = -1;
+      this.nfaRunAutomaton = new NFARunAutomaton(binary, 0xff);
+    } else {
+      // We already had a DFA (or threw exception), according to mike UTF32toUTF8 won't "blow up"
+      binary = Operations.determinize(binary, Integer.MAX_VALUE);
+      runAutomaton = new ByteRunAutomaton(binary, true);
 
-    this.automaton = runAutomaton.automaton;
+      this.automaton = runAutomaton.automaton;
 
-    // TODO: this is a bit fragile because if the automaton is not minimized there could be more
-    // than 1 sink state but auto-prefix will fail
-    // to run for those:
-    sinkState = findSinkState(this.automaton);
+      // TODO: this is a bit fragile because if the automaton is not minimized there could be more
+      // than 1 sink state but auto-prefix will fail
+      // to run for those:
+      sinkState = findSinkState(this.automaton);
+      nfaRunAutomaton = null;
+    }
   }
 
   private Transition transition = new Transition();
@@ -472,6 +487,32 @@ public class CompiledAutomaton implements Accountable {
     }
   }
 
+  /**
+   * Get a {@link ByteRunnable} instance, it will be different depending on whether a NFA or DFA is
+   * passed in, and does not guarantee returning non-null object
+   */
+  public ByteRunnable getByteRunnable() {
+    // they can be both null but not both non-null
+    assert nfaRunAutomaton == null || runAutomaton == null;
+    if (nfaRunAutomaton == null) {
+      return runAutomaton;
+    }
+    return nfaRunAutomaton;
+  }
+
+  /**
+   * Get a {@link TransitionAccessor} instance, it will be different depending on whether a NFA or
+   * DFA is passed in, and does not guarantee returning non-null object
+   */
+  public TransitionAccessor getTransitionAccessor() {
+    // they can be both null but not both non-null
+    assert nfaRunAutomaton == null || automaton == null;
+    if (nfaRunAutomaton == null) {
+      return automaton;
+    }
+    return nfaRunAutomaton;
+  }
+
   @Override
   public int hashCode() {
     final int prime = 31;
@@ -492,7 +533,8 @@ public class CompiledAutomaton implements Accountable {
     if (type == AUTOMATON_TYPE.SINGLE) {
       if (!term.equals(other.term)) return false;
     } else if (type == AUTOMATON_TYPE.NORMAL) {
-      if (!runAutomaton.equals(other.runAutomaton)) return false;
+      return Objects.equals(runAutomaton, other.runAutomaton)
+          && Objects.equals(nfaRunAutomaton, other.nfaRunAutomaton);
     }
 
     return true;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java
new file mode 100644
index 0000000..0957715
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.automaton;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.hppc.BitMixer;
+
+/**
+ * A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
+ * generated DFA states that has been explored
+ *
+ * <p>implemented based on: https://swtch.com/~rsc/regexp/regexp1.html
+ */
+public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
+
+  /** state ordinal of "no such state" */
+  public static final int MISSING = -1;
+
+  private static final int NOT_COMPUTED = -2;
+
+  private final Automaton automaton;
+  private final int[] points;
+  private final Map<DState, Integer> dStateToOrd = new HashMap<>(); // could init lazily?
+  private DState[] dStates;
+  private final int alphabetSize;
+  final int[] classmap; // map from char number to class
+
+  private final Operations.PointTransitionSet transitionSet =
+      new Operations.PointTransitionSet(); // reusable
+  private final StateSet statesSet = new StateSet(5); // reusable
+
+  /**
+   * Constructor, assuming alphabet size is the whole Unicode code point space
+   *
+   * @param automaton incoming automaton, should be NFA, for DFA please use {@link RunAutomaton} for
+   *     better efficiency
+   */
+  public NFARunAutomaton(Automaton automaton) {
+    this(automaton, Character.MAX_CODE_POINT);
+  }
+
+  /**
+   * Constructor
+   *
+   * @param automaton incoming automaton, should be NFA, for DFA please use {@link RunAutomaton} *
+   *     for better efficiency
+   * @param alphabetSize alphabet size
+   */
+  public NFARunAutomaton(Automaton automaton, int alphabetSize) {
+    this.automaton = automaton;
+    points = automaton.getStartPoints();
+    this.alphabetSize = alphabetSize;
+    dStates = new DState[10];
+    findDState(new DState(new int[] {0}));
+
+    /*
+     * Set alphabet table for optimal run performance.
+     */
+    classmap = new int[Math.min(256, alphabetSize)];
+    int i = 0;
+    for (int j = 0; j < classmap.length; j++) {
+      if (i + 1 < points.length && j == points[i + 1]) {
+        i++;
+      }
+      classmap[j] = i;
+    }
+  }
+
+  /**
+   * For a given state and an incoming character (codepoint), return the next state
+   *
+   * @param state incoming state, should either be 0 or some state that is returned previously by
+   *     this function
+   * @param c codepoint
+   * @return the next state or {@link #MISSING} if the transition doesn't exist
+   */
+  @Override
+  public int step(int state, int c) {
+    assert dStates[state] != null;
+    return step(dStates[state], c);
+  }
+
+  @Override
+  public boolean isAccept(int state) {
+    assert dStates[state] != null;
+    return dStates[state].isAccept;
+  }
+
+  @Override
+  public int getSize() {
+    return dStates.length;
+  }
+
+  /**
+   * Run through a given codepoint array, return accepted or not, should only be used in test
+   *
+   * @param s String represented by an int array
+   * @return accept or not
+   */
+  boolean run(int[] s) {
+    int p = 0;
+    for (int c : s) {
+      p = step(p, c);
+      if (p == MISSING) return false;
+    }
+    return dStates[p].isAccept;
+  }
+
+  /**
+   * From an existing DFA state, step to next DFA state given character c if the transition is
+   * previously tried then this operation will just use the cached result, otherwise it will call
+   * {@link DState#step(int)} to get the next state and cache the result
+   */
+  private int step(DState dState, int c) {
+    int charClass = getCharClass(c);
+    return dState.nextState(charClass);
+  }
+
+  /**
+   * return the ordinal of given DFA state, generate a new ordinal if the given DFA state is a new
+   * one
+   */
+  private int findDState(DState dState) {
+    if (dState == null) {
+      return MISSING;
+    }
+    int ord = dStateToOrd.getOrDefault(dState, -1);
+    if (ord >= 0) {
+      return ord;
+    }
+    ord = dStateToOrd.size();
+    dStateToOrd.put(dState, ord);
+    assert ord >= dStates.length || dStates[ord] == null;
+    if (ord >= dStates.length) {
+      dStates = ArrayUtil.grow(dStates, ord + 1);
+    }
+    dStates[ord] = dState;
+    return ord;
+  }
+
+  /** Gets character class of given codepoint */
+  final int getCharClass(int c) {
+    assert c < alphabetSize;
+
+    if (c < classmap.length) {
+      return classmap[c];
+    }
+
+    // binary search
+    int a = 0;
+    int b = points.length;
+    while (b - a > 1) {
+      int d = (a + b) >>> 1;
+      if (points[d] > c) b = d;
+      else if (points[d] < c) a = d;
+      else return d;
+    }
+    return a;
+  }
+
+  @Override
+  public int initTransition(int state, Transition t) {
+    t.source = state;
+    t.transitionUpto = -1;
+    return getNumTransitions(state);
+  }
+
+  @Override
+  public void getNextTransition(Transition t) {
+    assert t.transitionUpto < points.length - 1 && t.transitionUpto >= -1;
+    while (dStates[t.source].transitions[++t.transitionUpto] == MISSING) {
+      // this shouldn't throw AIOOBE as long as this function is only called
+      // numTransitions times
+    }
+    assert dStates[t.source].transitions[t.transitionUpto] != NOT_COMPUTED;
+    t.dest = dStates[t.source].transitions[t.transitionUpto];
+
+    t.min = points[t.transitionUpto];
+    if (t.transitionUpto == points.length - 1) {
+      t.max = alphabetSize - 1;
+    } else {
+      t.max = points[t.transitionUpto + 1] - 1;
+    }
+  }
+
+  @Override
+  public int getNumTransitions(int state) {
+    dStates[state].determinize();
+    return dStates[state].outgoingTransitions;
+  }
+
+  @Override
+  public void getTransition(int state, int index, Transition t) {
+    dStates[state].determinize();
+    int outgoingTransitions = -1;
+    t.transitionUpto = -1;
+    t.source = state;
+    while (outgoingTransitions < index && t.transitionUpto < points.length - 1) {
+      if (dStates[t.source].transitions[++t.transitionUpto] != MISSING) {
+        outgoingTransitions++;
+      }
+    }
+    assert outgoingTransitions == index;
+
+    t.min = points[t.transitionUpto];
+    if (t.transitionUpto == points.length - 1) {
+      t.max = alphabetSize - 1;
+    } else {
+      t.max = points[t.transitionUpto + 1] - 1;
+    }
+  }
+
+  private class DState {
+    private final int[] nfaStates;
+    // this field is lazily init'd when first time caller wants to add a new transition
+    private int[] transitions;
+    private final int hashCode;
+    private final boolean isAccept;
+    private final Transition stepTransition = new Transition();
+    private Transition minimalTransition;
+    private int computedTransitions;
+    private int outgoingTransitions;
+
+    private DState(int[] nfaStates) {
+      assert nfaStates != null && nfaStates.length > 0;
+      this.nfaStates = nfaStates;
+      int hashCode = nfaStates.length;
+      boolean isAccept = false;
+      for (int s : nfaStates) {
+        hashCode += BitMixer.mix(s);
+        if (automaton.isAccept(s)) {
+          isAccept = true;
+        }
+      }
+      this.isAccept = isAccept;
+      this.hashCode = hashCode;
+    }
+
+    private int nextState(int charClass) {
+      initTransitions();
+      assert charClass < transitions.length;
+      if (transitions[charClass] == NOT_COMPUTED) {
+        assignTransition(charClass, findDState(step(points[charClass])));
+        // we could potentially update more than one char classes
+        if (minimalTransition != null) {
+          // to the left
+          int cls = charClass;
+          while (cls > 0 && points[--cls] >= minimalTransition.min) {
+            assert transitions[cls] == NOT_COMPUTED || transitions[cls] == transitions[charClass];
+            assignTransition(cls, transitions[charClass]);
+          }
+          // to the right
+          cls = charClass;
+          while (cls < points.length - 1 && points[++cls] <= minimalTransition.max) {
+            assert transitions[cls] == NOT_COMPUTED || transitions[cls] == transitions[charClass];
+            assignTransition(cls, transitions[charClass]);
+          }
+          minimalTransition = null;
+        }
+      }
+      return transitions[charClass];
+    }
+
+    private void assignTransition(int charClass, int dest) {
+      if (transitions[charClass] == NOT_COMPUTED) {
+        computedTransitions++;
+        transitions[charClass] = dest;
+        if (transitions[charClass] != MISSING) {
+          outgoingTransitions++;
+        }
+      }
+    }
+
+    /**
+     * given a list of NFA states and a character c, compute the output list of NFA state which is
+     * wrapped as a DFA state
+     */
+    private DState step(int c) {
+      statesSet.reset(); // TODO: fork IntHashSet from hppc instead?
+      int numTransitions;
+      int left = -1, right = alphabetSize;
+      for (int nfaState : nfaStates) {
+        numTransitions = automaton.initTransition(nfaState, stepTransition);
+        // TODO: binary search should be faster, since transitions are sorted
+        for (int i = 0; i < numTransitions; i++) {
+          automaton.getNextTransition(stepTransition);
+          if (stepTransition.min <= c && stepTransition.max >= c) {
+            statesSet.incr(stepTransition.dest);
+            left = Math.max(stepTransition.min, left);
+            right = Math.min(stepTransition.max, right);
+          }
+          if (stepTransition.max < c) {
+            left = Math.max(stepTransition.max + 1, left);
+          }
+          if (stepTransition.min > c) {
+            right = Math.min(stepTransition.min - 1, right);
+            // transitions in automaton are sorted
+            break;
+          }
+        }
+      }
+      if (statesSet.size() == 0) {
+        return null;
+      }
+      minimalTransition = new Transition();
+      minimalTransition.min = left;
+      minimalTransition.max = right;
+      return new DState(statesSet.getArray());
+    }
+
+    // determinize this state only
+    private void determinize() {
+      if (transitions != null && computedTransitions == transitions.length) {
+        // already determinized
+        return;
+      }
+      initTransitions();
+      // Mostly forked from Operations.determinize
+      transitionSet.reset();
+      for (int nfaState : nfaStates) {
+        int numTransitions = automaton.initTransition(nfaState, stepTransition);
+        for (int i = 0; i < numTransitions; i++) {
+          automaton.getNextTransition(stepTransition);
+          transitionSet.add(stepTransition);
+        }
+      }
+      if (transitionSet.count == 0) {
+        // no outgoing transitions
+        Arrays.fill(transitions, MISSING);
+        computedTransitions = transitions.length;
+        return;
+      }
+
+      transitionSet
+          .sort(); // TODO: could use a PQ (heap) instead, since transitions for each state are
+      // sorted
+      statesSet.reset();
+      int lastPoint = -1;
+      int charClass = 0;
+      for (int i = 0; i < transitionSet.count; i++) {
+        final int point = transitionSet.points[i].point;
+        if (statesSet.size() > 0) {
+          assert lastPoint != -1;
+          int ord = findDState(new DState(statesSet.getArray()));
+          while (points[charClass] < lastPoint) {
+            assignTransition(charClass++, MISSING);
+          }
+          assert points[charClass] == lastPoint;
+          while (charClass < points.length && points[charClass] < point) {
+            assert transitions[charClass] == NOT_COMPUTED || transitions[charClass] == ord;
+            assignTransition(charClass++, ord);
+          }
+          assert (charClass == points.length && point == alphabetSize)
+              || points[charClass] == point;
+        }
+
+        // process transitions that end on this point
+        // (closes an overlapping interval)
+        int[] transitions = transitionSet.points[i].ends.transitions;
+        int limit = transitionSet.points[i].ends.next;
+        for (int j = 0; j < limit; j += 3) {
+          int dest = transitions[j];
+          statesSet.decr(dest);
+        }
+        transitionSet.points[i].ends.next = 0;
+
+        // process transitions that start on this point
+        // (opens a new interval)
+        transitions = transitionSet.points[i].starts.transitions;
+        limit = transitionSet.points[i].starts.next;
+        for (int j = 0; j < limit; j += 3) {
+          int dest = transitions[j];
+          statesSet.incr(dest);
+        }
+
+        lastPoint = point;
+        transitionSet.points[i].starts.next = 0;
+      }
+      assert statesSet.size() == 0;
+      assert computedTransitions
+          >= charClass; // it's also possible that some transitions after the charClass has already
+      // been explored
+      // no more outgoing transitions, set rest of transition to MISSING
+      assert charClass == transitions.length
+          || transitions[charClass] == MISSING
+          || transitions[charClass] == NOT_COMPUTED;
+      Arrays.fill(transitions, charClass, transitions.length, MISSING);
+      computedTransitions = transitions.length;
+    }
+
+    private void initTransitions() {
+      if (transitions == null) {
+        transitions = new int[points.length];
+        Arrays.fill(transitions, NOT_COMPUTED);
+      }
+    }
+
+    @Override
+    public int hashCode() {
+      return hashCode;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (o == null || getClass() != o.getClass()) return false;
+      DState dState = (DState) o;
+      return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates);
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index 1b54b57..08bcfa6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -526,7 +526,7 @@ public final class Operations {
   }
 
   // Simple custom ArrayList<Transition>
-  private static final class TransitionList {
+  static final class TransitionList {
     // dest, min, max
     int[] transitions = new int[3];
     int next;
@@ -544,7 +544,7 @@ public final class Operations {
 
   // Holds all transitions that start on this int point, or
   // end at this point-1
-  private static final class PointTransitions implements Comparable<PointTransitions> {
+  static final class PointTransitions implements Comparable<PointTransitions> {
     int point;
     final TransitionList ends = new TransitionList();
     final TransitionList starts = new TransitionList();
@@ -571,7 +571,7 @@ public final class Operations {
     }
   }
 
-  private static final class PointTransitionSet {
+  static final class PointTransitionSet {
     int count;
     PointTransitions[] points = new PointTransitions[5];
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
index dea87b9..06a674c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
@@ -130,7 +130,12 @@ public abstract class RunAutomaton implements Accountable {
     return size;
   }
 
-  /** Returns acceptance status for given state. */
+  /**
+   * Returns acceptance status for given state.
+   *
+   * @param state the state
+   * @return whether the state is accepted
+   */
   public final boolean isAccept(int state) {
     return accept.get(state);
   }
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StateSet.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StateSet.java
index 6d4a7ea..985ed65 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/StateSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StateSet.java
@@ -66,6 +66,11 @@ final class StateSet extends IntSet {
     }
   }
 
+  void reset() {
+    inner.clear();
+    keyChanged();
+  }
+
   /**
    * Create a snapshot of this int set associated with a given state. The snapshot will not retain
    * any frequency information about the elements of this set, only existence.
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/TransitionAccessor.java b/lucene/core/src/java/org/apache/lucene/util/automaton/TransitionAccessor.java
new file mode 100644
index 0000000..33a3d5e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/TransitionAccessor.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.automaton;
+
+/** Interface accessing the transitions of an automaton */
+public interface TransitionAccessor {
+
+  /**
+   * Initialize the provided Transition to iterate through all transitions leaving the specified
+   * state. You must call {@link #getNextTransition} to get each transition. Returns the number of
+   * transitions leaving this state.
+   */
+  int initTransition(int state, Transition t);
+
+  /** Iterate to the next transition after the provided one */
+  void getNextTransition(Transition t);
+
+  /** How many transitions this state has. */
+  int getNumTransitions(int state);
+
+  /**
+   * Fill the provided {@link Transition} with the index'th transition leaving the specified state.
+   */
+  void getTransition(int state, int index, Transition t);
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
index 71ed126..706f39e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
@@ -127,18 +127,6 @@ public class TestAutomatonQuery extends LuceneTestCase {
             DEFAULT_DETERMINIZE_WORK_LIMIT));
   }
 
-  /** Test that a nondeterministic automaton fails, it should throw Exception */
-  public void testNFA() throws IOException {
-    // accept this or three, the union is an NFA (two transitions for 't' from
-    // initial state)
-    Automaton nfa = Operations.union(Automata.makeString("this"), Automata.makeString("three"));
-    expectThrows(
-        IllegalArgumentException.class,
-        () -> {
-          assertAutomatonHits(2, nfa);
-        });
-  }
-
   public void testEquals() {
     AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata.makeString("foobar"));
     // reference to a1
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/MinimizationOperations.java b/lucene/core/src/test/org/apache/lucene/util/automaton/MinimizationOperations.java
index ffd6edd..d45d602 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/MinimizationOperations.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/MinimizationOperations.java
@@ -53,6 +53,7 @@ public final class MinimizationOperations {
    *     what to specify.
    */
   public static Automaton minimize(Automaton a, int determinizeWorkLimit) {
+
     if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) {
       // Fastmatch for common case
       return new Automaton();
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java
new file mode 100644
index 0000000..6b4c480
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.automaton;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestNFARunAutomaton extends LuceneTestCase {
+
+  private static final String FIELD = "field";
+
+  @SuppressWarnings("unused")
+  public void testWithRandomRegex() {
+    for (int i = 0; i < 100; i++) {
+      RegExp regExp = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
+      ;
+      Automaton nfa = regExp.toAutomaton();
+      if (nfa.isDeterministic()) {
+        i--;
+        continue;
+      }
+      Automaton dfa = Operations.determinize(nfa, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+      NFARunAutomaton candidate = new NFARunAutomaton(nfa);
+      AutomatonTestUtil.RandomAcceptedStrings randomStringGen;
+      try {
+        randomStringGen = new AutomatonTestUtil.RandomAcceptedStrings(dfa);
+      } catch (IllegalArgumentException e) {
+        i--;
+        continue; // sometimes the automaton accept nothing and throw this exception
+      }
+
+      for (int round = 0; round < 20; round++) {
+        // test order of accepted strings and random (likely rejected) strings alternatively to make
+        // sure caching system works correctly
+        if (random().nextBoolean()) {
+          testAcceptedString(regExp, randomStringGen, candidate, 10);
+          testRandomString(regExp, dfa, candidate, 10);
+        } else {
+          testRandomString(regExp, dfa, candidate, 10);
+          testAcceptedString(regExp, randomStringGen, candidate, 10);
+        }
+      }
+    }
+  }
+
+  public void testRandomAutomatonQuery() throws IOException {
+    final int docNum = 50;
+    final int automatonNum = 50;
+    Directory directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
+
+    Set<String> vocab = new HashSet<>();
+    Set<String> perDocVocab = new HashSet<>();
+    for (int i = 0; i < docNum; i++) {
+      perDocVocab.clear();
+      int termNum = random().nextInt(20) + 30;
+      while (perDocVocab.size() < termNum) {
+        String randomString;
+        while ((randomString = TestUtil.randomUnicodeString(random())).length() == 0)
+          ;
+        perDocVocab.add(randomString);
+        vocab.add(randomString);
+      }
+      Document document = new Document();
+      document.add(
+          newTextField(
+              FIELD, perDocVocab.stream().reduce("", (s1, s2) -> s1 + " " + s2), Field.Store.NO));
+      writer.addDocument(document);
+    }
+    writer.commit();
+    IndexReader reader = DirectoryReader.open(directory);
+    IndexSearcher searcher = new IndexSearcher(reader);
+
+    Set<String> foreignVocab = new HashSet<>();
+    while (foreignVocab.size() < vocab.size()) {
+      String randomString;
+      while ((randomString = TestUtil.randomUnicodeString(random())).length() == 0)
+        ;
+      foreignVocab.add(randomString);
+    }
+
+    ArrayList<String> vocabList = new ArrayList<>(vocab);
+    ArrayList<String> foreignVocabList = new ArrayList<>(foreignVocab);
+
+    Set<String> perQueryVocab = new HashSet<>();
+
+    for (int i = 0; i < automatonNum; i++) {
+      perQueryVocab.clear();
+      int termNum = random().nextInt(40) + 30;
+      while (perQueryVocab.size() < termNum) {
+        if (random().nextBoolean()) {
+          perQueryVocab.add(vocabList.get(random().nextInt(vocabList.size())));
+        } else {
+          perQueryVocab.add(foreignVocabList.get(random().nextInt(foreignVocabList.size())));
+        }
+      }
+      Automaton a = null;
+      for (String term : perQueryVocab) {
+        if (a == null) {
+          a = Automata.makeString(term);
+        } else {
+          a = Operations.union(a, Automata.makeString(term));
+        }
+      }
+      if (a.isDeterministic()) {
+        i--;
+        continue;
+      }
+      AutomatonQuery dfaQuery =
+          new AutomatonQuery(
+              new Term(FIELD),
+              Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT));
+      AutomatonQuery nfaQuery = new AutomatonQuery(new Term(FIELD), a);
+      assertNotNull(nfaQuery.getCompiled().nfaRunAutomaton);
+      assertEquals(searcher.count(dfaQuery), searcher.count(nfaQuery));
+    }
+    reader.close();
+    writer.close();
+    directory.close();
+  }
+
+  private void testAcceptedString(
+      RegExp regExp,
+      AutomatonTestUtil.RandomAcceptedStrings randomStringGen,
+      NFARunAutomaton candidate,
+      int repeat) {
+    for (int n = 0; n < repeat; n++) {
+      int[] acceptedString = randomStringGen.getRandomAcceptedString(random());
+      assertTrue(
+          "regExp: " + regExp + " testString: " + Arrays.toString(acceptedString),
+          candidate.run(acceptedString));
+    }
+  }
+
+  private void testRandomString(
+      RegExp regExp, Automaton dfa, NFARunAutomaton candidate, int repeat) {
+    for (int n = 0; n < repeat; n++) {
+      int[] randomString =
+          random().ints(random().nextInt(50), 0, Character.MAX_CODE_POINT).toArray();
+      assertEquals(
+          "regExp: " + regExp + " testString: " + Arrays.toString(randomString),
+          Operations.run(dfa, new IntsRef(randomString, 0, randomString.length)),
+          candidate.run(randomString));
+    }
+  }
+}