You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/12/08 02:39:16 UTC
[lucene] branch main updated: LUCENE-10010: don't determinize/minimize in RegExp (#513)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 84e4b85  LUCENE-10010: don't determinize/minimize in RegExp (#513)
84e4b85 is described below

commit 84e4b85b094ccf2dc248ad90a302307d3618b81e
Author: Robert Muir <rm...@apache.org>
AuthorDate: Tue Dec 7 21:39:13 2021 -0500

    LUCENE-10010: don't determinize/minimize in RegExp (#513)
    
    Previously, RegExp called minimize() at every parsing step. There is little point in supporting NFA execution while RegExp does this: minimize() implies determinize(), which is exponential in the worst case.
    
    Moreover, some minimize() calls are missing, so in rare cases (for certain syntax) RegExp can already return an NFA today.
    
    RegExp parsing should do none of this; instead, it may return either a DFA or an NFA. NOTE: many simple regexps still happen to be returned as DFAs, simply because of the algorithms in use.
    
    Callers can decide whether to determinize or minimize. RegExp parsing should not run in exponential time.
    
    All src/java callsites were modified to call minimize(), to prevent any performance problems. minimize() seems unnecessary, but let's approach removing minimization as a separate PR. src/test was fixed to just use determinize() in preparation for this.
    
    Add new unit test for RegExp parsing
    
    New test tries to test each symbol/node independently, to make it easier to maintain this code.
    The new test case now exceeds 90% coverage of the regexp parser.
---
 lucene/CHANGES.txt                                 |   2 +-
 lucene/MIGRATE.md                                  |   2 +-
 .../lucene/analysis/hunspell/AffixCondition.java   |   7 +-
 .../pattern/SimplePatternSplitTokenizer.java       |   5 +-
 .../SimplePatternSplitTokenizerFactory.java        |   3 +-
 .../lucene/analysis/minhash/TestMinHashFilter.java |   7 +-
 .../lucene50/TestBlockPostingsFormat3.java         |   7 +-
 .../java/org/apache/lucene/search/RegexpQuery.java |   6 +-
 .../lucene/util/automaton/AutomatonProvider.java   |   2 +-
 .../org/apache/lucene/util/automaton/RegExp.java   | 172 +----
 .../org/apache/lucene/index/TestTermsEnum.java     |   2 +
 .../org/apache/lucene/index/TestTermsEnum2.java    |   1 +
 .../org/apache/lucene/search/TestRegexpQuery.java  |   7 +-
 .../apache/lucene/search/TestRegexpRandom2.java    |   4 +-
 .../apache/lucene/util/automaton/TestMinimize.java |   4 +-
 .../apache/lucene/util/automaton/TestRegExp.java   |  54 +-
 .../lucene/util/automaton/TestRegExpParsing.java   | 708 +++++++++++++++++++++
 .../test/org/apache/lucene/util/fst/TestFSTs.java  |   4 +-
 .../lucene/queries/intervals/TestIntervals.java    |   9 +-
 .../suggest/document/RegexCompletionQuery.java     |   6 +-
 .../lucene/index/BaseDocValuesFormatTestCase.java  |  13 +-
 .../org/apache/lucene/util/LuceneTestCase.java     |   7 +-
 22 files changed, 818 insertions(+), 214 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e935c4e..1d70bfb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -8,7 +8,7 @@ http://s.apache.org/luceneversions
 API Changes
 ---------------------
 
-* LUCENE-10010: AutomatonQuery, CompiledAutomaton, and RunAutomaton
+* LUCENE-10010: AutomatonQuery, CompiledAutomaton, RunAutomaton, RegExp
   classes no longer determinize NFAs. Instead it is the responsibility
   of the caller to determinize.  (Robert Muir)
 
diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md
index 8081832..d61b9fb 100644
--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@@ -19,7 +19,7 @@
 
 ## Migration from Lucene 9.x to Lucene 10.0
 
-### AutomatonQuery/CompiledAutomaton/RunAutomaton no longer determinize (LUCENE-10010)
+### AutomatonQuery/CompiledAutomaton/RunAutomaton/RegExp no longer determinize (LUCENE-10010)
 
 These classes no longer take a `determinizeWorkLimit` and no longer determinize
 behind the scenes. It is the responsibility of the caller to to call
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/AffixCondition.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/AffixCondition.java
index 25cb293..42c4ab0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/AffixCondition.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/AffixCondition.java
@@ -21,6 +21,8 @@ import static org.apache.lucene.analysis.hunspell.AffixKind.SUFFIX;
 
 import java.util.regex.PatternSyntaxException;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.MinimizationOperations;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
@@ -153,7 +155,10 @@ interface AffixCondition {
   private static AffixCondition regexpCondition(AffixKind kind, String condition, int charCount) {
     boolean forSuffix = kind == AffixKind.SUFFIX;
     CharacterRunAutomaton automaton =
-        new CharacterRunAutomaton(new RegExp(escapeDash(condition), RegExp.NONE).toAutomaton());
+        new CharacterRunAutomaton(
+            MinimizationOperations.minimize(
+                new RegExp(escapeDash(condition), RegExp.NONE).toAutomaton(),
+                Operations.DEFAULT_DETERMINIZE_WORK_LIMIT));
     return (word, offset, length) ->
         length >= charCount
             && automaton.run(word, forSuffix ? offset + length - charCount : offset, charCount);
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
index 640ac92..8062dad 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
@@ -24,6 +24,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -74,7 +75,9 @@ public final class SimplePatternSplitTokenizer extends Tokenizer {
   /** See {@link RegExp} for the accepted syntax. */
   public SimplePatternSplitTokenizer(
       AttributeFactory factory, String regexp, int determinizeWorkLimit) {
-    this(factory, new RegExp(regexp).toAutomaton(determinizeWorkLimit));
+    this(
+        factory,
+        MinimizationOperations.minimize(new RegExp(regexp).toAutomaton(), determinizeWorkLimit));
   }
 
   /** Runs a pre-built automaton. */
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
index 7472bba..2e510e6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
@@ -21,6 +21,7 @@ import java.util.Map;
 import org.apache.lucene.analysis.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -73,7 +74,7 @@ public class SimplePatternSplitTokenizerFactory extends TokenizerFactory {
     determinizeWorkLimit =
         getInt(args, "determinizeWorkLimit", Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     dfa =
-        Operations.determinize(
+        MinimizationOperations.minimize(
             new RegExp(require(args, PATTERN)).toAutomaton(), determinizeWorkLimit);
     if (args.isEmpty() == false) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/TestMinHashFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/TestMinHashFilter.java
index 77a2ebe..e140777 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/TestMinHashFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/TestMinHashFilter.java
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.minhash.MinHashFilter.FixedSizeTreeSet;
 import org.apache.lucene.analysis.minhash.MinHashFilter.LongPair;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.junit.Test;
 
@@ -365,8 +366,10 @@ public class TestMinHashFilter extends BaseTokenStreamTestCase {
     MockTokenizer tokenizer =
         new MockTokenizer(
             new CharacterRunAutomaton(
-                new RegExp("[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){" + (shingleSize - 1) + "}")
-                    .toAutomaton()),
+                Operations.determinize(
+                    new RegExp("[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){" + (shingleSize - 1) + "}")
+                        .toAutomaton(),
+                    Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)),
             true);
     tokenizer.setEnableChecks(true);
     if (shingles != null) {
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestBlockPostingsFormat3.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestBlockPostingsFormat3.java
index 2cfbec6..7dec8c4 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestBlockPostingsFormat3.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestBlockPostingsFormat3.java
@@ -49,8 +49,10 @@ import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Nightly;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 /** Tests partial enumeration (only pulling a subset of the indexed data) */
@@ -196,8 +198,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
       int numIntersections = atLeast(3);
       for (int i = 0; i < numIntersections; i++) {
         String re = AutomatonTestUtil.randomRegexp(random());
-        CompiledAutomaton automaton =
-            new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
+        Automaton a = new RegExp(re, RegExp.NONE).toAutomaton();
+        a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+        CompiledAutomaton automaton = new CompiledAutomaton(a);
         if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
           // TODO: test start term too
           TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
diff --git a/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java b/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
index 350a1e2..bc2dfc5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonProvider;
+import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -139,9 +140,8 @@ public class RegexpQuery extends AutomatonQuery {
       int determinizeWorkLimit) {
     super(
         term,
-        Operations.determinize(
-            new RegExp(term.text(), syntax_flags, match_flags)
-                .toAutomaton(provider, determinizeWorkLimit),
+        MinimizationOperations.minimize(
+            new RegExp(term.text(), syntax_flags, match_flags).toAutomaton(provider),
             determinizeWorkLimit));
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java b/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java
index 75698dd..2f3ad6c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/AutomatonProvider.java
@@ -32,7 +32,7 @@ package org.apache.lucene.util.automaton;
 import java.io.IOException;
 
 /**
- * Automaton provider for <code>RegExp.</code> {@link RegExp#toAutomaton(AutomatonProvider,int)}
+ * Automaton provider for <code>RegExp.</code> {@link RegExp#toAutomaton(AutomatonProvider)}
  *
  * @lucene.experimental
  */
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
index ecd59fd..7dc4ee4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
@@ -155,21 +155,6 @@ import java.util.Set;
  * </tr>
  *
  * <tr>
- * <td><i>complexp</i></td>
- * <td>::=</td>
- * <td><code><b>~</b></code>&nbsp;<i>complexp</i></td>
- * <td>(complement)</td>
- * <td><small>[OPTIONAL]</small></td>
- * </tr>
- * <tr>
- * <td></td>
- * <td>|</td>
- * <td><i>charclassexp</i></td>
- * <td></td>
- * <td></td>
- * </tr>
- *
- * <tr>
  * <td><i>charclassexp</i></td>
  * <td>::=</td>
  * <td><code><b>[</b></code>&nbsp;<i>charclasses</i>&nbsp;<code><b>]</b></code></td>
@@ -407,9 +392,6 @@ public class RegExp {
   /** Syntax flag, enables intersection (<code>&amp;</code>). */
   public static final int INTERSECTION = 0x0001;
 
-  /** Syntax flag, enables complement (<code>~</code>). */
-  public static final int COMPLEMENT = 0x0002;
-
   /** Syntax flag, enables empty language (<code>#</code>). */
   public static final int EMPTY = 0x0004;
 
@@ -556,165 +538,84 @@ public class RegExp {
    * toAutomaton(null)</code> (empty automaton map).
    */
   public Automaton toAutomaton() {
-    return toAutomaton(null, null, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
-  }
-
-  /**
-   * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton
-   * is minimal and deterministic and has no transitions to dead states.
-   *
-   * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If
-   *     determinizing the automata would require more than this effort,
-   *     TooComplexToDeterminizeException is thrown. Higher numbers require more space but can
-   *     process more complex regexes. Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a
-   *     decent default if you don't otherwise know what to specify.
-   * @exception IllegalArgumentException if this regular expression uses a named identifier that is
-   *     not available from the automaton provider
-   * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort
-   *     than determinizeWorkLimit states
-   */
-  public Automaton toAutomaton(int determinizeWorkLimit)
-      throws IllegalArgumentException, TooComplexToDeterminizeException {
-    return toAutomaton(null, null, determinizeWorkLimit);
+    return toAutomaton(null, null);
   }
 
   /**
-   * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton
-   * is minimal and deterministic and has no transitions to dead states.
+   * Constructs new <code>Automaton</code> from this <code>RegExp</code>.
    *
    * @param automaton_provider provider of automata for named identifiers
-   * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If
-   *     determinizing the automata would require more than this effort,
-   *     TooComplexToDeterminizeException is thrown. Higher numbers require more space but can
-   *     process more complex regexes. Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a
-   *     decent default if you don't otherwise know what to specify.
    * @exception IllegalArgumentException if this regular expression uses a named identifier that is
    *     not available from the automaton provider
-   * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort
-   *     than determinizeWorkLimit states
    */
-  public Automaton toAutomaton(AutomatonProvider automaton_provider, int determinizeWorkLimit)
+  public Automaton toAutomaton(AutomatonProvider automaton_provider)
       throws IllegalArgumentException, TooComplexToDeterminizeException {
-    return toAutomaton(null, automaton_provider, determinizeWorkLimit);
+    return toAutomaton(null, automaton_provider);
   }
 
   /**
-   * Constructs new <code>Automaton</code> from this <code>RegExp</code>. The constructed automaton
-   * is minimal and deterministic and has no transitions to dead states.
+   * Constructs new <code>Automaton</code> from this <code>RegExp</code>.
    *
    * @param automata a map from automaton identifiers to automata (of type <code>Automaton</code>).
-   * @param determinizeWorkLimit maximum effort to spend while determinizing the automata. If
-   *     determinizing the automata would require more than this effort,
-   *     TooComplexToDeterminizeException is thrown. Higher numbers require more space but can
-   *     process more complex regexes.
    * @exception IllegalArgumentException if this regular expression uses a named identifier that
    *     does not occur in the automaton map
-   * @exception TooComplexToDeterminizeException if determinizing this regexp requires more effort
-   *     than determinizeWorkLimit states
    */
-  public Automaton toAutomaton(Map<String, Automaton> automata, int determinizeWorkLimit)
+  public Automaton toAutomaton(Map<String, Automaton> automata)
       throws IllegalArgumentException, TooComplexToDeterminizeException {
-    return toAutomaton(automata, null, determinizeWorkLimit);
+    return toAutomaton(automata, null);
   }
 
   private Automaton toAutomaton(
-      Map<String, Automaton> automata,
-      AutomatonProvider automaton_provider,
-      int determinizeWorkLimit)
-      throws IllegalArgumentException, TooComplexToDeterminizeException {
-    try {
-      return toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit);
-    } catch (TooComplexToDeterminizeException e) {
-      throw new TooComplexToDeterminizeException(this, e);
-    }
-  }
-
-  private Automaton toAutomatonInternal(
-      Map<String, Automaton> automata,
-      AutomatonProvider automaton_provider,
-      int determinizeWorkLimit)
+      Map<String, Automaton> automata, AutomatonProvider automaton_provider)
       throws IllegalArgumentException {
     List<Automaton> list;
     Automaton a = null;
     switch (kind) {
       case REGEXP_PRE_CLASS:
         RegExp expanded = expandPredefined();
-        a = expanded.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit);
+        a = expanded.toAutomaton(automata, automaton_provider);
         break;
       case REGEXP_UNION:
         list = new ArrayList<>();
-        findLeaves(
-            exp1, Kind.REGEXP_UNION, list, automata, automaton_provider, determinizeWorkLimit);
-        findLeaves(
-            exp2, Kind.REGEXP_UNION, list, automata, automaton_provider, determinizeWorkLimit);
+        findLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
+        findLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
         a = Operations.union(list);
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
         break;
       case REGEXP_CONCATENATION:
         list = new ArrayList<>();
-        findLeaves(
-            exp1,
-            Kind.REGEXP_CONCATENATION,
-            list,
-            automata,
-            automaton_provider,
-            determinizeWorkLimit);
-        findLeaves(
-            exp2,
-            Kind.REGEXP_CONCATENATION,
-            list,
-            automata,
-            automaton_provider,
-            determinizeWorkLimit);
+        findLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
+        findLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
         a = Operations.concatenate(list);
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
         break;
       case REGEXP_INTERSECTION:
         a =
             Operations.intersection(
-                exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit),
-                exp2.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit));
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
+                exp1.toAutomaton(automata, automaton_provider),
+                exp2.toAutomaton(automata, automaton_provider));
         break;
       case REGEXP_OPTIONAL:
-        a =
-            Operations.optional(
-                exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit));
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
+        a = Operations.optional(exp1.toAutomaton(automata, automaton_provider));
         break;
       case REGEXP_REPEAT:
-        a =
-            Operations.repeat(
-                exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit));
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
+        a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider));
         break;
       case REGEXP_REPEAT_MIN:
-        a = exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit);
-        int minNumStates = (a.getNumStates() - 1) * min;
-        if (minNumStates > determinizeWorkLimit) {
-          throw new TooComplexToDeterminizeException(a, minNumStates);
-        }
+        a = exp1.toAutomaton(automata, automaton_provider);
         a = Operations.repeat(a, min);
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
         break;
       case REGEXP_REPEAT_MINMAX:
-        a = exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit);
-        int minMaxNumStates = (a.getNumStates() - 1) * max;
-        if (minMaxNumStates > determinizeWorkLimit) {
-          throw new TooComplexToDeterminizeException(a, minMaxNumStates);
-        }
+        a = exp1.toAutomaton(automata, automaton_provider);
         a = Operations.repeat(a, min, max);
         break;
       case REGEXP_COMPLEMENT:
-        a =
-            Operations.complement(
-                exp1.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit),
-                determinizeWorkLimit);
-        a = MinimizationOperations.minimize(a, determinizeWorkLimit);
+        // we don't support arbitrary complement, just "negated character class"
+        // this is just a list of characters (e.g. "a") or ranges (e.g. "b-d")
+        a = exp1.toAutomaton(automata, automaton_provider);
+        a = Operations.complement(a, Integer.MAX_VALUE);
         break;
       case REGEXP_CHAR:
         if (check(ASCII_CASE_INSENSITIVE)) {
-          a = toCaseInsensitiveChar(c, determinizeWorkLimit);
+          a = toCaseInsensitiveChar(c);
         } else {
           a = Automata.makeChar(c);
         }
@@ -730,7 +631,7 @@ public class RegExp {
         break;
       case REGEXP_STRING:
         if (check(ASCII_CASE_INSENSITIVE)) {
-          a = toCaseInsensitiveString(determinizeWorkLimit);
+          a = toCaseInsensitiveString();
         } else {
           a = Automata.makeString(s);
         }
@@ -762,7 +663,7 @@ public class RegExp {
     return a;
   }
 
-  private Automaton toCaseInsensitiveChar(int codepoint, int determinizeWorkLimit) {
+  private Automaton toCaseInsensitiveChar(int codepoint) {
     Automaton case1 = Automata.makeChar(codepoint);
     // For now we only work with ASCII characters
     if (codepoint > 128) {
@@ -775,23 +676,20 @@ public class RegExp {
     Automaton result;
     if (altCase != codepoint) {
       result = Operations.union(case1, Automata.makeChar(altCase));
-      result = MinimizationOperations.minimize(result, determinizeWorkLimit);
     } else {
       result = case1;
     }
     return result;
   }
 
-  private Automaton toCaseInsensitiveString(int determinizeWorkLimit) {
+  private Automaton toCaseInsensitiveString() {
     List<Automaton> list = new ArrayList<>();
 
     Iterator<Integer> iter = s.codePoints().iterator();
     while (iter.hasNext()) {
-      list.add(toCaseInsensitiveChar(iter.next(), determinizeWorkLimit));
+      list.add(toCaseInsensitiveChar(iter.next()));
     }
-    Automaton a = Operations.concatenate(list);
-    a = MinimizationOperations.minimize(a, determinizeWorkLimit);
-    return a;
+    return Operations.concatenate(list);
   }
 
   private void findLeaves(
@@ -799,13 +697,12 @@ public class RegExp {
       Kind kind,
       List<Automaton> list,
       Map<String, Automaton> automata,
-      AutomatonProvider automaton_provider,
-      int determinizeWorkLimit) {
+      AutomatonProvider automaton_provider) {
     if (exp.kind == kind) {
-      findLeaves(exp.exp1, kind, list, automata, automaton_provider, determinizeWorkLimit);
-      findLeaves(exp.exp2, kind, list, automata, automaton_provider, determinizeWorkLimit);
+      findLeaves(exp.exp1, kind, list, automata, automaton_provider);
+      findLeaves(exp.exp2, kind, list, automata, automaton_provider);
     } else {
-      list.add(exp.toAutomatonInternal(automata, automaton_provider, determinizeWorkLimit));
+      list.add(exp.toAutomaton(automata, automaton_provider));
     }
   }
 
@@ -1214,8 +1111,7 @@ public class RegExp {
   }
 
   final RegExp parseComplExp() throws IllegalArgumentException {
-    if (check(COMPLEMENT) && match('~')) return makeComplement(flags, parseComplExp());
-    else return parseCharClassExp();
+    return parseCharClassExp();
   }
 
   final RegExp parseCharClassExp() throws IllegalArgumentException {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index d1cea76..547e80c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -42,6 +42,7 @@ import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 @SuppressCodecs({"SimpleText", "Direct"})
@@ -827,6 +828,7 @@ public class TestTermsEnum extends LuceneTestCase {
     Terms terms = sub.terms("field");
 
     Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
+    automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
     TermsEnum te;
 
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
index ee292f4..eb5c756 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
@@ -164,6 +164,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
     for (int i = 0; i < numIterations; i++) {
       String reg = AutomatonTestUtil.randomRegexp(random());
       Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
+      automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
       CompiledAutomaton ca =
           new CompiledAutomaton(automaton, Operations.isFinite(automaton), false);
       TermsEnum te = MultiTerms.getTerms(reader, "field").intersect(ca, null);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java
index bd86bae..7f05c7a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java
@@ -142,10 +142,9 @@ public class TestRegexpQuery extends LuceneTestCase {
     assertEquals(1, caseInsensitiveRegexQueryNrHits("Quick"));
   }
 
-  public void testRegexComplement() throws IOException {
-    assertEquals(1, regexQueryNrHits("4934~[3]"));
-    // not the empty lang, i.e. match all docs
-    assertEquals(1, regexQueryNrHits("~#"));
+  public void testRegexNegatedCharacterClass() throws IOException {
+    assertEquals(1, regexQueryNrHits("[^a-z]"));
+    assertEquals(1, regexQueryNrHits("[^03ad]"));
   }
 
   public void testCustomProvider() throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
index 901d539..dca393a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
@@ -110,7 +111,8 @@ public class TestRegexpRandom2 extends LuceneTestCase {
     DumbRegexpQuery(Term term, int flags) {
       super(term.field());
       RegExp re = new RegExp(term.text(), flags);
-      automaton = re.toAutomaton();
+      automaton =
+          Operations.determinize(re.toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     }
 
     @Override
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
index d31fd00..343ad8e 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
@@ -61,6 +61,8 @@ public class TestMinimize extends LuceneTestCase {
   /** n^2 space usage in Hopcroft minimization? */
   @Nightly
   public void testMinimizeHuge() {
-    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton(1000000);
+    Automaton a = new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+    Automaton b = MinimizationOperations.minimize(a, 1000000);
+    assertTrue(b.isDeterministic());
   }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
index 48308a5..7d6d934 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
@@ -34,62 +34,22 @@ public class TestRegExp extends LuceneTestCase {
     assertFalse(run.run("ad"));
   }
 
-  /**
-   * Compiles a regular expression that is prohibitively expensive to determinize and expexts to
-   * catch an exception for it.
-   */
-  public void testDeterminizeTooManyStates() {
-    // LUCENE-6046
-    String source = "[ac]*a[ac]{50,200}";
-    TooComplexToDeterminizeException expected =
-        expectThrows(
-            TooComplexToDeterminizeException.class,
-            () -> {
-              new RegExp(source).toAutomaton();
-            });
-    assertTrue(expected.getMessage().contains(source));
-  }
-
-  public void testSerializeTooManyStatesToRepeat() throws Exception {
-    String source = "a{50001}";
-    TooComplexToDeterminizeException expected =
-        expectThrows(
-            TooComplexToDeterminizeException.class,
-            () -> {
-              new RegExp(source).toAutomaton(50000);
-            });
-    assertTrue(expected.getMessage().contains(source));
-  }
-
-  // LUCENE-6713
-  public void testSerializeTooManyStatesToDeterminizeExc() throws Exception {
-    // LUCENE-6046
-    String source = "[ac]*a[ac]{50,200}";
-    TooComplexToDeterminizeException expected =
-        expectThrows(
-            TooComplexToDeterminizeException.class,
-            () -> {
-              new RegExp(source).toAutomaton();
-            });
-    assertTrue(expected.getMessage().contains(source));
-  }
-
   // LUCENE-6046
   public void testRepeatWithEmptyString() throws Exception {
-    Automaton a = new RegExp("[^y]*{1,2}").toAutomaton(1000);
+    Automaton a = new RegExp("[^y]*{1,2}").toAutomaton();
     // paranoia:
     assertTrue(a.toString().length() > 0);
   }
 
   public void testRepeatWithEmptyLanguage() throws Exception {
-    Automaton a = new RegExp("#*").toAutomaton(1000);
+    Automaton a = new RegExp("#*").toAutomaton();
     // paranoia:
     assertTrue(a.toString().length() > 0);
-    a = new RegExp("#+").toAutomaton(1000);
+    a = new RegExp("#+").toAutomaton();
     assertTrue(a.toString().length() > 0);
-    a = new RegExp("#{2,10}").toAutomaton(1000);
+    a = new RegExp("#{2,10}").toAutomaton();
     assertTrue(a.toString().length() > 0);
-    a = new RegExp("#?").toAutomaton(1000);
+    a = new RegExp("#?").toAutomaton();
     assertTrue(a.toString().length() > 0);
   }
 
@@ -258,7 +218,8 @@ public class TestRegExp extends LuceneTestCase {
 
     int matchFlags = caseSensitiveQuery ? 0 : RegExp.ASCII_CASE_INSENSITIVE;
     RegExp regex = new RegExp(regexPattern, RegExp.ALL, matchFlags);
-    Automaton automaton = regex.toAutomaton();
+    Automaton automaton =
+        Operations.determinize(regex.toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automaton);
     BytesRef br = newBytesRef(docValue);
     assertTrue(
@@ -276,6 +237,7 @@ public class TestRegExp extends LuceneTestCase {
     if (caseSensitiveQuery == false) {
       RegExp caseSensitiveRegex = new RegExp(regexPattern);
       Automaton csAutomaton = caseSensitiveRegex.toAutomaton();
+      csAutomaton = Operations.determinize(csAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
       ByteRunAutomaton csBytesMatcher = new ByteRunAutomaton(csAutomaton);
       assertFalse(
           "[" + regexPattern + "] with case sensitive setting should not match [" + docValue + "]",
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java
new file mode 100644
index 0000000..9671f1b
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExpParsing.java
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.automaton;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Simple unit tests for RegExp parsing.
+ *
+ * <p>For each type of node, test the toString() output and parse tree, the resulting automaton's
+ * language, and whether the automaton is deterministic.
+ */
+public class TestRegExpParsing extends LuceneTestCase {
+
+  public void testAnyChar() {
+    RegExp re = new RegExp(".");
+    assertEquals(".", re.toString());
+    assertEquals("REGEXP_ANYCHAR\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeAnyChar();
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testAnyString() {
+    RegExp re = new RegExp("@", RegExp.ANYSTRING);
+    assertEquals("@", re.toString());
+    assertEquals("REGEXP_ANYSTRING\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeAnyString();
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testChar() {
+    RegExp re = new RegExp("c");
+    assertEquals("\\c", re.toString());
+    assertEquals("REGEXP_CHAR char=c\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('c');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCaseInsensitiveChar() {
+    RegExp re = new RegExp("c", RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE);
+    assertEquals("\\c", re.toString());
+    assertEquals("REGEXP_CHAR char=c\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.union(Automata.makeChar('c'), Automata.makeChar('C'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCaseInsensitiveCharUpper() {
+    RegExp re = new RegExp("C", RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE);
+    assertEquals("\\C", re.toString());
+    assertEquals("REGEXP_CHAR char=C\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.union(Automata.makeChar('c'), Automata.makeChar('C'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCaseInsensitiveCharNotSensitive() {
+    RegExp re = new RegExp("4", RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE);
+    assertEquals("\\4", re.toString());
+    assertEquals("REGEXP_CHAR char=4\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('4');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCaseInsensitiveCharNonAscii() {
+    RegExp re = new RegExp("Ж", RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE);
+    assertEquals("\\Ж", re.toString());
+    assertEquals("REGEXP_CHAR char=Ж\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('Ж');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testNegatedChar() {
+    RegExp re = new RegExp("[^c]");
+    // TODO: would be nice to emit negated class rather than this
+    assertEquals("(.&~(\\c))", re.toString());
+    assertEquals(
+        String.join(
+            "\n",
+            "REGEXP_INTERSECTION",
+            "  REGEXP_ANYCHAR",
+            "  REGEXP_COMPLEMENT",
+            "    REGEXP_CHAR char=c\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+    // expected language: any single code point except 'c', bounded by the last valid code point
+    Automaton expected =
+        Operations.union(
+            Automata.makeCharRange(0, 'b'), Automata.makeCharRange('d', Character.MAX_CODE_POINT));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharRange() {
+    RegExp re = new RegExp("[b-d]");
+    assertEquals("[\\b-\\d]", re.toString());
+    assertEquals("REGEXP_CHAR_RANGE from=b to=d\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeCharRange('b', 'd');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testNegatedCharRange() {
+    RegExp re = new RegExp("[^b-d]");
+    // TODO: would be nice to emit negated class rather than this
+    assertEquals("(.&~([\\b-\\d]))", re.toString());
+    assertEquals(
+        String.join(
+            "\n",
+            "REGEXP_INTERSECTION",
+            "  REGEXP_ANYCHAR",
+            "  REGEXP_COMPLEMENT",
+            "    REGEXP_CHAR_RANGE from=b to=d\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+    // expected language: any single code point outside b-d, bounded by the last valid code point
+    Automaton expected =
+        Operations.union(
+            Automata.makeCharRange(0, 'a'), Automata.makeCharRange('e', Character.MAX_CODE_POINT));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testIllegalCharRange() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("[z-a]");
+        });
+  }
+
+  public void testCharClassDigit() {
+    RegExp re = new RegExp("[\\d]");
+    assertEquals("\\d", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\d\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeCharRange('0', '9');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharClassNonDigit() {
+    RegExp re = new RegExp("[\\D]");
+    assertEquals("\\D", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\D\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected =
+        Operations.minus(
+            Automata.makeAnyChar(),
+            Automata.makeCharRange('0', '9'),
+            Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharClassWhitespace() {
+    RegExp re = new RegExp("[\\s]");
+    assertEquals("\\s", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\s\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar(' ');
+    expected = Operations.union(expected, Automata.makeChar('\n'));
+    expected = Operations.union(expected, Automata.makeChar('\r'));
+    expected = Operations.union(expected, Automata.makeChar('\t'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharClassNonWhitespace() {
+    RegExp re = new RegExp("[\\S]");
+    assertEquals("\\S", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\S\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeAnyChar();
+    expected =
+        Operations.minus(
+            expected, Automata.makeChar(' '), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeChar('\n'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeChar('\r'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeChar('\t'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharClassWord() {
+    RegExp re = new RegExp("[\\w]");
+    assertEquals("\\w", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\w\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeCharRange('a', 'z');
+    expected = Operations.union(expected, Automata.makeCharRange('A', 'Z'));
+    expected = Operations.union(expected, Automata.makeCharRange('0', '9'));
+    expected = Operations.union(expected, Automata.makeChar('_'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCharClassNonWord() {
+    RegExp re = new RegExp("[\\W]");
+    assertEquals("\\W", re.toString());
+    assertEquals("REGEXP_PRE_CLASS class=\\W\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeAnyChar();
+    expected =
+        Operations.minus(
+            expected, Automata.makeCharRange('a', 'z'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeCharRange('A', 'Z'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeCharRange('0', '9'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    expected =
+        Operations.minus(
+            expected, Automata.makeChar('_'), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testTruncatedCharClass() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("[b-d");
+        });
+  }
+
+  public void testBogusCharClass() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("[\\q]");
+        });
+  }
+
+  public void testExcapedNotCharClass() {
+    RegExp re = new RegExp("[\\?]");
+    assertEquals("\\?", re.toString());
+    assertEquals("REGEXP_CHAR char=?\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('?');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testExcapedSlashNotCharClass() {
+    RegExp re = new RegExp("[\\\\]");
+    assertEquals("\\\\", re.toString());
+    assertEquals("REGEXP_CHAR char=\\\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('\\');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testEmpty() {
+    RegExp re = new RegExp("#", RegExp.EMPTY);
+    assertEquals("#", re.toString());
+    assertEquals("REGEXP_EMPTY\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeEmpty();
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testInterval() {
+    RegExp re = new RegExp("<5-40>");
+    assertEquals("<5-40>", re.toString());
+    assertEquals("REGEXP_INTERVAL<5-40>\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    // TODO: numeric intervals are NFAs
+
+    Automaton expected = Automata.makeDecimalInterval(5, 40, 0);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testBackwardsInterval() {
+    RegExp re = new RegExp("<40-5>");
+    assertEquals("<5-40>", re.toString());
+    assertEquals("REGEXP_INTERVAL<5-40>\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    // TODO: numeric intervals are NFAs
+
+    Automaton expected = Automata.makeDecimalInterval(5, 40, 0);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testTruncatedInterval() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("<1-");
+        });
+  }
+
+  public void testTruncatedInterval2() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("<1");
+        });
+  }
+
+  public void testEmptyInterval() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("<->");
+        });
+  }
+
+  public void testOptional() {
+    RegExp re = new RegExp("a?");
+    assertEquals("(\\a)?", re.toString());
+    assertEquals(String.join("\n", "REGEXP_OPTIONAL", "  REGEXP_CHAR char=a\n"), re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.optional(Automata.makeChar('a'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testRepeat0() {
+    RegExp re = new RegExp("a*");
+    assertEquals("(\\a)*", re.toString());
+    assertEquals(String.join("\n", "REGEXP_REPEAT", "  REGEXP_CHAR char=a\n"), re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.repeat(Automata.makeChar('a'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testRepeat1() {
+    RegExp re = new RegExp("a+");
+    assertEquals("(\\a){1,}", re.toString());
+    assertEquals(
+        String.join("\n", "REGEXP_REPEAT_MIN min=1", "  REGEXP_CHAR char=a\n"), re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.repeat(Automata.makeChar('a'), 1);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testRepeatN() {
+    RegExp re = new RegExp("a{5}");
+    assertEquals("(\\a){5,5}", re.toString());
+    assertEquals(
+        String.join("\n", "REGEXP_REPEAT_MINMAX min=5 max=5", "  REGEXP_CHAR char=a\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.repeat(Automata.makeChar('a'), 5, 5);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testRepeatNPlus() {
+    RegExp re = new RegExp("a{5,}");
+    assertEquals("(\\a){5,}", re.toString());
+    assertEquals(
+        String.join("\n", "REGEXP_REPEAT_MIN min=5", "  REGEXP_CHAR char=a\n"), re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.repeat(Automata.makeChar('a'), 5);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testRepeatMN() {
+    RegExp re = new RegExp("a{5,8}");
+    assertEquals("(\\a){5,8}", re.toString());
+    assertEquals(
+        String.join("\n", "REGEXP_REPEAT_MINMAX min=5 max=8", "  REGEXP_CHAR char=a\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Operations.repeat(Automata.makeChar('a'), 5, 8);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testTruncatedRepeat() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("a{5,8");
+        });
+  }
+
+  public void testBogusRepeat() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("a{Z}");
+        });
+  }
+
+  public void testString() {
+    RegExp re = new RegExp("boo");
+    assertEquals("\"boo\"", re.toString());
+    assertEquals("REGEXP_STRING string=boo\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeString("boo");
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testCaseInsensitiveString() {
+    RegExp re = new RegExp("boo", RegExp.NONE, RegExp.ASCII_CASE_INSENSITIVE);
+    assertEquals("\"boo\"", re.toString());
+    assertEquals("REGEXP_STRING string=boo\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton c1 = Operations.union(Automata.makeChar('b'), Automata.makeChar('B'));
+    Automaton c2 = Operations.union(Automata.makeChar('o'), Automata.makeChar('O'));
+
+    Automaton expected = Operations.concatenate(c1, c2);
+    expected = Operations.concatenate(expected, c2);
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testExplicitString() {
+    RegExp re = new RegExp("\"boo\"");
+    assertEquals("\"boo\"", re.toString());
+    assertEquals("REGEXP_STRING string=boo\n", re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeString("boo");
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testNotTerminatedString() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("\"boo");
+        });
+  }
+
+  public void testConcatenation() {
+    RegExp re = new RegExp("[b-c][e-f]");
+    assertEquals("[\\b-\\c][\\e-\\f]", re.toString());
+    assertEquals(
+        String.join(
+            "\n",
+            "REGEXP_CONCATENATION",
+            "  REGEXP_CHAR_RANGE from=b to=c",
+            "  REGEXP_CHAR_RANGE from=e to=f\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected =
+        Operations.concatenate(Automata.makeCharRange('b', 'c'), Automata.makeCharRange('e', 'f'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testIntersection() {
+    RegExp re = new RegExp("[b-f]&[e-f]");
+    assertEquals("([\\b-\\f]&[\\e-\\f])", re.toString());
+    assertEquals(
+        String.join(
+            "\n",
+            "REGEXP_INTERSECTION",
+            "  REGEXP_CHAR_RANGE from=b to=f",
+            "  REGEXP_CHAR_RANGE from=e to=f\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected =
+        Operations.intersection(Automata.makeCharRange('b', 'f'), Automata.makeCharRange('e', 'f'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testTruncatedIntersection() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("a&");
+        });
+  }
+
+  public void testTruncatedIntersectionParens() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("(a)&(");
+        });
+  }
+
+  public void testUnion() {
+    RegExp re = new RegExp("[b-c]|[e-f]");
+    assertEquals("([\\b-\\c]|[\\e-\\f])", re.toString());
+    assertEquals(
+        String.join(
+            "\n",
+            "REGEXP_UNION",
+            "  REGEXP_CHAR_RANGE from=b to=c",
+            "  REGEXP_CHAR_RANGE from=e to=f\n"),
+        re.toStringTree());
+
+    Automaton actual = re.toAutomaton();
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected =
+        Operations.union(Automata.makeCharRange('b', 'c'), Automata.makeCharRange('e', 'f'));
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testTruncatedUnion() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("a|");
+        });
+  }
+
+  public void testTruncatedUnionParens() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("(a)|(");
+        });
+  }
+
+  public void testAutomaton() {
+    AutomatonProvider myProvider =
+        new AutomatonProvider() {
+          @Override
+          public Automaton getAutomaton(String name) {
+            return Automata.makeChar('z');
+          }
+        };
+    RegExp re = new RegExp("<myletter>", RegExp.ALL);
+    assertEquals("<myletter>", re.toString());
+    assertEquals("REGEXP_AUTOMATON\n", re.toStringTree());
+    assertEquals(Set.of("myletter"), re.getIdentifiers());
+
+    Automaton actual = re.toAutomaton(myProvider);
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('z');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testAutomatonMap() {
+    RegExp re = new RegExp("<myletter>", RegExp.ALL);
+    assertEquals("<myletter>", re.toString());
+    assertEquals("REGEXP_AUTOMATON\n", re.toStringTree());
+    assertEquals(Set.of("myletter"), re.getIdentifiers());
+
+    Automaton actual = re.toAutomaton(Map.of("myletter", Automata.makeChar('z')));
+    assertTrue(actual.isDeterministic());
+
+    Automaton expected = Automata.makeChar('z');
+    assertSameLanguage(expected, actual);
+  }
+
+  public void testAutomatonIOException() {
+    AutomatonProvider myProvider =
+        new AutomatonProvider() {
+          @Override
+          public Automaton getAutomaton(String name) throws IOException {
+            throw new IOException("fake ioexception");
+          }
+        };
+    RegExp re = new RegExp("<myletter>", RegExp.ALL);
+    assertEquals("<myletter>", re.toString());
+    assertEquals("REGEXP_AUTOMATON\n", re.toStringTree());
+    assertEquals(Set.of("myletter"), re.getIdentifiers());
+
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          re.toAutomaton(myProvider);
+        });
+  }
+
+  public void testAutomatonNotFound() {
+    RegExp re = new RegExp("<bogus>", RegExp.ALL);
+    assertEquals("<bogus>", re.toString());
+    assertEquals("REGEXP_AUTOMATON\n", re.toStringTree());
+
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          re.toAutomaton(Map.of("myletter", Automata.makeChar('z')));
+        });
+  }
+
+  public void testIllegalSyntaxFlags() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("bogus", Integer.MAX_VALUE);
+        });
+  }
+
+  public void testIllegalMatchFlags() {
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> {
+          new RegExp("bogus", RegExp.ALL, 1);
+        });
+  }
+
+  /** Determinizes both automata and fails, dumping both as dot graphs, if languages differ. */
+  private void assertSameLanguage(Automaton expected, Automaton actual) {
+    Automaton expectedDfa =
+        Operations.determinize(expected, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    Automaton actualDfa = Operations.determinize(actual, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    if (Operations.sameLanguage(expectedDfa, actualDfa) == false) {
+      // report both graphs in the failure message instead of printing them to stdout
+      fail("expected:\n" + expectedDfa.toDot() + "\nactual:\n" + actualDfa.toDot());
+    }
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
index 4773236..bace840 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -69,9 +69,9 @@ import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.automaton.RegExp;
 import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
@@ -352,7 +352,7 @@ public class TestFSTs extends LuceneTestCase {
       BytesRef term;
       int ord = 0;
 
-      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
+      Automaton automaton = Automata.makeAnyString();
       final TermsEnum termsEnum2 =
           terms.intersect(new CompiledAutomaton(automaton, false, false), null);
 
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
index abee3d0..9afd84b 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervals.java
@@ -50,7 +50,9 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -1004,8 +1006,9 @@ public class TestIntervals extends LuceneTestCase {
 
   public void testMultiTerm() throws IOException {
     RegExp re = new RegExp("p.*e");
-    IntervalsSource source =
-        Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());
+    Automaton automaton =
+        Operations.determinize(re.toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(automaton), re.toString());
 
     checkIntervals(
         source,
@@ -1025,7 +1028,7 @@ public class TestIntervals extends LuceneTestCase {
             IllegalStateException.class,
             () -> {
               IntervalsSource s =
-                  Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
+                  Intervals.multiterm(new CompiledAutomaton(automaton), 1, re.toString());
               for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
                 s.intervals("field1", ctx);
               }
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/RegexCompletionQuery.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/RegexCompletionQuery.java
index fe061da..324312d 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/RegexCompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/RegexCompletionQuery.java
@@ -25,6 +25,7 @@ import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.suggest.BitsProducer;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.MinimizationOperations;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -74,7 +75,7 @@ public class RegexCompletionQuery extends CompletionQuery {
    * @param term query is run against {@link Term#field()} and {@link Term#text()} is interpreted as
    *     a regular expression
    * @param flags used as syntax_flag in {@link RegExp#RegExp(String, int)}
-   * @param determinizeWorkLimit used in {@link RegExp#toAutomaton(int)}
+   * @param determinizeWorkLimit used in {@link MinimizationOperations#minimize(Automaton, int)}
    * @param filter used to query on a sub set of documents
    */
   public RegexCompletionQuery(Term term, int flags, int determinizeWorkLimit, BitsProducer filter) {
@@ -91,7 +92,8 @@ public class RegexCompletionQuery extends CompletionQuery {
     Automaton automaton =
         getTerm().text().isEmpty()
             ? Automata.makeEmpty()
-            : new RegExp(getTerm().text(), flags).toAutomaton(determinizeWorkLimit);
+            : MinimizationOperations.minimize(
+                new RegExp(getTerm().text(), flags).toAutomaton(), determinizeWorkLimit);
     return new CompletionWeight(this, automaton);
   }
 
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
index 3294643..9e494b0 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
@@ -70,6 +70,7 @@ import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
@@ -994,7 +995,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
     assertEquals(2, termsEnum.ord());
 
     // NORMAL automaton
-    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+    termsEnum =
+        dv.intersect(
+            new CompiledAutomaton(
+                Operations.determinize(
+                    new RegExp(".*l.*").toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)));
     assertEquals("hello", termsEnum.next().utf8ToString());
     assertEquals(1, termsEnum.ord());
     assertEquals("world", termsEnum.next().utf8ToString());
@@ -2191,7 +2196,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
     assertEquals(2, termsEnum.ord());
 
     // NORMAL automaton
-    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+    termsEnum =
+        dv.intersect(
+            new CompiledAutomaton(
+                Operations.determinize(
+                    new RegExp(".*l.*").toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)));
     assertEquals("hello", termsEnum.next().utf8ToString());
     assertEquals(1, termsEnum.ord());
     assertEquals("world", termsEnum.next().utf8ToString());
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 0c3f798..70587b0 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -135,8 +135,10 @@ import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
 import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.store.RawDirectoryWrapper;
+import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -2113,8 +2115,9 @@ public abstract class LuceneTestCase extends Assert {
       int numIntersections = atLeast(3);
       for (int i = 0; i < numIntersections; i++) {
         String re = AutomatonTestUtil.randomRegexp(random());
-        CompiledAutomaton automaton =
-            new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
+        Automaton a = new RegExp(re, RegExp.NONE).toAutomaton();
+        a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+        CompiledAutomaton automaton = new CompiledAutomaton(a);
         if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
           // TODO: test start term too
           TermsEnum leftIntersection = leftTerms.intersect(automaton, null);