You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/04 23:16:03 UTC

svn commit: r1636758 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/search/ test/org/apache/lucene/index/ test/org/apache/lucene/search/

Author: mikemccand
Date: Tue Nov  4 22:16:02 2014
New Revision: 1636758

URL: http://svn.apache.org/r1636758
Log:
LUCENE-6046: fix test failure, add maxDeterminizedStates to AutomatonQuery and WildcardQuery too

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java Tue Nov  4 22:16:02 2014
@@ -26,6 +26,7 @@ import org.apache.lucene.util.AttributeS
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
 
 /**
  * A {@link Query} that will match terms against a finite-state machine.
@@ -61,10 +62,26 @@ public class AutomatonQuery extends Mult
    *        match.
    */
   public AutomatonQuery(final Term term, Automaton automaton) {
+    this(term, automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+  }
+
+  /**
+   * Create a new AutomatonQuery from an {@link Automaton}.
+   * 
+   * @param term Term containing field and possibly some pattern structure. The
+   *        term text is ignored.
+   * @param automaton Automaton to run, terms that are accepted are considered a
+   *        match.
+   * @param maxDeterminizedStates maximum number of states in the resulting
+   *   automaton.  If the automaton would need more than this many states,
+   *   TooComplexToDeterminizeException is thrown.  Higher numbers require more
+   *   space but can process more complex automata.
+   */
+  public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
     super(term.field());
     this.term = term;
     this.automaton = automaton;
-    this.compiled = new CompiledAutomaton(automaton);
+    this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java Tue Nov  4 22:16:02 2014
@@ -105,8 +105,9 @@ public class RegexpQuery extends Automat
    */
   public RegexpQuery(Term term, int flags, AutomatonProvider provider,
       int maxDeterminizedStates) {
-    super(term, new RegExp(term.text(), flags).toAutomaton(
-      provider, maxDeterminizedStates));
+    super(term,
+          new RegExp(term.text(), flags).toAutomaton(
+                       provider, maxDeterminizedStates), maxDeterminizedStates);
   }
   
   /** Prints a user-readable version of this query. */

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java Tue Nov  4 22:16:02 2014
@@ -23,8 +23,8 @@ import java.util.List;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
 
 /** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
  * matches any character sequence (including the empty one), and <code>?</code>,
@@ -58,6 +58,17 @@ public class WildcardQuery extends Autom
   }
   
   /**
+   * Constructs a query for terms matching <code>term</code>.
+   * @param maxDeterminizedStates maximum number of states in the resulting
+   *   automaton.  If the automaton would need more than this many states,
+   *   TooComplexToDeterminizeException is thrown.  Higher numbers require more
+   *   space but can process more complex automata.
+   */
+  public WildcardQuery(Term term, int maxDeterminizedStates) {
+    super(term, toAutomaton(term), maxDeterminizedStates);
+  }
+
+  /**
    * Convert Lucene wildcard syntax into an automaton.
    * @lucene.internal
    */

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java Tue Nov  4 22:16:02 2014
@@ -100,9 +100,9 @@ public class TestTermsEnum2 extends Luce
       Automaton alternate = Automata.makeStringUnion(matchedTerms);
       //System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
       //AutomatonTestUtil.minimizeSimple(alternate);
-      //System.out.println("minmize done");
+      //System.out.println("minimize done");
       AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
-      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
+      AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate, Integer.MAX_VALUE);
 
       ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
       ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java Tue Nov  4 22:16:02 2014
@@ -18,6 +18,9 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.CountDownLatch;
 
 import org.apache.lucene.document.Document;
@@ -30,6 +33,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.TestUtil;
@@ -237,4 +241,13 @@ public class TestAutomatonQuery extends 
       thread.join();
     }
   }
+
+  public void testHugeAutomaton() {
+    List<BytesRef> terms = new ArrayList<>();
+    while (terms.size() < 10000) {
+      terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
+    }
+    Collections.sort(terms);
+    new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
+  }
 }