You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/04 23:16:03 UTC
svn commit: r1636758 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/search/ test/org/apache/lucene/index/
test/org/apache/lucene/search/
Author: mikemccand
Date: Tue Nov 4 22:16:02 2014
New Revision: 1636758
URL: http://svn.apache.org/r1636758
Log:
LUCENE-6046: fix test failure, add maxDeterminizedStates to AutomatonQuery and WildcardQuery too
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java Tue Nov 4 22:16:02 2014
@@ -26,6 +26,7 @@ import org.apache.lucene.util.AttributeS
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
/**
* A {@link Query} that will match terms against a finite-state machine.
@@ -61,10 +62,26 @@ public class AutomatonQuery extends Mult
* match.
*/
public AutomatonQuery(final Term term, Automaton automaton) {
+ this(term, automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+ }
+
+ /**
+ * Create a new AutomatonQuery from an {@link Automaton}.
+ *
+ * @param term Term containing field and possibly some pattern structure. The
+ * term text is ignored.
+ * @param automaton Automaton to run, terms that are accepted are considered a
+ * match.
+ * @param maxDeterminizedStates maximum number of states in the resulting
+ * automaton. If the automaton would need more than this many states
+ * TooComplexToDeterminizeException is thrown. Higher numbers require more
+ * space but can process more complex automata.
+ */
+ public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates) {
super(term.field());
this.term = term;
this.automaton = automaton;
- this.compiled = new CompiledAutomaton(automaton);
+ this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates);
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java Tue Nov 4 22:16:02 2014
@@ -105,8 +105,9 @@ public class RegexpQuery extends Automat
*/
public RegexpQuery(Term term, int flags, AutomatonProvider provider,
int maxDeterminizedStates) {
- super(term, new RegExp(term.text(), flags).toAutomaton(
- provider, maxDeterminizedStates));
+ super(term,
+ new RegExp(term.text(), flags).toAutomaton(
+ provider, maxDeterminizedStates), maxDeterminizedStates);
}
/** Prints a user-readable version of this query. */
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java Tue Nov 4 22:16:02 2014
@@ -23,8 +23,8 @@ import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
* matches any character sequence (including the empty one), and <code>?</code>,
@@ -58,6 +58,17 @@ public class WildcardQuery extends Autom
}
/**
+ * Constructs a query for terms matching <code>term</code>.
+ * @param maxDeterminizedStates maximum number of states in the resulting
+ * automaton. If the automaton would need more than this many states
+ * TooComplexToDeterminizeException is thrown. Higher numbers require more
+ * space but can process more complex automata.
+ */
+ public WildcardQuery(Term term, int maxDeterminizedStates) {
+ super(term, toAutomaton(term), maxDeterminizedStates);
+ }
+
+ /**
* Convert Lucene wildcard syntax into an automaton.
* @lucene.internal
*/
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java Tue Nov 4 22:16:02 2014
@@ -100,9 +100,9 @@ public class TestTermsEnum2 extends Luce
Automaton alternate = Automata.makeStringUnion(matchedTerms);
//System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
//AutomatonTestUtil.minimizeSimple(alternate);
- //System.out.println("minmize done");
+ //System.out.println("minimize done");
AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
- AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
+ AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate, Integer.MAX_VALUE);
ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java?rev=1636758&r1=1636757&r2=1636758&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java Tue Nov 4 22:16:02 2014
@@ -18,6 +18,9 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.document.Document;
@@ -30,6 +33,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil;
@@ -237,4 +241,13 @@ public class TestAutomatonQuery extends
thread.join();
}
}
+
+ public void testHugeAutomaton() {
+ List<BytesRef> terms = new ArrayList<>();
+ while (terms.size() < 10000) {
+ terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
+ }
+ Collections.sort(terms);
+ new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
+ }
}