You are viewing a plain text version of this content. The canonical link for it was a hyperlink in the original page and is not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/04 23:29:28 UTC
svn commit: r1636762 [2/2] - in /lucene/dev/branches/lucene_solr_4_10: ./
lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/util/automaton/
lucene/core/src/test/org/apache/lucene/analysis/ lucene...
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java Tue Nov 4 22:29:27 2014
@@ -36,6 +36,8 @@ import org.apache.lucene.util.UnicodeUti
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
import org.apache.lucene.util.fst.Util;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
public class TestAutomaton extends LuceneTestCase {
public void testBasic() throws Exception {
@@ -111,7 +113,7 @@ public class TestAutomaton extends Lucen
Automata.makeAnyString(),
Automata.makeString("n"),
Automata.makeAnyString()));
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "mn"));
assertTrue(Operations.run(a, "mone"));
assertFalse(Operations.run(a, "m"));
@@ -122,7 +124,7 @@ public class TestAutomaton extends Lucen
Automaton a = Operations.union(Arrays.asList(
Automata.makeString("foobar"),
Automata.makeString("barbaz")));
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "foobar"));
assertTrue(Operations.run(a, "barbaz"));
@@ -134,7 +136,7 @@ public class TestAutomaton extends Lucen
Automata.makeString("foobar"),
Automata.makeString(""),
Automata.makeString("barbaz")));
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "foobar"));
assertTrue(Operations.run(a, "barbaz"));
assertTrue(Operations.run(a, ""));
@@ -144,7 +146,7 @@ public class TestAutomaton extends Lucen
public void testMinimizeSimple() throws Exception {
Automaton a = Automata.makeString("foobar");
- Automaton aMin = MinimizationOperations.minimize(a);
+ Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, aMin));
}
@@ -152,14 +154,16 @@ public class TestAutomaton extends Lucen
public void testMinimize2() throws Exception {
Automaton a = Operations.union(Arrays.asList(Automata.makeString("foobar"),
Automata.makeString("boobar")));
- Automaton aMin = MinimizationOperations.minimize(a);
- assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(a)), aMin));
+ Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
+ assertTrue(Operations.sameLanguage(Operations.determinize(
+ Operations.removeDeadStates(a), DEFAULT_MAX_DETERMINIZED_STATES), aMin));
}
public void testReverse() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton ra = Operations.reverse(a);
- Automaton a2 = Operations.determinize(Operations.reverse(ra));
+ Automaton a2 = Operations.determinize(Operations.reverse(ra),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, a2));
}
@@ -167,7 +171,7 @@ public class TestAutomaton extends Lucen
public void testOptional() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton a2 = Operations.optional(a);
- a2 = Operations.determinize(a2);
+ a2 = Operations.determinize(a2, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "foobar"));
assertFalse(Operations.run(a, ""));
@@ -177,7 +181,8 @@ public class TestAutomaton extends Lucen
public void testRepeatAny() throws Exception {
Automaton a = Automata.makeString("zee");
- Automaton a2 = Operations.determinize(Operations.repeat(a));
+ Automaton a2 = Operations.determinize(Operations.repeat(a),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a2, ""));
assertTrue(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
@@ -186,7 +191,8 @@ public class TestAutomaton extends Lucen
public void testRepeatMin() throws Exception {
Automaton a = Automata.makeString("zee");
- Automaton a2 = Operations.determinize(Operations.repeat(a, 2));
+ Automaton a2 = Operations.determinize(Operations.repeat(a, 2),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
@@ -195,7 +201,8 @@ public class TestAutomaton extends Lucen
public void testRepeatMinMax1() throws Exception {
Automaton a = Automata.makeString("zee");
- Automaton a2 = Operations.determinize(Operations.repeat(a, 0, 2));
+ Automaton a2 = Operations.determinize(Operations.repeat(a, 0, 2),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a2, ""));
assertTrue(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
@@ -204,7 +211,8 @@ public class TestAutomaton extends Lucen
public void testRepeatMinMax2() throws Exception {
Automaton a = Automata.makeString("zee");
- Automaton a2 = Operations.determinize(Operations.repeat(a, 2, 4));
+ Automaton a2 = Operations.determinize(Operations.repeat(a, 2, 4),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
@@ -215,7 +223,8 @@ public class TestAutomaton extends Lucen
public void testComplement() throws Exception {
Automaton a = Automata.makeString("zee");
- Automaton a2 = Operations.determinize(Operations.complement(a));
+ Automaton a2 = Operations.determinize(Operations.complement(a,
+ DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a2, ""));
assertFalse(Operations.run(a2, "zee"));
assertTrue(Operations.run(a2, "zeezee"));
@@ -223,7 +232,8 @@ public class TestAutomaton extends Lucen
}
public void testInterval() throws Exception {
- Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3));
+ Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a, ""));
assertTrue(Operations.run(a, "017"));
assertTrue(Operations.run(a, "100"));
@@ -239,7 +249,8 @@ public class TestAutomaton extends Lucen
a.addTransition(init, fini, 'm');
a.addTransition(fini, fini, 'm');
a.finishState();
- assertEquals(0, Operations.getCommonSuffixBytesRef(a).length);
+ assertEquals(0, Operations.getCommonSuffixBytesRef(a,
+ DEFAULT_MAX_DETERMINIZED_STATES).length);
}
public void testReverseRandom1() throws Exception {
@@ -248,8 +259,9 @@ public class TestAutomaton extends Lucen
Automaton a = AutomatonTestUtil.randomAutomaton(random());
Automaton ra = Operations.reverse(a);
Automaton rra = Operations.reverse(ra);
- assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(a)),
- Operations.determinize(Operations.removeDeadStates(rra))));
+ assertTrue(Operations.sameLanguage(
+ Operations.determinize(Operations.removeDeadStates(a), DEFAULT_MAX_DETERMINIZED_STATES),
+ Operations.determinize(Operations.removeDeadStates(rra), DEFAULT_MAX_DETERMINIZED_STATES)));
}
}
@@ -262,7 +274,7 @@ public class TestAutomaton extends Lucen
a = Operations.removeDeadStates(a);
}
Automaton ra = Operations.reverse(a);
- Automaton rda = Operations.determinize(ra);
+ Automaton rda = Operations.determinize(ra, DEFAULT_MAX_DETERMINIZED_STATES);
if (Operations.isEmpty(a)) {
assertTrue(Operations.isEmpty(rda));
@@ -290,7 +302,8 @@ public class TestAutomaton extends Lucen
}
public void testAnyStringEmptyString() throws Exception {
- Automaton a = Operations.determinize(Automata.makeAnyString());
+ Automaton a = Operations.determinize(Automata.makeAnyString(),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, ""));
}
@@ -349,9 +362,9 @@ public class TestAutomaton extends Lucen
}
assertTrue(Operations.sameLanguage(
- Operations.determinize(Operations.removeDeadStates(a)),
- Operations.determinize(Operations.removeDeadStates(builder.finish()))));
-
+ Operations.determinize(Operations.removeDeadStates(a), DEFAULT_MAX_DETERMINIZED_STATES),
+ Operations.determinize(Operations.removeDeadStates(builder.finish()),
+ DEFAULT_MAX_DETERMINIZED_STATES)));
}
}
@@ -368,7 +381,8 @@ public class TestAutomaton extends Lucen
a.finishState();
assertFalse(Operations.isTotal(a));
a.setAccept(init, true);
- assertTrue(Operations.isTotal(MinimizationOperations.minimize(a)));
+ assertTrue(Operations.isTotal(MinimizationOperations.minimize(a,
+ DEFAULT_MAX_DETERMINIZED_STATES)));
}
public void testMinimizeEmpty() throws Exception {
@@ -377,7 +391,7 @@ public class TestAutomaton extends Lucen
int fini = a.createState();
a.addTransition(init, fini, 'a');
a.finishState();
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertEquals(0, a.getNumStates());
}
@@ -387,26 +401,29 @@ public class TestAutomaton extends Lucen
Automaton a3 = Automata.makeString("beebar");
Automaton a = Operations.union(Arrays.asList(a1, a2, a3));
if (random().nextBoolean()) {
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
} else if (random().nextBoolean()) {
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
}
assertMatches(a, "foobar", "beebar", "boobar");
- Automaton a4 = Operations.determinize(Operations.minus(a, a2));
+ Automaton a4 = Operations.determinize(Operations.minus(a, a2,
+ DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertTrue(Operations.run(a4, "beebar"));
assertMatches(a4, "foobar", "beebar");
- a4 = Operations.determinize(Operations.minus(a4, a1));
+ a4 = Operations.determinize(Operations.minus(a4, a1,
+ DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertTrue(Operations.run(a4, "beebar"));
assertMatches(a4, "beebar");
- a4 = Operations.determinize(Operations.minus(a4, a3));
+ a4 = Operations.determinize(Operations.minus(a4, a3,
+ DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a4, "foobar"));
assertFalse(Operations.run(a4, "boobar"));
assertFalse(Operations.run(a4, "beebar"));
@@ -415,7 +432,7 @@ public class TestAutomaton extends Lucen
public void testOneInterval() throws Exception {
Automaton a = Automata.makeInterval(999, 1032, 0);
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "0999"));
assertTrue(Operations.run(a, "00999"));
assertTrue(Operations.run(a, "000999"));
@@ -423,7 +440,7 @@ public class TestAutomaton extends Lucen
public void testAnotherInterval() throws Exception {
Automaton a = Automata.makeInterval(1, 2, 0);
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "01"));
}
@@ -445,9 +462,10 @@ public class TestAutomaton extends Lucen
}
String prefix = b.toString();
- Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits));
+ Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits),
+ DEFAULT_MAX_DETERMINIZED_STATES);
if (random().nextBoolean()) {
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
}
String mins = Integer.toString(min);
String maxs = Integer.toString(max);
@@ -487,7 +505,8 @@ public class TestAutomaton extends Lucen
expected.add(Util.toUTF32(s, ints));
}
- assertEquals(expected, Operations.getFiniteStrings(Operations.determinize(a), -1));
+ assertEquals(expected, Operations.getFiniteStrings(Operations.determinize(a,
+ DEFAULT_MAX_DETERMINIZED_STATES), -1));
}
public void testConcatenatePreservesDet() throws Exception {
@@ -578,13 +597,13 @@ public class TestAutomaton extends Lucen
if (VERBOSE) {
System.out.println(" randomNoOp: determinize");
}
- return Operations.determinize(a);
+ return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
case 1:
if (a.getNumStates() < 100) {
if (VERBOSE) {
System.out.println(" randomNoOp: minimize");
}
- return MinimizationOperations.minimize(a);
+ return MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
} else {
if (VERBOSE) {
System.out.println(" randomNoOp: skip op=minimize: too many states (" + a.getNumStates() + ")");
@@ -725,7 +744,7 @@ public class TestAutomaton extends Lucen
if (VERBOSE) {
System.out.println(" op=determinize");
}
- a = Operations.determinize(a);
+ a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(a.isDeterministic());
break;
@@ -735,7 +754,7 @@ public class TestAutomaton extends Lucen
System.out.println(" op=minimize");
}
// minimize
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
} else if (VERBOSE) {
System.out.println(" skip op=minimize: too many states (" + a.getNumStates() + ")");
}
@@ -791,7 +810,7 @@ public class TestAutomaton extends Lucen
assertTrue(removed);
}
Automaton a2 = unionTerms(toRemove);
- a = Operations.minus(a, a2);
+ a = Operations.minus(a, a2, DEFAULT_MAX_DETERMINIZED_STATES);
}
}
break;
@@ -831,7 +850,7 @@ public class TestAutomaton extends Lucen
}
}
Automaton a2 = randomNoOp(Operations.union(as));
- a = Operations.minus(a, a2);
+ a = Operations.minus(a, a2, DEFAULT_MAX_DETERMINIZED_STATES);
}
break;
@@ -868,9 +887,9 @@ public class TestAutomaton extends Lucen
Automaton a2 = Operations.union(as);
if (random().nextBoolean()) {
- a2 = Operations.determinize(a2);
+ a2 = Operations.determinize(a2, DEFAULT_MAX_DETERMINIZED_STATES);
} else if (random().nextBoolean()) {
- a2 = MinimizationOperations.minimize(a2);
+ a2 = MinimizationOperations.minimize(a2, DEFAULT_MAX_DETERMINIZED_STATES);
}
a = Operations.intersection(a, a2);
@@ -944,7 +963,7 @@ public class TestAutomaton extends Lucen
if (VERBOSE) {
System.out.println(" op=remove the empty string");
}
- a = Operations.minus(a, Automata.makeEmptyString());
+ a = Operations.minus(a, Automata.makeEmptyString(), DEFAULT_MAX_DETERMINIZED_STATES);
terms.remove(new BytesRef());
break;
@@ -1024,7 +1043,7 @@ public class TestAutomaton extends Lucen
assertTrue(Operations.isFinite(a));
assertFalse(Operations.isTotal(a));
- Automaton detA = Operations.determinize(a);
+ Automaton detA = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
// Make sure all terms are accepted:
IntsRefBuilder scratch = new IntsRefBuilder();
@@ -1058,8 +1077,10 @@ public class TestAutomaton extends Lucen
}
// Use sameLanguage:
- Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms)));
- assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a))));
+ Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms),
+ DEFAULT_MAX_DETERMINIZED_STATES));
+ assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a,
+ DEFAULT_MAX_DETERMINIZED_STATES))));
// Do same check, in UTF8 space
Automaton utf8 = randomNoOp(new UTF32ToUTF8().convert(a));
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java Tue Nov 4 22:29:27 2014
@@ -31,14 +31,14 @@ import org.apache.lucene.util.TestUtil;
public class TestCompiledAutomaton extends LuceneTestCase {
- private CompiledAutomaton build(String... strings) {
+ private CompiledAutomaton build(int maxDeterminizedStates, String... strings) {
final List<BytesRef> terms = new ArrayList<>();
for(String s : strings) {
terms.add(new BytesRef(s));
}
Collections.sort(terms);
final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
- return new CompiledAutomaton(a, true, false);
+ return new CompiledAutomaton(a, true, false, maxDeterminizedStates);
}
private void testFloor(CompiledAutomaton c, String input, String expected) {
@@ -53,8 +53,8 @@ public class TestCompiledAutomaton exten
}
}
- private void testTerms(String[] terms) throws Exception {
- final CompiledAutomaton c = build(terms);
+ private void testTerms(int maxDeterminizedStates, String[] terms) throws Exception {
+ final CompiledAutomaton c = build(maxDeterminizedStates, terms);
final BytesRef[] termBytes = new BytesRef[terms.length];
for(int idx=0;idx<terms.length;idx++) {
termBytes[idx] = new BytesRef(terms[idx]);
@@ -100,7 +100,7 @@ public class TestCompiledAutomaton exten
while(terms.size() != numTerms) {
terms.add(randomString());
}
- testTerms(terms.toArray(new String[terms.size()]));
+ testTerms(numTerms * 100, terms.toArray(new String[terms.size()]));
}
private String randomString() {
@@ -109,7 +109,8 @@ public class TestCompiledAutomaton exten
}
public void testBasic() throws Exception {
- CompiledAutomaton c = build("fob", "foo", "goo");
+ CompiledAutomaton c = build(Operations.DEFAULT_MAX_DETERMINIZED_STATES,
+ "fob", "foo", "goo");
testFloor(c, "goo", "goo");
testFloor(c, "ga", "foo");
testFloor(c, "g", "foo");
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java Tue Nov 4 22:29:27 2014
@@ -19,6 +19,8 @@ package org.apache.lucene.util.automaton
import org.apache.lucene.util.LuceneTestCase;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
/**
* Not completely thorough, but tries to test determinism correctness
* somewhat randomly.
@@ -39,29 +41,32 @@ public class TestDeterminism extends Luc
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
a = AutomatonTestUtil.determinizeSimple(a);
- Automaton b = Operations.determinize(a);
+ Automaton b = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
// TODO: more verifications possible?
assertTrue(Operations.sameLanguage(a, b));
}
}
private static void assertAutomaton(Automaton a) {
- a = Operations.determinize(Operations.removeDeadStates(a));
+ a = Operations.determinize(Operations.removeDeadStates(a), DEFAULT_MAX_DETERMINIZED_STATES);
// complement(complement(a)) = a
- Automaton equivalent = Operations.complement(Operations.complement(a));
+ Automaton equivalent = Operations.complement(Operations.complement(a,
+ DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, equivalent));
// a union a = a
- equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)));
+ equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, equivalent));
// a intersect a = a
- equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)));
+ equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)),
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, equivalent));
// a minus a = empty
- Automaton empty = Operations.minus(a, a);
+ Automaton empty = Operations.minus(a, a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.isEmpty(empty));
// as long as don't accept the empty string
@@ -70,7 +75,8 @@ public class TestDeterminism extends Luc
//System.out.println("test " + a);
Automaton optional = Operations.optional(a);
//System.out.println("optional " + optional);
- equivalent = Operations.minus(optional, Automata.makeEmptyString());
+ equivalent = Operations.minus(optional, Automata.makeEmptyString(),
+ DEFAULT_MAX_DETERMINIZED_STATES);
//System.out.println("equiv " + equivalent);
assertTrue(Operations.sameLanguage(a, equivalent));
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java Tue Nov 4 22:29:27 2014
@@ -50,12 +50,12 @@ public class TestDeterminizeLexicon exte
public void assertLexicon() throws Exception {
Collections.shuffle(automata, random());
Automaton lex = Operations.union(automata);
- lex = Operations.determinize(lex);
+ lex = Operations.determinize(lex, 1000000);
assertTrue(Operations.isFinite(lex));
for (String s : terms) {
assertTrue(Operations.run(lex, s));
}
- final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex);
+ final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex, false, 1000000);
for (String s : terms) {
byte bytes[] = s.getBytes(StandardCharsets.UTF_8);
assertTrue(lexByte.run(bytes, 0, bytes.length));
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java Tue Nov 4 22:29:27 2014
@@ -22,6 +22,8 @@ import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
public class TestLevenshteinAutomata extends LuceneTestCase {
public void testLev0() throws Exception {
@@ -121,11 +123,11 @@ public class TestLevenshteinAutomata ext
private Automaton naiveLev1(String s) {
Automaton a = Automata.makeString(s);
a = Operations.union(a, insertionsOf(s));
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
a = Operations.union(a, deletionsOf(s));
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
a = Operations.union(a, substitutionsOf(s));
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
@@ -137,7 +139,7 @@ public class TestLevenshteinAutomata ext
private Automaton naiveLev1T(String s) {
Automaton a = naiveLev1(s);
a = Operations.union(a, transpositionsOf(s));
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
@@ -156,7 +158,7 @@ public class TestLevenshteinAutomata ext
}
Automaton a = Operations.union(list);
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
@@ -174,7 +176,7 @@ public class TestLevenshteinAutomata ext
}
Automaton a = Operations.union(list);
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
@@ -193,7 +195,7 @@ public class TestLevenshteinAutomata ext
}
Automaton a = Operations.union(list);
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
@@ -218,7 +220,7 @@ public class TestLevenshteinAutomata ext
}
}
Automaton a = Operations.union(list);
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
return a;
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java Tue Nov 4 22:29:27 2014
@@ -19,6 +19,8 @@ package org.apache.lucene.util.automaton
import org.apache.lucene.util.LuceneTestCase;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
/**
* This test builds some randomish NFA/DFA and minimizes them.
*/
@@ -28,8 +30,10 @@ public class TestMinimize extends Lucene
int num = atLeast(200);
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
- Automaton la = Operations.determinize(Operations.removeDeadStates(a));
- Automaton lb = MinimizationOperations.minimize(a);
+ Automaton la = Operations.determinize(Operations.removeDeadStates(a),
+ DEFAULT_MAX_DETERMINIZED_STATES);
+ Automaton lb = MinimizationOperations.minimize(a,
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(la, lb));
}
}
@@ -42,7 +46,8 @@ public class TestMinimize extends Lucene
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
a = AutomatonTestUtil.minimizeSimple(a);
- Automaton b = MinimizationOperations.minimize(a);
+ Automaton b = MinimizationOperations.minimize(a,
+ DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.sameLanguage(a, b));
assertEquals(a.getNumStates(), b.getNumStates());
int numStates = a.getNumStates();
@@ -62,6 +67,6 @@ public class TestMinimize extends Lucene
/** n^2 space usage in Hopcroft minimization? */
public void testMinimizeHuge() {
- new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+ new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton(1000000);
}
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java Tue Nov 4 22:29:27 2014
@@ -24,6 +24,8 @@ import org.apache.lucene.util.fst.Util;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
public class TestOperations extends LuceneTestCase {
/** Test string union. */
public void testStringUnion() {
@@ -51,7 +53,8 @@ public class TestOperations extends Luce
for (BytesRef bref : strings) {
eachIndividual[i++] = Automata.makeString(bref.utf8ToString());
}
- return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)));
+ return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)),
+ DEFAULT_MAX_DETERMINIZED_STATES);
}
/** Test concatenation with empty language returns empty */
@@ -71,12 +74,12 @@ public class TestOperations extends Luce
Automaton concat1 = Operations.concatenate(expandedSingleton, nfa);
Automaton concat2 = Operations.concatenate(singleton, nfa);
assertFalse(concat2.isDeterministic());
- assertTrue(Operations.sameLanguage(Operations.determinize(concat1),
- Operations.determinize(concat2)));
- assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
- Operations.determinize(concat1)));
- assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
- Operations.determinize(concat2)));
+ assertTrue(Operations.sameLanguage(Operations.determinize(concat1, 100),
+ Operations.determinize(concat2, 100)));
+ assertTrue(Operations.sameLanguage(Operations.determinize(nfa, 100),
+ Operations.determinize(concat1, 100)));
+ assertTrue(Operations.sameLanguage(Operations.determinize(nfa, 100),
+ Operations.determinize(concat2, 100)));
}
public void testGetRandomAcceptedString() throws Throwable {
@@ -86,7 +89,7 @@ public class TestOperations extends Luce
final RegExp re = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
//System.out.println("TEST i=" + i + " re=" + re);
- final Automaton a = Operations.determinize(re.toAutomaton());
+ final Automaton a = Operations.determinize(re.toAutomaton(), DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.isEmpty(a));
final AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
@@ -137,7 +140,7 @@ public class TestOperations extends Luce
*/
public void testFiniteStringsBasic() {
Automaton a = Operations.union(Automata.makeString("dog"), Automata.makeString("duck"));
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
Set<IntsRef> strings = getFiniteStrings(a, -1, true);
assertEquals(2, strings.size());
IntsRefBuilder dog = new IntsRefBuilder();
@@ -190,7 +193,7 @@ public class TestOperations extends Luce
// TODO: what other random things can we do here...
Automaton a = Operations.union(automata);
if (random().nextBoolean()) {
- a = MinimizationOperations.minimize(a);
+ a = MinimizationOperations.minimize(a, 1000000);
if (VERBOSE) {
System.out.println("TEST: a.minimize numStates=" + a.getNumStates());
}
@@ -198,7 +201,7 @@ public class TestOperations extends Luce
if (VERBOSE) {
System.out.println("TEST: a.determinize");
}
- a = Operations.determinize(a);
+ a = Operations.determinize(a, 1000000);
} else if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: a.removeDeadStates");
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Tue Nov 4 22:29:27 2014
@@ -25,6 +25,7 @@ import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
@@ -33,7 +34,6 @@ import org.apache.lucene.search.Filtered
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
@@ -904,7 +904,7 @@ public class FieldQueryTest extends Abst
public void testRegexpQuery() throws Exception {
makeIndexStrMV();
Term term = new Term(F, "d[a-z].g");
- defgMultiTermQueryTest(new RegexpQuery (term));
+ defgMultiTermQueryTest(new RegexpQuery(term));
}
public void testRangeQuery() throws Exception {
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java Tue Nov 4 22:29:27 2014
@@ -35,6 +35,9 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.RegExp;
+
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
/** This class is overridden by QueryParser in QueryParser.jj
* and acts to separate the majority of the Java code from the .jj grammar file.
@@ -83,6 +86,7 @@ public abstract class QueryParserBase ex
boolean analyzeRangeTerms = false;
boolean autoGeneratePhraseQueries;
+ int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES;
// So the generated QueryParser(CharStream) won't error out
protected QueryParserBase() {
@@ -408,6 +412,24 @@ public abstract class QueryParserBase ex
return analyzeRangeTerms;
}
+ /**
+ * @param maxDeterminizedStates the maximum number of states that
+ * determinizing a regexp query may create. If determinization would need
+ * more states than this, a TooComplexToDeterminizeException is thrown.
+ */
+ public void setMaxDeterminizedStates(int maxDeterminizedStates) {
+ this.maxDeterminizedStates = maxDeterminizedStates;
+ }
+
+ /**
+ * @return the maximum number of states that determinizing a regexp query
+ * may create. If determinization would need more states than this, a
+ * TooComplexToDeterminizeException is thrown.
+ */
+ public int getMaxDeterminizedStates() {
+ return maxDeterminizedStates;
+ }
+
protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) {
boolean required, prohibited;
@@ -563,7 +585,8 @@ public abstract class QueryParserBase ex
* @return new RegexpQuery instance
*/
protected Query newRegexpQuery(Term regexp) {
- RegexpQuery query = new RegexpQuery(regexp);
+ RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL,
+ maxDeterminizedStates);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java Tue Nov 4 22:29:27 2014
@@ -38,6 +38,7 @@ public class RegexpQueryNodeBuilder impl
public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;
+ // TODO: make maxDeterminizedStates configurable with a reasonable default (QueryParserBase defaults to Operations.DEFAULT_MAX_DETERMINIZED_STATES)
RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(),
regexpNode.textToBytesRef()));
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java Tue Nov 4 22:29:27 2014
@@ -40,6 +40,8 @@ import org.apache.lucene.util.automaton.
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
// TODO
// - compare perf to PhraseQuery exact and sloppy
// - optimize: find terms that are in fact MUST (because all paths
@@ -108,6 +110,16 @@ public class TermAutomatonQuery extends
/** Call this once you are done adding states/transitions. */
public void finish() {
+ finish(DEFAULT_MAX_DETERMINIZED_STATES);
+ }
+
+ /**
+ * Call this once you are done adding states/transitions.
+ * @param maxDeterminizedStates Maximum number of states created when
+ * determinizing the automaton. Higher numbers allow this operation to
+ * consume more memory but allow more complex automata.
+ */
+ public void finish(int maxDeterminizedStates) {
Automaton automaton = builder.finish();
// System.out.println("before det:\n" + automaton.toDot());
@@ -171,7 +183,8 @@ public class TermAutomatonQuery extends
automaton = newAutomaton;
}
- det = Operations.removeDeadStates(Operations.determinize(automaton));
+ det = Operations.removeDeadStates(Operations.determinize(automaton,
+ maxDeterminizedStates));
}
@Override
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Tue Nov 4 22:29:27 2014
@@ -60,6 +60,8 @@ import org.apache.lucene.util.fst.Util.R
import org.apache.lucene.util.fst.Util.TopResults;
import org.apache.lucene.util.fst.Util;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
/**
* Suggester that first analyzes the surface form, adds the
* analyzed form to a weighted FST, and then does the same
@@ -899,7 +901,7 @@ public class AnalyzingSuggester extends
// TODO: we can optimize this somewhat by determinizing
// while we convert
- automaton = Operations.determinize(automaton);
+ automaton = Operations.determinize(automaton, DEFAULT_MAX_DETERMINIZED_STATES);
return automaton;
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java Tue Nov 4 22:29:27 2014
@@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest
*/
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
@@ -30,13 +29,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.UTF32ToUTF8;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
/**
* Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
* based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
@@ -205,7 +206,7 @@ public final class FuzzySuggester extend
protected Automaton convertAutomaton(Automaton a) {
if (unicodeAware) {
Automaton utf8automaton = new UTF32ToUTF8().convert(a);
- utf8automaton = Operations.determinize(utf8automaton);
+ utf8automaton = Operations.determinize(utf8automaton, DEFAULT_MAX_DETERMINIZED_STATES);
return utf8automaton;
} else {
return a;
@@ -253,7 +254,7 @@ public final class FuzzySuggester extend
Automaton a = Operations.union(Arrays.asList(subs));
// TODO: we could call toLevenshteinAutomata() before det?
// this only happens if you have multiple paths anyway (e.g. synonyms)
- return Operations.determinize(a);
+ return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
}
}
}
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Tue Nov 4 22:29:27 2014
@@ -40,6 +40,11 @@ import org.apache.lucene.util.UnicodeUti
* basic unoptimized implementations (*slow) for testing.
*/
public class AutomatonTestUtil {
+ /**
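+ * Default maximum number of states that {@link Operations#determinize} should create when called from this test utility; declared separately from the constant of the same name in {@link Operations}.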
+ */
+ public static final int DEFAULT_MAX_DETERMINIZED_STATES = 1000000;
+
/** Returns random string, including full unicode range. */
public static String randomRegexp(Random r) {
while (true) {
@@ -257,12 +262,12 @@ public class AutomatonTestUtil {
// get two random Automata from regexps
Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
if (random.nextBoolean()) {
- a1 = Operations.complement(a1);
+ a1 = Operations.complement(a1, DEFAULT_MAX_DETERMINIZED_STATES);
}
Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
if (random.nextBoolean()) {
- a2 = Operations.complement(a2);
+ a2 = Operations.complement(a2, DEFAULT_MAX_DETERMINIZED_STATES);
}
// combine them in random ways
@@ -270,7 +275,7 @@ public class AutomatonTestUtil {
case 0: return Operations.concatenate(a1, a2);
case 1: return Operations.union(a1, a2);
case 2: return Operations.intersection(a1, a2);
- default: return Operations.minus(a1, a2);
+ default: return Operations.minus(a1, a2, DEFAULT_MAX_DETERMINIZED_STATES);
}
}
Modified: lucene/dev/branches/lucene_solr_4_10/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java Tue Nov 4 22:29:27 2014
@@ -42,8 +42,8 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
@@ -789,7 +789,7 @@ public abstract class SolrQueryParserBas
Automata.makeChar(factory.getMarkerChar()),
Automata.makeAnyString());
// subtract these away
- automaton = Operations.minus(automaton, falsePositives);
+ automaton = Operations.minus(automaton, falsePositives, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
return new AutomatonQuery(term, automaton) {
// override toString so its completely transparent
Modified: lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java?rev=1636762&r1=1636761&r2=1636762&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java Tue Nov 4 22:29:27 2014
@@ -16,7 +16,6 @@ package org.apache.solr.analysis;
* limitations under the License.
*/
-
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Field;
@@ -28,8 +27,8 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
-import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
@@ -163,7 +162,8 @@ public class TestReversedWildcardFilterF
return false;
}
Automaton automaton = ((AutomatonQuery) q).getAutomaton();
- String prefix = Operations.getCommonPrefix(Operations.determinize(automaton));
+ String prefix = Operations.getCommonPrefix(Operations.determinize(automaton,
+ Operations.DEFAULT_MAX_DETERMINIZED_STATES));
return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
}