You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/16 21:22:03 UTC
svn commit: r1602966 [2/2] - in /lucene/dev/branches/lucene5752/lucene:
core/src/java/org/apache/lucene/search/
core/src/java/org/apache/lucene/util/automaton/
core/src/test/org/apache/lucene/analysis/
core/src/test/org/apache/lucene/index/ core/src/te...
Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java Mon Jun 16 19:22:02 2014
@@ -19,17 +19,19 @@ package org.apache.lucene.util.automaton
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Set;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStringsLight;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
import org.apache.lucene.util.fst.Util;
public class TestLightAutomaton extends LuceneTestCase {
@@ -46,7 +48,7 @@ public class TestLightAutomaton extends
a.addTransition(start, end, 'd', 'd');
a.addTransition(x, y, 'b', 'b');
a.addTransition(y, end, 'c', 'c');
- a.finish();
+ a.finishState();
}
public void testReduceBasic() throws Exception {
@@ -62,7 +64,7 @@ public class TestLightAutomaton extends
a.addTransition(start, end, 'x', 'x');
a.addTransition(start, end, 'y', 'y');
- a.finish();
+ a.finishState();
assertEquals(3, a.getNumTransitions(start));
Transition scratch = new Transition();
a.initTransition(start, scratch);
@@ -79,9 +81,9 @@ public class TestLightAutomaton extends
public void testSameLanguage() throws Exception {
LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
- LightAutomaton a2 = BasicOperations.concatenateLight(
+ LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.concatenateLight(
BasicAutomata.makeStringLight("foo"),
- BasicAutomata.makeStringLight("bar"));
+ BasicAutomata.makeStringLight("bar")));
assertTrue(BasicOperations.sameLanguage(a1, a2));
}
@@ -149,7 +151,7 @@ public class TestLightAutomaton extends
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(BasicAutomata.makeStringLight("foobar"),
BasicAutomata.makeStringLight("boobar")));
LightAutomaton aMin = MinimizationOperationsLight.minimize(a);
- assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(a), aMin));
+ assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)), aMin));
}
public void testReverse() throws Exception {
@@ -234,7 +236,7 @@ public class TestLightAutomaton extends
a.setAccept(fini, true);
a.addTransition(init, fini, 'm');
a.addTransition(fini, fini, 'm');
- a.finish();
+ a.finishState();
assertEquals(0, SpecialOperations.getCommonSuffixBytesRef(a).length);
}
@@ -244,8 +246,8 @@ public class TestLightAutomaton extends
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
LightAutomaton ra = SpecialOperations.reverse(a);
LightAutomaton rra = SpecialOperations.reverse(ra);
- assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(a),
- BasicOperations.determinize(rra)));
+ assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
+ BasicOperations.determinize(BasicOperations.removeDeadStates(rra))));
}
}
@@ -260,16 +262,16 @@ public class TestLightAutomaton extends
LightAutomaton ra = SpecialOperations.reverse(a);
LightAutomaton rda = BasicOperations.determinize(ra);
- if (a.isEmpty()) {
- assertTrue(rda.isEmpty());
+ if (BasicOperations.isEmpty(a)) {
+ assertTrue(BasicOperations.isEmpty(rda));
continue;
}
- RandomAcceptedStringsLight rasl = new RandomAcceptedStringsLight(a);
+ RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
for(int iter2=0;iter2<20;iter2++) {
// Find string accepted by original automaton
- int[] s = rasl.getRandomAcceptedString(random());
+ int[] s = ras.getRandomAcceptedString(random());
// Reverse it
for(int j=0;j<s.length/2;j++) {
@@ -290,11 +292,16 @@ public class TestLightAutomaton extends
assertTrue(BasicOperations.run(a, ""));
}
+ public void testBasicIsEmpty() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ a.createState();
+ assertTrue(BasicOperations.isEmpty(a));
+ }
public void testRemoveDeadTransitionsEmpty() throws Exception {
LightAutomaton a = BasicAutomata.makeEmptyLight();
LightAutomaton a2 = BasicOperations.removeDeadStates(a);
- assertTrue(a2.isEmpty());
+ assertTrue(BasicOperations.isEmpty(a2));
}
public void testInvalidAddTransition() throws Exception {
@@ -340,13 +347,38 @@ public class TestLightAutomaton extends
}
assertTrue(BasicOperations.sameLanguage(
- BasicOperations.determinize(a),
- BasicOperations.determinize(builder.finish())));
+ BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
+ BasicOperations.determinize(BasicOperations.removeDeadStates(builder.finish()))));
}
}
- // nocommit testMinus
+ public void testIsTotal() throws Exception {
+ assertFalse(BasicOperations.isTotal(new LightAutomaton()));
+ LightAutomaton a = new LightAutomaton();
+ int init = a.createState();
+ int fini = a.createState();
+ a.setAccept(fini, true);
+ a.addTransition(init, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ a.finishState();
+ assertFalse(BasicOperations.isTotal(a));
+ a.addTransition(fini, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ a.finishState();
+ assertFalse(BasicOperations.isTotal(a));
+ a.setAccept(init, true);
+ assertTrue(BasicOperations.isTotal(MinimizationOperationsLight.minimize(a)));
+ }
+
+ public void testMinimizeEmpty() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ int init = a.createState();
+ int fini = a.createState();
+ a.addTransition(init, fini, 'a');
+ a.finishState();
+ a = MinimizationOperationsLight.minimize(a);
+ assertEquals(0, a.getNumStates());
+ }
+
public void testMinus() throws Exception {
LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
LightAutomaton a2 = BasicAutomata.makeStringLight("boobar");
@@ -379,6 +411,20 @@ public class TestLightAutomaton extends
assertMatches(a4);
}
+ public void testOneInterval() throws Exception {
+ LightAutomaton a = BasicAutomata.makeIntervalLight(999, 1032, 0);
+ a = BasicOperations.determinize(a);
+ assertTrue(BasicOperations.run(a, "0999"));
+ assertTrue(BasicOperations.run(a, "00999"));
+ assertTrue(BasicOperations.run(a, "000999"));
+ }
+
+ public void testAnotherInterval() throws Exception {
+ LightAutomaton a = BasicAutomata.makeIntervalLight(1, 2, 0);
+ a = BasicOperations.determinize(a);
+ assertTrue(BasicOperations.run(a, "01"));
+ }
+
public void testIntervalRandom() throws Exception {
int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
@@ -397,7 +443,7 @@ public class TestLightAutomaton extends
}
String prefix = b.toString();
- LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits ));
+ LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits));
if (random().nextBoolean()) {
a = MinimizationOperationsLight.minimize(a);
}
@@ -414,18 +460,43 @@ public class TestLightAutomaton extends
int x = random().nextInt(2*max);
boolean expected = x >= min && x <= max;
String sx = Integer.toString(x);
- if (digits > 0 && sx.length() < digits) {
+ if (sx.length() < digits) {
// Left-fill with 0s
sx = b.substring(sx.length()) + sx;
+ } else if (digits == 0) {
+ // Left-fill with random number of 0s:
+ int numZeros = random().nextInt(10);
+ StringBuilder sb = new StringBuilder();
+ for(int i=0;i<numZeros;i++) {
+ sb.append('0');
+ }
+ sb.append(sx);
+ sx = sb.toString();
}
assertEquals(expected, BasicOperations.run(a, sx));
}
}
}
- // nocommit testRemoveDead of an A acceptint nothing should go to emptye A (0 states)
+ private void assertMatches(LightAutomaton a, String... strings) {
+ Set<IntsRef> expected = new HashSet<>();
+ for(String s : strings) {
+ IntsRef ints = new IntsRef();
+ expected.add(Util.toUTF32(s, ints));
+ }
+
+ assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1));
+ }
+
+ public void testConcatenatePreservesDet() throws Exception {
+ LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
+ assertTrue(a1.isDeterministic());
+ LightAutomaton a2 = BasicAutomata.makeStringLight("baz");
+ assertTrue(a2.isDeterministic());
+ assertTrue((BasicOperations.concatenateLight(Arrays.asList(a1, a2)).isDeterministic()));
+ }
- public void testRemoveDead() throws Exception {
+ public void testRemoveDeadStates() throws Exception {
LightAutomaton a = BasicOperations.concatenateLight(Arrays.asList(BasicAutomata.makeStringLight("x"),
BasicAutomata.makeStringLight("y")));
assertEquals(4, a.getNumStates());
@@ -433,15 +504,510 @@ public class TestLightAutomaton extends
assertEquals(3, a.getNumStates());
}
- // nocommit more tests ... it's an algebra
+ public void testRemoveDeadStatesEmpty1() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ a.finishState();
+ assertTrue(BasicOperations.isEmpty(a));
+ assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
+ }
- private void assertMatches(LightAutomaton a, String... strings) {
- Set<IntsRef> expected = new HashSet<>();
- for(String s : strings) {
- IntsRef ints = new IntsRef();
- expected.add(Util.toUTF32(s, ints));
+ public void testRemoveDeadStatesEmpty2() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ a.finishState();
+ assertTrue(BasicOperations.isEmpty(a));
+ assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
+ }
+
+ public void testRemoveDeadStatesEmpty3() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ int init = a.createState();
+ int fini = a.createState();
+ a.addTransition(init, fini, 'a');
+ LightAutomaton a2 = BasicOperations.removeDeadStates(a);
+ assertEquals(0, a2.getNumStates());
+ }
+
+ public void testConcatEmpty() throws Exception {
+ // If you concat empty automaton to anything the result should still be empty:
+ LightAutomaton a = BasicOperations.concatenateLight(BasicAutomata.makeEmptyLight(),
+ BasicAutomata.makeStringLight("foo"));
+ assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
+
+ a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight("foo"),
+ BasicAutomata.makeEmptyLight());
+ assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
+ }
+
+ public void testSeemsNonEmptyButIsNot1() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ // Init state has a transition but doesn't lead to accept
+ int init = a.createState();
+ int s = a.createState();
+ a.addTransition(init, s, 'a');
+ a.finishState();
+ assertTrue(BasicOperations.isEmpty(a));
+ }
+
+ public void testSeemsNonEmptyButIsNot2() throws Exception {
+ LightAutomaton a = new LightAutomaton();
+ int init = a.createState();
+ int s = a.createState();
+ a.addTransition(init, s, 'a');
+ // An orphan'd accept state
+ s = a.createState();
+ a.setAccept(s, true);
+ a.finishState();
+ assertTrue(BasicOperations.isEmpty(a));
+ }
+
+ public void testSameLanguage1() throws Exception {
+ LightAutomaton a = BasicAutomata.makeEmptyStringLight();
+ LightAutomaton a2 = BasicAutomata.makeEmptyStringLight();
+ int state = a2.createState();
+ a2.addTransition(0, state, 'a');
+ a2.finishState();
+ assertTrue(BasicOperations.sameLanguage(BasicOperations.removeDeadStates(a),
+ BasicOperations.removeDeadStates(a2)));
+ }
+
+ private LightAutomaton randomNoOp(LightAutomaton a) {
+ switch (random().nextInt(5)) {
+ case 0:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: determinize");
+ }
+ return BasicOperations.determinize(a);
+ case 1:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: minimize");
+ }
+ return MinimizationOperationsLight.minimize(a);
+ case 2:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: removeDeadStates");
+ }
+ return BasicOperations.removeDeadStates(a);
+ case 3:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: reverse reverse");
+ }
+ a = SpecialOperations.reverse(a);
+ a = randomNoOp(a);
+ return SpecialOperations.reverse(a);
+ case 4:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: concat empty string");
+ }
+ return BasicOperations.concatenateLight(a, BasicAutomata.makeEmptyStringLight());
+ case 5:
+ if (VERBOSE) {
+ System.out.println(" randomNoOp: union empty automaton");
+ }
+ return BasicOperations.unionLight(a, BasicAutomata.makeEmptyLight());
}
+ assert false;
+ return null;
+ }
- assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1));
+ private LightAutomaton unionTerms(Collection<BytesRef> terms) {
+ LightAutomaton a;
+ if (random().nextBoolean()) {
+ if (VERBOSE) {
+ System.out.println("TEST: unionTerms: use union");
+ }
+ List<LightAutomaton> as = new ArrayList<>();
+ for(BytesRef term : terms) {
+ as.add(BasicAutomata.makeStringLight(term.utf8ToString()));
+ }
+ a = BasicOperations.unionLight(as);
+ } else {
+ if (VERBOSE) {
+ System.out.println("TEST: unionTerms: use makeStringUnion");
+ }
+ List<BytesRef> termsList = new ArrayList<>(terms);
+ Collections.sort(termsList);
+ a = BasicAutomata.makeStringUnionLight(termsList);
+ }
+
+ return randomNoOp(a);
+ }
+
+ private String getRandomString(boolean isAscii) {
+ if (isAscii) {
+ return TestUtil.randomSimpleString(random());
+ } else {
+ return TestUtil.randomRealisticUnicodeString(random());
+ }
+ }
+
+ public void testRandomFinite() throws Exception {
+
+ int numTerms = atLeast(10);
+ int iters = atLeast(100);
+
+ // Some of the ops we do (stripping random byte, reverse) turn valid UTF8 into invalid if we allow non-ascii:
+ boolean isAscii = random().nextBoolean();
+
+ if (VERBOSE) {
+ System.out.println("TEST: isAscii=" + isAscii + " numTerms" + numTerms + " iters=" + iters);
+ }
+
+ Set<BytesRef> terms = new HashSet<>();
+ while (terms.size() < numTerms) {
+ terms.add(new BytesRef(getRandomString(isAscii)));
+ }
+
+ LightAutomaton a = unionTerms(terms);
+ assertSame(terms, a);
+
+ for(int iter=0;iter<iters;iter++) {
+ if (VERBOSE) {
+ System.out.println("TEST: iter=" + iter + " numTerms=" + terms.size());
+ System.out.println(" terms:");
+ for(BytesRef term : terms) {
+ System.out.println(" " + term);
+ }
+ }
+ switch(random().nextInt(14)) {
+
+ case 0:
+ // concatenate prefix
+ {
+ if (VERBOSE) {
+ System.out.println(" op=concat prefix");
+ }
+ Set<BytesRef> newTerms = new HashSet<>();
+ BytesRef prefix = new BytesRef(getRandomString(isAscii));
+ for(BytesRef term : terms) {
+ BytesRef newTerm = BytesRef.deepCopyOf(prefix);
+ newTerm.append(term);
+ newTerms.add(newTerm);
+ }
+ terms = newTerms;
+ boolean wasDeterministic1 = a.isDeterministic();
+ a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight(prefix.utf8ToString()), a);
+ assertEquals(wasDeterministic1, a.isDeterministic());
+ }
+ break;
+
+ case 1:
+ // concatenate suffix
+ {
+ BytesRef suffix = new BytesRef(getRandomString(isAscii));
+ if (VERBOSE) {
+ System.out.println(" op=concat suffix " + suffix);
+ }
+ Set<BytesRef> newTerms = new HashSet<>();
+ for(BytesRef term : terms) {
+ BytesRef newTerm = BytesRef.deepCopyOf(term);
+ newTerm.append(suffix);
+ newTerms.add(newTerm);
+ }
+ terms = newTerms;
+ a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(suffix.utf8ToString()));
+ }
+ break;
+
+ // nocommit sometimes concat a suffix accepting more than 1 term, and sometimes non-det
+
+ case 2:
+ // determinize
+ if (VERBOSE) {
+ System.out.println(" op=determinize");
+ }
+ a = BasicOperations.determinize(a);
+ assertTrue(a.isDeterministic());
+ break;
+
+ case 3:
+ if (VERBOSE) {
+ System.out.println(" op=minimize");
+ }
+ // minimize
+ a = MinimizationOperationsLight.minimize(a);
+ break;
+
+ case 4:
+ // union
+ {
+ if (VERBOSE) {
+ System.out.println(" op=union");
+ }
+ Set<BytesRef> newTerms = new HashSet<>();
+ int numNewTerms = random().nextInt(5);
+ while (newTerms.size() < numNewTerms) {
+ newTerms.add(new BytesRef(getRandomString(isAscii)));
+ }
+ terms.addAll(newTerms);
+ LightAutomaton newA = unionTerms(newTerms);
+ a = BasicOperations.unionLight(a, newA);
+ }
+ break;
+
+ case 5:
+ // optional
+ {
+ if (VERBOSE) {
+ System.out.println(" op=optional");
+ }
+ a = BasicOperations.optionalLight(a);
+ terms.add(new BytesRef());
+ }
+ break;
+
+ case 6:
+ // minus finite
+ {
+ if (VERBOSE) {
+ System.out.println(" op=minus finite");
+ }
+ if (terms.size() > 0) {
+ RandomAcceptedStrings rasl = new RandomAcceptedStrings(BasicOperations.removeDeadStates(a));
+ Set<BytesRef> toRemove = new HashSet<>();
+ int numToRemove = TestUtil.nextInt(random(), 1, (terms.size()+1)/2);
+ while (toRemove.size() < numToRemove) {
+ int[] ints = rasl.getRandomAcceptedString(random());
+ BytesRef term = new BytesRef(UnicodeUtil.newString(ints, 0, ints.length));
+ if (toRemove.contains(term) == false) {
+ toRemove.add(term);
+ }
+ }
+ for(BytesRef term : toRemove) {
+ boolean removed = terms.remove(term);
+ assertTrue(removed);
+ }
+ LightAutomaton a2 = unionTerms(toRemove);
+ a = BasicOperations.minusLight(a, a2);
+ }
+ }
+ break;
+
+ case 7:
+ {
+ // minus infinite
+ List<LightAutomaton> as = new ArrayList<>();
+ int count = TestUtil.nextInt(random(), 1, 5);
+ Set<Integer> prefixes = new HashSet<>();
+ while(prefixes.size() < count) {
+ // prefix is a leading ascii byte; we remove <prefix>* from a
+ int prefix = random().nextInt(128);
+ prefixes.add(prefix);
+ }
+
+ if (VERBOSE) {
+ System.out.println(" op=minus infinite prefixes=" + prefixes);
+ }
+
+ for(int prefix : prefixes) {
+ // prefix is a leading ascii byte; we remove <prefix>* from a
+ LightAutomaton a2 = new LightAutomaton();
+ int init = a2.createState();
+ int state = a2.createState();
+ a2.addTransition(init, state, prefix);
+ a2.setAccept(state, true);
+ a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ a2.finishState();
+ as.add(a2);
+ Iterator<BytesRef> it = terms.iterator();
+ while (it.hasNext()) {
+ BytesRef term = it.next();
+ if (term.length > 0 && (term.bytes[term.offset] & 0xFF) == prefix) {
+ it.remove();
+ }
+ }
+ }
+ LightAutomaton a2 = randomNoOp(BasicOperations.unionLight(as));
+ a = BasicOperations.minusLight(a, a2);
+ }
+ break;
+
+ case 8:
+ {
+ int count = TestUtil.nextInt(random(), 10, 20);
+ if (VERBOSE) {
+ System.out.println(" op=intersect infinite count=" + count);
+ }
+ // intersect infinite
+ List<LightAutomaton> as = new ArrayList<>();
+
+ Set<Integer> prefixes = new HashSet<>();
+ while(prefixes.size() < count) {
+ int prefix = random().nextInt(128);
+ prefixes.add(prefix);
+ }
+ if (VERBOSE) {
+ System.out.println(" prefixes=" + prefixes);
+ }
+
+ for(int prefix : prefixes) {
+ // prefix is a leading ascii byte; we retain <prefix>* in a
+ LightAutomaton a2 = new LightAutomaton();
+ int init = a2.createState();
+ int state = a2.createState();
+ a2.addTransition(init, state, prefix);
+ a2.setAccept(state, true);
+ a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ a2.finishState();
+ as.add(a2);
+ prefixes.add(prefix);
+ }
+
+ LightAutomaton a2 = BasicOperations.unionLight(as);
+ if (random().nextBoolean()) {
+ a2 = BasicOperations.determinize(a2);
+ } else if (random().nextBoolean()) {
+ a2 = MinimizationOperationsLight.minimize(a2);
+ }
+ a = BasicOperations.intersectionLight(a, a2);
+
+ Iterator<BytesRef> it = terms.iterator();
+ while (it.hasNext()) {
+ BytesRef term = it.next();
+ if (term.length == 0 || prefixes.contains(term.bytes[term.offset]&0xff) == false) {
+ if (VERBOSE) {
+ System.out.println(" drop term=" + term);
+ }
+ it.remove();
+ } else {
+ if (VERBOSE) {
+ System.out.println(" keep term=" + term);
+ }
+ }
+ }
+ }
+ break;
+
+ case 9:
+ // reverse
+ if (VERBOSE) {
+ System.out.println(" op=reverse");
+ }
+ a = SpecialOperations.reverse(a);
+ Set<BytesRef> newTerms = new HashSet<>();
+ for(BytesRef term : terms) {
+ newTerms.add(new BytesRef(new StringBuilder(term.utf8ToString()).reverse().toString()));
+ }
+ terms = newTerms;
+ break;
+
+ case 10:
+ if (VERBOSE) {
+ System.out.println(" op=randomNoOp");
+ }
+ a = randomNoOp(a);
+ break;
+
+ case 11:
+ // interval
+ int min = random().nextInt(1000);
+ int max = min + random().nextInt(50);
+ // digits must be non-zero else we make cycle
+ int digits = Integer.toString(max).length();
+ if (VERBOSE) {
+ System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
+ }
+ a = BasicOperations.unionLight(a, BasicAutomata.makeIntervalLight(min, max, digits));
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<digits;i++) {
+ b.append('0');
+ }
+ String prefix = b.toString();
+ for(int i=min;i<=max;i++) {
+ String s = Integer.toString(i);
+ if (s.length() < digits) {
+ // Left-fill with 0s
+ s = prefix.substring(s.length()) + s;
+ }
+ terms.add(new BytesRef(s));
+ }
+ break;
+
+ case 12:
+ if (VERBOSE) {
+ System.out.println(" op=remove the empty string");
+ }
+ a = BasicOperations.minusLight(a, BasicAutomata.makeEmptyStringLight());
+ terms.remove(new BytesRef());
+ break;
+
+ case 13:
+ if (VERBOSE) {
+ System.out.println(" op=add the empty string");
+ }
+ a = BasicOperations.unionLight(a, BasicAutomata.makeEmptyStringLight());
+ terms.add(new BytesRef());
+ break;
+ }
+
+ assertSame(terms, a);
+ }
+
+ assertSame(terms, a);
+ }
+
+ private void assertSame(Collection<BytesRef> terms, LightAutomaton a) {
+
+ try {
+ assertTrue(SpecialOperations.isFinite(a));
+ assertFalse(BasicOperations.isTotal(a));
+
+ LightAutomaton detA = BasicOperations.determinize(a);
+
+ // Make sure all terms are accepted:
+ IntsRef scratch = new IntsRef();
+ for(BytesRef term : terms) {
+ Util.toIntsRef(term, scratch);
+ assertTrue("failed to accept term=" + term.utf8ToString(), BasicOperations.run(detA, term.utf8ToString()));
+ }
+
+ // Use getFiniteStrings:
+ Set<IntsRef> expected = new HashSet<>();
+ for(BytesRef term : terms) {
+ IntsRef intsRef = new IntsRef();
+ Util.toUTF32(term.utf8ToString(), intsRef);
+ expected.add(intsRef);
+ }
+ Set<IntsRef> actual = SpecialOperations.getFiniteStrings(a, -1);
+
+ if (expected.equals(actual) == false) {
+ System.out.println("FAILED:");
+ for(IntsRef term : expected) {
+ if (actual.contains(term) == false) {
+ System.out.println(" term=" + term + " should be accepted but isn't");
+ }
+ }
+ for(IntsRef term : actual) {
+ if (expected.contains(term) == false) {
+ System.out.println(" term=" + term + " is accepted but should not be");
+ }
+ }
+ throw new AssertionError("mismatch");
+ }
+
+ // Use sameLanguage:
+ LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.determinize(unionTerms(terms)));
+ assertTrue(BasicOperations.sameLanguage(a2, BasicOperations.removeDeadStates(BasicOperations.determinize(a))));
+
+ // Do same check, in UTF8 space
+ LightAutomaton utf8 = randomNoOp(new UTF32ToUTF8Light().convert(a));
+
+ Set<IntsRef> expected2 = new HashSet<>();
+ for(BytesRef term : terms) {
+ IntsRef intsRef = new IntsRef();
+ Util.toIntsRef(term, intsRef);
+ expected2.add(intsRef);
+ }
+ assertEquals(expected2, SpecialOperations.getFiniteStrings(utf8, -1));
+ } catch (AssertionError ae) {
+ System.out.println("TEST: FAILED: not same");
+ System.out.println(" terms (count=" + terms.size() + "):");
+ for(BytesRef term : terms) {
+ System.out.println(" " + term);
+ }
+ System.out.println(" automaton:");
+ System.out.println(a.toDot());
+ //a.writeDot("fail");
+ throw ae;
+ }
}
}
Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon Jun 16 19:22:02 2014
@@ -28,8 +28,8 @@ public class TestMinimize extends Lucene
int num = atLeast(200);
for (int i = 0; i < num; i++) {
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
- LightAutomaton la = BasicOperations.determinize(a);
- LightAutomaton lb = BasicOperations.determinize(MinimizationOperationsLight.minimize(a));
+ LightAutomaton la = BasicOperations.determinize(BasicOperations.removeDeadStates(a));
+ LightAutomaton lb = MinimizationOperationsLight.minimize(a);
assertTrue(BasicOperations.sameLanguage(la, lb));
}
}
Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java Mon Jun 16 19:22:02 2014
@@ -17,13 +17,17 @@ package org.apache.lucene.util.automaton
* limitations under the License.
*/
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Random;
+import org.apache.lucene.util.fst.Util;
public class TestUTF32ToUTF8 extends LuceneTestCase {
@@ -203,11 +207,25 @@ public class TestUTF32ToUTF8 extends Luc
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toLightAutomaton());
}
}
+
+ public void testSingleton() throws Exception {
+ int iters = atLeast(100);
+ for(int iter=0;iter<iters;iter++) {
+ String s = TestUtil.randomRealisticUnicodeString(random());
+ LightAutomaton a = BasicAutomata.makeStringLight(s);
+ LightAutomaton utf8 = new UTF32ToUTF8Light().convert(a);
+ IntsRef ints = new IntsRef();
+ Util.toIntsRef(new BytesRef(s), ints);
+ Set<IntsRef> set = new HashSet<>();
+ set.add(ints);
+ assertEquals(set, SpecialOperations.getFiniteStrings(utf8, -1));
+ }
+ }
private void assertAutomaton(LightAutomaton automaton) throws Exception {
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
- final AutomatonTestUtil.RandomAcceptedStringsLight ras = new AutomatonTestUtil.RandomAcceptedStringsLight(automaton);
+ final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);
int num = atLeast(1000);
for (int i = 0; i < num; i++) {
Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Mon Jun 16 19:22:02 2014
@@ -328,7 +328,7 @@ public class AnalyzingSuggester extends
}
}
- result.finish();
+ result.finishState();
return result;
}
Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java Mon Jun 16 19:22:02 2014
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.LightAutomaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
@@ -72,6 +71,10 @@ public class FSTUtil {
assert a.isDeterministic();
final List<Path<T>> queue = new ArrayList<>();
final List<Path<T>> endNodes = new ArrayList<>();
+ if (a.getNumStates() == 0) {
+ return endNodes;
+ }
+
queue.add(new Path<>(0, fst
.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
new IntsRef()));
Modified: lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Mon Jun 16 19:22:02 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.util.automaton
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -31,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.fst.Util;
/**
* Utilities for testing automata.
@@ -136,7 +134,7 @@ public class AutomatonTestUtil {
* Once created, call {@link #getRandomAcceptedString(Random)}
* to get a new string (in UTF-32 codepoints).
*/
- public static class RandomAcceptedStringsLight {
+ public static class RandomAcceptedStrings {
private final Map<Transition,Boolean> leadsToAccept;
private final LightAutomaton a;
@@ -152,7 +150,7 @@ public class AutomatonTestUtil {
}
}
- public RandomAcceptedStringsLight(LightAutomaton a) {
+ public RandomAcceptedStrings(LightAutomaton a) {
this.a = a;
if (a.getNumStates() == 0) {
throw new IllegalArgumentException("this automaton accepts nothing");
@@ -334,6 +332,9 @@ public class AutomatonTestUtil {
* Determinizes the given automaton using the given set of initial states.
*/
public static LightAutomaton determinizeSimpleLight(LightAutomaton a, Set<Integer> initialset) {
+ if (a.getNumStates() == 0) {
+ return a;
+ }
int[] points = a.getStartPoints();
// subset construction
Map<Set<Integer>, Set<Integer>> sets = new HashMap<>();
@@ -448,6 +449,9 @@ public class AutomatonTestUtil {
* this is only used to test the correctness of our faster implementation.
*/
public static boolean isFiniteSlow(LightAutomaton a) {
+ if (a.getNumStates() == 0) {
+ return true;
+ }
return isFiniteSlow(a, 0, new HashSet<Integer>());
}