You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/10/22 02:01:16 UTC
svn commit: r1026182 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/util/automaton/ test/org/apache/lucene/util/automaton/
Author: rmuir
Date: Fri Oct 22 00:01:16 2010
New Revision: 1026182
URL: http://svn.apache.org/viewvc?rev=1026182&view=rev
Log:
LUCENE-2716: increase test coverage for minimizeSchindler() and determinizeMcCandless()
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Fri Oct 22 00:01:16 2010
@@ -178,7 +178,7 @@ final public class SpecialOperations {
* Reverses the language of the given (non-singleton) automaton while returning
* the set of new initial states.
*/
- private static Set<State> reverse(Automaton a) {
+ static Set<State> reverse(Automaton a) {
a.expandSingleton();
// reverse all edges
HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java Fri Oct 22 00:01:16 2010
@@ -250,4 +250,128 @@ public class AutomatonTestUtil {
return ArrayUtil.toIntArray(soFar);
}
}
+
+ /** return a random NFA/DFA for testing */
+ public static Automaton randomAutomaton(Random random) {
+ // get two random Automata from regexps
+ Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
+ if (random.nextBoolean())
+ a1 = BasicOperations.complement(a1);
+
+ Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
+ if (random.nextBoolean())
+ a2 = BasicOperations.complement(a2);
+
+ // combine them in random ways
+ switch(random.nextInt(4)) {
+ case 0: return BasicOperations.concatenate(a1, a2);
+ case 1: return BasicOperations.union(a1, a2);
+ case 2: return BasicOperations.intersection(a1, a2);
+ default: return BasicOperations.minus(a1, a2);
+ }
+ }
+
+ /**
+ * below are original, unoptimized implementations of DFA operations for testing.
+ * These are from brics automaton, full license (BSD) below:
+ */
+
+ /*
+ * dk.brics.automaton
+ *
+ * Copyright (c) 2001-2009 Anders Moeller
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /**
+ * Simple, original brics implementation of Brzozowski minimize()
+ */
+ public static void minimizeSimple(Automaton a) {
+ if (a.isSingleton())
+ return;
+ determinizeSimple(a, SpecialOperations.reverse(a));
+ determinizeSimple(a, SpecialOperations.reverse(a));
+ }
+
+ /**
+ * Simple, original brics implementation of determinize()
+ */
+ public static void determinizeSimple(Automaton a) {
+ if (a.deterministic || a.isSingleton())
+ return;
+ Set<State> initialset = new HashSet<State>();
+ initialset.add(a.initial);
+ determinizeSimple(a, initialset);
+ }
+
+ /**
+ * Simple, original brics implementation of determinize()
+ * Determinizes the given automaton using the given set of initial states.
+ */
+ public static void determinizeSimple(Automaton a, Set<State> initialset) {
+ int[] points = a.getStartPoints();
+ // subset construction
+ Map<Set<State>, Set<State>> sets = new HashMap<Set<State>, Set<State>>();
+ LinkedList<Set<State>> worklist = new LinkedList<Set<State>>();
+ Map<Set<State>, State> newstate = new HashMap<Set<State>, State>();
+ sets.put(initialset, initialset);
+ worklist.add(initialset);
+ a.initial = new State();
+ newstate.put(initialset, a.initial);
+ while (worklist.size() > 0) {
+ Set<State> s = worklist.removeFirst();
+ State r = newstate.get(s);
+ for (State q : s)
+ if (q.accept) {
+ r.accept = true;
+ break;
+ }
+ for (int n = 0; n < points.length; n++) {
+ Set<State> p = new HashSet<State>();
+ for (State q : s)
+ for (Transition t : q.getTransitions())
+ if (t.min <= points[n] && points[n] <= t.max)
+ p.add(t.to);
+ if (!sets.containsKey(p)) {
+ sets.put(p, p);
+ worklist.add(p);
+ newstate.put(p, new State());
+ }
+ State q = newstate.get(p);
+ int min = points[n];
+ int max;
+ if (n + 1 < points.length)
+ max = points[n + 1] - 1;
+ else
+ max = Character.MAX_CODE_POINT;
+ r.addTransition(new Transition(min, max, q));
+ }
+ }
+ a.deterministic = true;
+ a.clearNumberedStates();
+ a.removeDeadTransitions();
+ }
+
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java Fri Oct 22 00:01:16 2010
@@ -20,16 +20,11 @@ package org.apache.lucene.util.automaton
import org.apache.lucene.util.LuceneTestCase;
/**
- * Not thorough, but tries to test determinism correctness
+ * Not completely thorough, but tries to test determinism correctness
* somewhat randomly.
*/
public class TestDeterminism extends LuceneTestCase {
- @Override
- public void setUp() throws Exception {
- super.setUp();
- }
-
/** test a bunch of random regular expressions */
public void testRegexps() throws Exception {
int num = 500 * RANDOM_MULTIPLIER;
@@ -37,6 +32,20 @@ public class TestDeterminism extends Luc
assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
}
+ /** test against a simple, unoptimized det */
+ public void testAgainstSimple() throws Exception {
+ int num = 2000 * RANDOM_MULTIPLIER;
+ for (int i = 0; i < num; i++) {
+ Automaton a = AutomatonTestUtil.randomAutomaton(random);
+ Automaton b = a.clone();
+ AutomatonTestUtil.determinizeSimple(a);
+ b.deterministic = false; // force det
+ b.determinize();
+ // TODO: more verifications possible?
+ assertTrue(BasicOperations.sameLanguage(a, b));
+ }
+ }
+
private static void assertAutomaton(Automaton a) {
Automaton clone = a.clone();
// complement(complement(a)) = a
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Fri Oct 22 00:01:16 2010
@@ -21,35 +21,32 @@ import org.apache.lucene.util.LuceneTest
/**
* This test builds some randomish NFA/DFA and minimizes them.
- * the minimal and non-minimal are compared to ensure they are the same.
*/
public class TestMinimize extends LuceneTestCase {
+ /** the minimal and non-minimal are compared to ensure they are the same. */
public void test() {
- int num = 10000 * RANDOM_MULTIPLIER;
+ int num = 2000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
- Automaton a = randomishAutomaton();
+ Automaton a = AutomatonTestUtil.randomAutomaton(random);
Automaton b = a.clone();
MinimizationOperations.minimize(b);
assertTrue(BasicOperations.sameLanguage(a, b));
}
}
- private Automaton randomishAutomaton() {
- // get two random Automata from regexps
- Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
- if (random.nextBoolean())
- a1 = BasicOperations.complement(a1);
-
- Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
- if (random.nextBoolean())
- a2 = BasicOperations.complement(a2);
-
- // combine them in random ways
- switch(random.nextInt(4)) {
- case 0: return BasicOperations.concatenate(a1, a2);
- case 1: return BasicOperations.union(a1, a2);
- case 2: return BasicOperations.intersection(a1, a2);
- default: return BasicOperations.minus(a1, a2);
+ /** compare minimized against minimized with a slower, simple impl.
+ * we check not only that they are the same, but that #states/#transitions
+ * are the same. */
+ public void testAgainstBrzozowski() {
+ int num = 2000 * RANDOM_MULTIPLIER;
+ for (int i = 0; i < num; i++) {
+ Automaton a = AutomatonTestUtil.randomAutomaton(random);
+ AutomatonTestUtil.minimizeSimple(a);
+ Automaton b = a.clone();
+ MinimizationOperations.minimize(b);
+ assertTrue(BasicOperations.sameLanguage(a, b));
+ assertEquals(a.getNumberOfStates(), b.getNumberOfStates());
+ assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
}
}
}