You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/10/22 02:01:16 UTC

svn commit: r1026182 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/automaton/ test/org/apache/lucene/util/automaton/

Author: rmuir
Date: Fri Oct 22 00:01:16 2010
New Revision: 1026182

URL: http://svn.apache.org/viewvc?rev=1026182&view=rev
Log:
LUCENE-2716: increase test coverage for minimizeSchindler() and determinizeMcCandless()

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Fri Oct 22 00:01:16 2010
@@ -178,7 +178,7 @@ final public class SpecialOperations {
    * Reverses the language of the given (non-singleton) automaton while returning
    * the set of new initial states.
    */
-  private static Set<State> reverse(Automaton a) {
+  static Set<State> reverse(Automaton a) {
     a.expandSingleton();
     // reverse all edges
     HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/AutomatonTestUtil.java Fri Oct 22 00:01:16 2010
@@ -250,4 +250,128 @@ public class AutomatonTestUtil {
       return ArrayUtil.toIntArray(soFar);
     }
   }
+  
+  /** return a random NFA/DFA for testing */
+  public static Automaton randomAutomaton(Random random) {
+    // get two random Automata from regexps
+    Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
+    if (random.nextBoolean())
+      a1 = BasicOperations.complement(a1);
+    
+    Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
+    if (random.nextBoolean()) 
+      a2 = BasicOperations.complement(a2);
+    
+    // combine them in random ways
+    switch(random.nextInt(4)) {
+      case 0: return BasicOperations.concatenate(a1, a2);
+      case 1: return BasicOperations.union(a1, a2);
+      case 2: return BasicOperations.intersection(a1, a2);
+      default: return BasicOperations.minus(a1, a2);
+    }
+  }
+  
+  /** 
+   * below are original, unoptimized implementations of DFA operations for testing.
+   * These are from brics automaton, full license (BSD) below:
+   */
+  
+  /*
+   * dk.brics.automaton
+   * 
+   * Copyright (c) 2001-2009 Anders Moeller
+   * All rights reserved.
+   * 
+   * Redistribution and use in source and binary forms, with or without
+   * modification, are permitted provided that the following conditions
+   * are met:
+   * 1. Redistributions of source code must retain the above copyright
+   *    notice, this list of conditions and the following disclaimer.
+   * 2. Redistributions in binary form must reproduce the above copyright
+   *    notice, this list of conditions and the following disclaimer in the
+   *    documentation and/or other materials provided with the distribution.
+   * 3. The name of the author may not be used to endorse or promote products
+   *    derived from this software without specific prior written permission.
+   * 
+   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   */
+
+  /**
+   * Simple, original brics implementation of Brzozowski minimize()
+   */
+  public static void minimizeSimple(Automaton a) {
+    if (a.isSingleton())
+      return;
+    determinizeSimple(a, SpecialOperations.reverse(a));
+    determinizeSimple(a, SpecialOperations.reverse(a));
+  }
+  
+  /**
+   * Simple, original brics implementation of determinize()
+   */
+  public static void determinizeSimple(Automaton a) {
+    if (a.deterministic || a.isSingleton())
+      return;
+    Set<State> initialset = new HashSet<State>();
+    initialset.add(a.initial);
+    determinizeSimple(a, initialset);
+  }
+  
+  /** 
+   * Simple, original brics implementation of determinize()
+   * Determinizes the given automaton using the given set of initial states. 
+   */
+  public static void determinizeSimple(Automaton a, Set<State> initialset) {
+    int[] points = a.getStartPoints();
+    // subset construction
+    Map<Set<State>, Set<State>> sets = new HashMap<Set<State>, Set<State>>();
+    LinkedList<Set<State>> worklist = new LinkedList<Set<State>>();
+    Map<Set<State>, State> newstate = new HashMap<Set<State>, State>();
+    sets.put(initialset, initialset);
+    worklist.add(initialset);
+    a.initial = new State();
+    newstate.put(initialset, a.initial);
+    while (worklist.size() > 0) {
+      Set<State> s = worklist.removeFirst();
+      State r = newstate.get(s);
+      for (State q : s)
+        if (q.accept) {
+          r.accept = true;
+          break;
+        }
+      for (int n = 0; n < points.length; n++) {
+        Set<State> p = new HashSet<State>();
+        for (State q : s)
+          for (Transition t : q.getTransitions())
+            if (t.min <= points[n] && points[n] <= t.max)
+              p.add(t.to);
+        if (!sets.containsKey(p)) {
+          sets.put(p, p);
+          worklist.add(p);
+          newstate.put(p, new State());
+        }
+        State q = newstate.get(p);
+        int min = points[n];
+        int max;
+        if (n + 1 < points.length)
+          max = points[n + 1] - 1;
+        else
+          max = Character.MAX_CODE_POINT;
+        r.addTransition(new Transition(min, max, q));
+      }
+    }
+    a.deterministic = true;
+    a.clearNumberedStates();
+    a.removeDeadTransitions();
+  }
+
 }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestDeterminism.java Fri Oct 22 00:01:16 2010
@@ -20,16 +20,11 @@ package org.apache.lucene.util.automaton
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
- * Not thorough, but tries to test determinism correctness
+ * Not completely thorough, but tries to test determinism correctness
  * somewhat randomly.
  */
 public class TestDeterminism extends LuceneTestCase {
   
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-  }
-  
   /** test a bunch of random regular expressions */
   public void testRegexps() throws Exception {
       int num = 500 * RANDOM_MULTIPLIER;
@@ -37,6 +32,20 @@ public class TestDeterminism extends Luc
         assertAutomaton(AutomatonTestUtil.randomRegexp(random).toAutomaton());
   }
   
+  /** test against a simple, unoptimized det */
+  public void testAgainstSimple() throws Exception {
+    int num = 2000 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < num; i++) {
+      Automaton a = AutomatonTestUtil.randomAutomaton(random);
+      Automaton b = a.clone();
+      AutomatonTestUtil.determinizeSimple(a);
+      b.deterministic = false; // force det
+      b.determinize();
+      // TODO: more verifications possible?
+      assertTrue(BasicOperations.sameLanguage(a, b));
+    }
+  }
+  
   private static void assertAutomaton(Automaton a) {
     Automaton clone = a.clone();
     // complement(complement(a)) = a

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1026182&r1=1026181&r2=1026182&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Fri Oct 22 00:01:16 2010
@@ -21,35 +21,32 @@ import org.apache.lucene.util.LuceneTest
 
 /** 
  * This test builds some randomish NFA/DFA and minimizes them.
- * the minimal and non-minimal are compared to ensure they are the same.
  */
 public class TestMinimize extends LuceneTestCase {
+  /** the minimal and non-minimal are compared to ensure they are the same. */
   public void test() {
-    int num = 10000 * RANDOM_MULTIPLIER;
+    int num = 2000 * RANDOM_MULTIPLIER;
     for (int i = 0; i < num; i++) {
-      Automaton a = randomishAutomaton();
+      Automaton a = AutomatonTestUtil.randomAutomaton(random);
       Automaton b = a.clone();
       MinimizationOperations.minimize(b);
       assertTrue(BasicOperations.sameLanguage(a, b));
     }
   }
   
-  private Automaton randomishAutomaton() {
-    // get two random Automata from regexps
-    Automaton a1 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
-    if (random.nextBoolean())
-      a1 = BasicOperations.complement(a1);
-    
-    Automaton a2 = AutomatonTestUtil.randomRegexp(random).toAutomaton();
-    if (random.nextBoolean()) 
-      a2 = BasicOperations.complement(a2);
-    
-    // combine them in random ways
-    switch(random.nextInt(4)) {
-      case 0: return BasicOperations.concatenate(a1, a2);
-      case 1: return BasicOperations.union(a1, a2);
-      case 2: return BasicOperations.intersection(a1, a2);
-      default: return BasicOperations.minus(a1, a2);
+  /** compare minimized against minimized with a slower, simple impl.
+   * we check not only that they are the same, but that #states/#transitions
+   * are the same. */
+  public void testAgainstBrzozowski() {
+    int num = 2000 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < num; i++) {
+      Automaton a = AutomatonTestUtil.randomAutomaton(random);
+      AutomatonTestUtil.minimizeSimple(a);
+      Automaton b = a.clone();
+      MinimizationOperations.minimize(b);
+      assertTrue(BasicOperations.sameLanguage(a, b));
+      assertEquals(a.getNumberOfStates(), b.getNumberOfStates());
+      assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
     }
   }
 }