You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/16 14:15:45 UTC

svn commit: r1103711 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/automaton/MinimizationOperations.java test/org/apache/lucene/util/automaton/TestMinimize.java

Author: uschindler
Date: Mon May 16 12:15:45 2011
New Revision: 1103711

URL: http://svn.apache.org/viewvc?rev=1103711&view=rev
Log:
LUCENE-3101: Fix n^2 memory usage in minimizeSchindler() ähm minimizeHopcroft()

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1103711&r1=1103710&r2=1103711&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Mon May 16 12:15:45 2011
@@ -30,6 +30,8 @@
 package org.apache.lucene.util.automaton;
 
 import java.util.BitSet;
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.LinkedList;
 
 /**
@@ -72,8 +74,12 @@ final public class MinimizationOperation
     final int[] sigma = a.getStartPoints();
     final State[] states = a.getNumberedStates();
     final int sigmaLen = sigma.length, statesLen = states.length;
-    final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
-    final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
+      (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
+    @SuppressWarnings("unchecked") final HashSet<State>[] partition =
+      (HashSet<State>[]) new HashSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
+      (ArrayList<State>[]) new ArrayList[statesLen];
     final int[] block = new int[statesLen];
     final StateList[][] active = new StateList[statesLen][sigmaLen];
     final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@@ -82,8 +88,8 @@ final public class MinimizationOperation
     final BitSet split = new BitSet(statesLen), 
       refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
     for (int q = 0; q < statesLen; q++) {
-      splitblock[q] = new BitSet(statesLen);
-      partition[q] = new BitSet(statesLen);
+      splitblock[q] = new ArrayList<State>();
+      partition[q] = new HashSet<State>();
       for (int x = 0; x < sigmaLen; x++) {
         active[q][x] = new StateList();
       }
@@ -92,23 +98,22 @@ final public class MinimizationOperation
     for (int q = 0; q < statesLen; q++) {
       final State qq = states[q];
       final int j = qq.accept ? 0 : 1;
-      partition[j].set(q);
+      partition[j].add(qq);
       block[q] = j;
       for (int x = 0; x < sigmaLen; x++) {
-        final BitSet[] r =
+        final ArrayList<State>[] r =
           reverse[qq.step(sigma[x]).number];
         if (r[x] == null)
-          r[x] = new BitSet();
-        r[x].set(q);
+          r[x] = new ArrayList<State>();
+        r[x].add(qq);
       }
     }
     // initialize active sets
     for (int j = 0; j <= 1; j++) {
-      final BitSet part = partition[j];
       for (int x = 0; x < sigmaLen; x++) {
-        for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-          if (reverse[i][x] != null)
-            active2[i][x] = active[j][x].add(states[i]);
+        for (final State qq : partition[j]) {
+          if (reverse[qq.number][x] != null)
+            active2[qq.number][x] = active[j][x].add(qq);
         }
       }
     }
@@ -121,18 +126,19 @@ final public class MinimizationOperation
     // process pending until fixed point
     int k = 2;
     while (!pending.isEmpty()) {
-      IntPair ip = pending.removeFirst();
+      final IntPair ip = pending.removeFirst();
       final int p = ip.n1;
       final int x = ip.n2;
       pending2.clear(x*statesLen + p);
       // find states that need to be split off their blocks
       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
-        final BitSet r = reverse[m.q.number][x];
-        if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+        final ArrayList<State> r = reverse[m.q.number][x];
+        if (r != null) for (final State s : r) {
+          final int i = s.number;
           if (!split.get(i)) {
             split.set(i);
             final int j = block[i];
-            splitblock[j].set(i);
+            splitblock[j].add(s);
             if (!refine2.get(j)) {
               refine2.set(j);
               refine.set(j);
@@ -142,18 +148,19 @@ final public class MinimizationOperation
       }
       // refine blocks
       for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
-        final BitSet sb = splitblock[j];
-        if (sb.cardinality() < partition[j].cardinality()) {
-          final BitSet b1 = partition[j], b2 = partition[k];
-          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
-            b1.clear(i);
-            b2.set(i);
-            block[i] = k;
+        final ArrayList<State> sb = splitblock[j];
+        if (sb.size() < partition[j].size()) {
+          final HashSet<State> b1 = partition[j];
+          final HashSet<State> b2 = partition[k];
+          for (final State s : sb) {
+            b1.remove(s);
+            b2.add(s);
+            block[s.number] = k;
             for (int c = 0; c < sigmaLen; c++) {
-              final StateListNode sn = active2[i][c];
+              final StateListNode sn = active2[s.number][c];
               if (sn != null && sn.sl == active[j][c]) {
                 sn.remove();
-                active2[i][c] = active[k][c].add(states[i]);
+                active2[s.number][c] = active[k][c].add(s);
               }
             }
           }
@@ -173,8 +180,8 @@ final public class MinimizationOperation
           k++;
         }
         refine2.clear(j);
-        for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
-          split.clear(i);
+        for (final State s : sb)
+          split.clear(s.number);
         sb.clear();
       }
       refine.clear();
@@ -184,9 +191,7 @@ final public class MinimizationOperation
     for (int n = 0; n < newstates.length; n++) {
       final State s = new State();
       newstates[n] = s;
-      BitSet part = partition[n];
-      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-        final State q = states[i];
+      for (State q : partition[n]) {
         if (q == a.initial) a.initial = s;
         s.accept = q.accept;
         s.number = q.number; // select representative

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1103711&r1=1103710&r2=1103711&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon May 16 12:15:45 2011
@@ -49,4 +49,9 @@ public class TestMinimize extends Lucene
       assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
     }
   }
+  
+  /** n^2 space usage in Hopcroft minimization? */
+  public void testMinimizeHuge() {
+    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+  }
 }



Re: svn commit: r1103711 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/automaton/MinimizationOperations.java test/org/apache/lucene/util/automaton/TestMinimize.java

Posted by Simon Willnauer <si...@googlemail.com>.
On Mon, May 16, 2011 at 2:15 PM,  <us...@apache.org> wrote:
> Author: uschindler
> Date: Mon May 16 12:15:45 2011
> New Revision: 1103711
>
> URL: http://svn.apache.org/viewvc?rev=1103711&view=rev
> Log:
> LUCENE-3101: Fix n^2 memory usage in minimizeSchindler() ähm minimizeHopcroft()

LOL ^ ^
>
> Modified:
>    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
>    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
>
> Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1103711&r1=1103710&r2=1103711&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Mon May 16 12:15:45 2011
> @@ -30,6 +30,8 @@
>  package org.apache.lucene.util.automaton;
>
>  import java.util.BitSet;
> +import java.util.ArrayList;
> +import java.util.HashSet;
>  import java.util.LinkedList;
>
>  /**
> @@ -72,8 +74,12 @@ final public class MinimizationOperation
>     final int[] sigma = a.getStartPoints();
>     final State[] states = a.getNumberedStates();
>     final int sigmaLen = sigma.length, statesLen = states.length;
> -    final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
> -    final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
> +    @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
> +      (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
> +    @SuppressWarnings("unchecked") final HashSet<State>[] partition =
> +      (HashSet<State>[]) new HashSet[statesLen];
> +    @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
> +      (ArrayList<State>[]) new ArrayList[statesLen];
>     final int[] block = new int[statesLen];
>     final StateList[][] active = new StateList[statesLen][sigmaLen];
>     final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
> @@ -82,8 +88,8 @@ final public class MinimizationOperation
>     final BitSet split = new BitSet(statesLen),
>       refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
>     for (int q = 0; q < statesLen; q++) {
> -      splitblock[q] = new BitSet(statesLen);
> -      partition[q] = new BitSet(statesLen);
> +      splitblock[q] = new ArrayList<State>();
> +      partition[q] = new HashSet<State>();
>       for (int x = 0; x < sigmaLen; x++) {
>         active[q][x] = new StateList();
>       }
> @@ -92,23 +98,22 @@ final public class MinimizationOperation
>     for (int q = 0; q < statesLen; q++) {
>       final State qq = states[q];
>       final int j = qq.accept ? 0 : 1;
> -      partition[j].set(q);
> +      partition[j].add(qq);
>       block[q] = j;
>       for (int x = 0; x < sigmaLen; x++) {
> -        final BitSet[] r =
> +        final ArrayList<State>[] r =
>           reverse[qq.step(sigma[x]).number];
>         if (r[x] == null)
> -          r[x] = new BitSet();
> -        r[x].set(q);
> +          r[x] = new ArrayList<State>();
> +        r[x].add(qq);
>       }
>     }
>     // initialize active sets
>     for (int j = 0; j <= 1; j++) {
> -      final BitSet part = partition[j];
>       for (int x = 0; x < sigmaLen; x++) {
> -        for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
> -          if (reverse[i][x] != null)
> -            active2[i][x] = active[j][x].add(states[i]);
> +        for (final State qq : partition[j]) {
> +          if (reverse[qq.number][x] != null)
> +            active2[qq.number][x] = active[j][x].add(qq);
>         }
>       }
>     }
> @@ -121,18 +126,19 @@ final public class MinimizationOperation
>     // process pending until fixed point
>     int k = 2;
>     while (!pending.isEmpty()) {
> -      IntPair ip = pending.removeFirst();
> +      final IntPair ip = pending.removeFirst();
>       final int p = ip.n1;
>       final int x = ip.n2;
>       pending2.clear(x*statesLen + p);
>       // find states that need to be split off their blocks
>       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
> -        final BitSet r = reverse[m.q.number][x];
> -        if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
> +        final ArrayList<State> r = reverse[m.q.number][x];
> +        if (r != null) for (final State s : r) {
> +          final int i = s.number;
>           if (!split.get(i)) {
>             split.set(i);
>             final int j = block[i];
> -            splitblock[j].set(i);
> +            splitblock[j].add(s);
>             if (!refine2.get(j)) {
>               refine2.set(j);
>               refine.set(j);
> @@ -142,18 +148,19 @@ final public class MinimizationOperation
>       }
>       // refine blocks
>       for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
> -        final BitSet sb = splitblock[j];
> -        if (sb.cardinality() < partition[j].cardinality()) {
> -          final BitSet b1 = partition[j], b2 = partition[k];
> -          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
> -            b1.clear(i);
> -            b2.set(i);
> -            block[i] = k;
> +        final ArrayList<State> sb = splitblock[j];
> +        if (sb.size() < partition[j].size()) {
> +          final HashSet<State> b1 = partition[j];
> +          final HashSet<State> b2 = partition[k];
> +          for (final State s : sb) {
> +            b1.remove(s);
> +            b2.add(s);
> +            block[s.number] = k;
>             for (int c = 0; c < sigmaLen; c++) {
> -              final StateListNode sn = active2[i][c];
> +              final StateListNode sn = active2[s.number][c];
>               if (sn != null && sn.sl == active[j][c]) {
>                 sn.remove();
> -                active2[i][c] = active[k][c].add(states[i]);
> +                active2[s.number][c] = active[k][c].add(s);
>               }
>             }
>           }
> @@ -173,8 +180,8 @@ final public class MinimizationOperation
>           k++;
>         }
>         refine2.clear(j);
> -        for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
> -          split.clear(i);
> +        for (final State s : sb)
> +          split.clear(s.number);
>         sb.clear();
>       }
>       refine.clear();
> @@ -184,9 +191,7 @@ final public class MinimizationOperation
>     for (int n = 0; n < newstates.length; n++) {
>       final State s = new State();
>       newstates[n] = s;
> -      BitSet part = partition[n];
> -      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
> -        final State q = states[i];
> +      for (State q : partition[n]) {
>         if (q == a.initial) a.initial = s;
>         s.accept = q.accept;
>         s.number = q.number; // select representative
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1103711&r1=1103710&r2=1103711&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon May 16 12:15:45 2011
> @@ -49,4 +49,9 @@ public class TestMinimize extends Lucene
>       assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
>     }
>   }
> +
> +  /** n^2 space usage in Hopcroft minimization? */
> +  public void testMinimizeHuge() {
> +    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
> +  }
>  }
>
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org