You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/16 14:15:45 UTC
svn commit: r1103711 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/util/automaton/MinimizationOperations.java
test/org/apache/lucene/util/automaton/TestMinimize.java
Author: uschindler
Date: Mon May 16 12:15:45 2011
New Revision: 1103711
URL: http://svn.apache.org/viewvc?rev=1103711&view=rev
Log:
LUCENE-3101: Fix n^2 memory usage in minimizeSchindler() ähm minimizeHopcroft()
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1103711&r1=1103710&r2=1103711&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Mon May 16 12:15:45 2011
@@ -30,6 +30,8 @@
package org.apache.lucene.util.automaton;
import java.util.BitSet;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.LinkedList;
/**
@@ -72,8 +74,12 @@ final public class MinimizationOperation
final int[] sigma = a.getStartPoints();
final State[] states = a.getNumberedStates();
final int sigmaLen = sigma.length, statesLen = states.length;
- final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
- final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
+ @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
+ (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
+ @SuppressWarnings("unchecked") final HashSet<State>[] partition =
+ (HashSet<State>[]) new HashSet[statesLen];
+ @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
+ (ArrayList<State>[]) new ArrayList[statesLen];
final int[] block = new int[statesLen];
final StateList[][] active = new StateList[statesLen][sigmaLen];
final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@@ -82,8 +88,8 @@ final public class MinimizationOperation
final BitSet split = new BitSet(statesLen),
refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
for (int q = 0; q < statesLen; q++) {
- splitblock[q] = new BitSet(statesLen);
- partition[q] = new BitSet(statesLen);
+ splitblock[q] = new ArrayList<State>();
+ partition[q] = new HashSet<State>();
for (int x = 0; x < sigmaLen; x++) {
active[q][x] = new StateList();
}
@@ -92,23 +98,22 @@ final public class MinimizationOperation
for (int q = 0; q < statesLen; q++) {
final State qq = states[q];
final int j = qq.accept ? 0 : 1;
- partition[j].set(q);
+ partition[j].add(qq);
block[q] = j;
for (int x = 0; x < sigmaLen; x++) {
- final BitSet[] r =
+ final ArrayList<State>[] r =
reverse[qq.step(sigma[x]).number];
if (r[x] == null)
- r[x] = new BitSet();
- r[x].set(q);
+ r[x] = new ArrayList<State>();
+ r[x].add(qq);
}
}
// initialize active sets
for (int j = 0; j <= 1; j++) {
- final BitSet part = partition[j];
for (int x = 0; x < sigmaLen; x++) {
- for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
- if (reverse[i][x] != null)
- active2[i][x] = active[j][x].add(states[i]);
+ for (final State qq : partition[j]) {
+ if (reverse[qq.number][x] != null)
+ active2[qq.number][x] = active[j][x].add(qq);
}
}
}
@@ -121,18 +126,19 @@ final public class MinimizationOperation
// process pending until fixed point
int k = 2;
while (!pending.isEmpty()) {
- IntPair ip = pending.removeFirst();
+ final IntPair ip = pending.removeFirst();
final int p = ip.n1;
final int x = ip.n2;
pending2.clear(x*statesLen + p);
// find states that need to be split off their blocks
for (StateListNode m = active[p][x].first; m != null; m = m.next) {
- final BitSet r = reverse[m.q.number][x];
- if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+ final ArrayList<State> r = reverse[m.q.number][x];
+ if (r != null) for (final State s : r) {
+ final int i = s.number;
if (!split.get(i)) {
split.set(i);
final int j = block[i];
- splitblock[j].set(i);
+ splitblock[j].add(s);
if (!refine2.get(j)) {
refine2.set(j);
refine.set(j);
@@ -142,18 +148,19 @@ final public class MinimizationOperation
}
// refine blocks
for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
- final BitSet sb = splitblock[j];
- if (sb.cardinality() < partition[j].cardinality()) {
- final BitSet b1 = partition[j], b2 = partition[k];
- for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
- b1.clear(i);
- b2.set(i);
- block[i] = k;
+ final ArrayList<State> sb = splitblock[j];
+ if (sb.size() < partition[j].size()) {
+ final HashSet<State> b1 = partition[j];
+ final HashSet<State> b2 = partition[k];
+ for (final State s : sb) {
+ b1.remove(s);
+ b2.add(s);
+ block[s.number] = k;
for (int c = 0; c < sigmaLen; c++) {
- final StateListNode sn = active2[i][c];
+ final StateListNode sn = active2[s.number][c];
if (sn != null && sn.sl == active[j][c]) {
sn.remove();
- active2[i][c] = active[k][c].add(states[i]);
+ active2[s.number][c] = active[k][c].add(s);
}
}
}
@@ -173,8 +180,8 @@ final public class MinimizationOperation
k++;
}
refine2.clear(j);
- for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
- split.clear(i);
+ for (final State s : sb)
+ split.clear(s.number);
sb.clear();
}
refine.clear();
@@ -184,9 +191,7 @@ final public class MinimizationOperation
for (int n = 0; n < newstates.length; n++) {
final State s = new State();
newstates[n] = s;
- BitSet part = partition[n];
- for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
- final State q = states[i];
+ for (State q : partition[n]) {
if (q == a.initial) a.initial = s;
s.accept = q.accept;
s.number = q.number; // select representative
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1103711&r1=1103710&r2=1103711&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon May 16 12:15:45 2011
@@ -49,4 +49,9 @@ public class TestMinimize extends Lucene
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
}
}
+
+ /** n^2 space usage in Hopcroft minimization? */
+ public void testMinimizeHuge() {
+ new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+ }
}
Re: svn commit: r1103711 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/util/automaton/MinimizationOperations.java test/org/apache/lucene/util/automaton/TestMinimize.java
Posted by Simon Willnauer <si...@googlemail.com>.
On Mon, May 16, 2011 at 2:15 PM, <us...@apache.org> wrote:
> Author: uschindler
> Date: Mon May 16 12:15:45 2011
> New Revision: 1103711
>
> URL: http://svn.apache.org/viewvc?rev=1103711&view=rev
> Log:
> LUCENE-3101: Fix n^2 memory usage in minimizeSchindler() ähm minimizeHopcroft()
LOL ^ ^
>
> Modified:
> lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
> lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
>
> Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1103711&r1=1103710&r2=1103711&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
> +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Mon May 16 12:15:45 2011
> @@ -30,6 +30,8 @@
> package org.apache.lucene.util.automaton;
>
> import java.util.BitSet;
> +import java.util.ArrayList;
> +import java.util.HashSet;
> import java.util.LinkedList;
>
> /**
> @@ -72,8 +74,12 @@ final public class MinimizationOperation
> final int[] sigma = a.getStartPoints();
> final State[] states = a.getNumberedStates();
> final int sigmaLen = sigma.length, statesLen = states.length;
> - final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
> - final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
> + @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
> + (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
> + @SuppressWarnings("unchecked") final HashSet<State>[] partition =
> + (HashSet<State>[]) new HashSet[statesLen];
> + @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
> + (ArrayList<State>[]) new ArrayList[statesLen];
> final int[] block = new int[statesLen];
> final StateList[][] active = new StateList[statesLen][sigmaLen];
> final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
> @@ -82,8 +88,8 @@ final public class MinimizationOperation
> final BitSet split = new BitSet(statesLen),
> refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
> for (int q = 0; q < statesLen; q++) {
> - splitblock[q] = new BitSet(statesLen);
> - partition[q] = new BitSet(statesLen);
> + splitblock[q] = new ArrayList<State>();
> + partition[q] = new HashSet<State>();
> for (int x = 0; x < sigmaLen; x++) {
> active[q][x] = new StateList();
> }
> @@ -92,23 +98,22 @@ final public class MinimizationOperation
> for (int q = 0; q < statesLen; q++) {
> final State qq = states[q];
> final int j = qq.accept ? 0 : 1;
> - partition[j].set(q);
> + partition[j].add(qq);
> block[q] = j;
> for (int x = 0; x < sigmaLen; x++) {
> - final BitSet[] r =
> + final ArrayList<State>[] r =
> reverse[qq.step(sigma[x]).number];
> if (r[x] == null)
> - r[x] = new BitSet();
> - r[x].set(q);
> + r[x] = new ArrayList<State>();
> + r[x].add(qq);
> }
> }
> // initialize active sets
> for (int j = 0; j <= 1; j++) {
> - final BitSet part = partition[j];
> for (int x = 0; x < sigmaLen; x++) {
> - for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
> - if (reverse[i][x] != null)
> - active2[i][x] = active[j][x].add(states[i]);
> + for (final State qq : partition[j]) {
> + if (reverse[qq.number][x] != null)
> + active2[qq.number][x] = active[j][x].add(qq);
> }
> }
> }
> @@ -121,18 +126,19 @@ final public class MinimizationOperation
> // process pending until fixed point
> int k = 2;
> while (!pending.isEmpty()) {
> - IntPair ip = pending.removeFirst();
> + final IntPair ip = pending.removeFirst();
> final int p = ip.n1;
> final int x = ip.n2;
> pending2.clear(x*statesLen + p);
> // find states that need to be split off their blocks
> for (StateListNode m = active[p][x].first; m != null; m = m.next) {
> - final BitSet r = reverse[m.q.number][x];
> - if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
> + final ArrayList<State> r = reverse[m.q.number][x];
> + if (r != null) for (final State s : r) {
> + final int i = s.number;
> if (!split.get(i)) {
> split.set(i);
> final int j = block[i];
> - splitblock[j].set(i);
> + splitblock[j].add(s);
> if (!refine2.get(j)) {
> refine2.set(j);
> refine.set(j);
> @@ -142,18 +148,19 @@ final public class MinimizationOperation
> }
> // refine blocks
> for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
> - final BitSet sb = splitblock[j];
> - if (sb.cardinality() < partition[j].cardinality()) {
> - final BitSet b1 = partition[j], b2 = partition[k];
> - for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
> - b1.clear(i);
> - b2.set(i);
> - block[i] = k;
> + final ArrayList<State> sb = splitblock[j];
> + if (sb.size() < partition[j].size()) {
> + final HashSet<State> b1 = partition[j];
> + final HashSet<State> b2 = partition[k];
> + for (final State s : sb) {
> + b1.remove(s);
> + b2.add(s);
> + block[s.number] = k;
> for (int c = 0; c < sigmaLen; c++) {
> - final StateListNode sn = active2[i][c];
> + final StateListNode sn = active2[s.number][c];
> if (sn != null && sn.sl == active[j][c]) {
> sn.remove();
> - active2[i][c] = active[k][c].add(states[i]);
> + active2[s.number][c] = active[k][c].add(s);
> }
> }
> }
> @@ -173,8 +180,8 @@ final public class MinimizationOperation
> k++;
> }
> refine2.clear(j);
> - for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
> - split.clear(i);
> + for (final State s : sb)
> + split.clear(s.number);
> sb.clear();
> }
> refine.clear();
> @@ -184,9 +191,7 @@ final public class MinimizationOperation
> for (int n = 0; n < newstates.length; n++) {
> final State s = new State();
> newstates[n] = s;
> - BitSet part = partition[n];
> - for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
> - final State q = states[i];
> + for (State q : partition[n]) {
> if (q == a.initial) a.initial = s;
> s.accept = q.accept;
> s.number = q.number; // select representative
>
> Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1103711&r1=1103710&r2=1103711&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
> +++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon May 16 12:15:45 2011
> @@ -49,4 +49,9 @@ public class TestMinimize extends Lucene
> assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
> }
> }
> +
> + /** n^2 space usage in Hopcroft minimization? */
> + public void testMinimizeHuge() {
> + new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
> + }
> }
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org