You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2010/10/22 03:17:52 UTC

svn commit: r1026190 - /lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java

Author: uschindler
Date: Fri Oct 22 01:17:51 2010
New Revision: 1026190

URL: http://svn.apache.org/viewvc?rev=1026190&view=rev
Log:
LUCENE-2716: 2nd part of minimize improvements. Will there come version 3? Hopcroft policeman is working on it...

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java?rev=1026190&r1=1026189&r2=1026190&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java Fri Oct 22 01:17:51 2010
@@ -29,7 +29,7 @@
 
 package org.apache.lucene.util.automaton;
 
-import java.util.ArrayList;
+import java.util.BitSet;
 import java.util.LinkedList;
 
 /**
@@ -72,24 +72,18 @@ final public class MinimizationOperation
     final int[] sigma = a.getStartPoints();
     final State[] states = a.getNumberedStates();
     final int sigmaLen = sigma.length, statesLen = states.length;
-    @SuppressWarnings("unchecked") final LinkedList<State>[][] reverse =
-      (LinkedList<State>[][]) new LinkedList[statesLen][sigmaLen];
-    @SuppressWarnings("unchecked") final LinkedList<State>[] partition =
-      (LinkedList<State>[]) new LinkedList[statesLen];
-    @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
-      (ArrayList<State>[]) new ArrayList[statesLen];
+    final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
+    final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
     final int[] block = new int[statesLen];
     final StateList[][] active = new StateList[statesLen][sigmaLen];
     final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
     final LinkedList<IntPair> pending = new LinkedList<IntPair>();
-    final boolean[][] pending2 = new boolean[sigmaLen][statesLen];
-    final ArrayList<State> split = new ArrayList<State>();
-    final boolean[] split2 = new boolean[statesLen];
-    final ArrayList<Integer> refine = new ArrayList<Integer>();
-    final boolean[] refine2 = new boolean[statesLen];
+    final BitSet pending2 = new BitSet(sigmaLen*statesLen);
+    final BitSet split = new BitSet(statesLen), 
+      refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
     for (int q = 0; q < statesLen; q++) {
-      splitblock[q] = new ArrayList<State>();
-      partition[q] = new LinkedList<State>();
+      splitblock[q] = new BitSet(statesLen);
+      partition[q] = new BitSet(statesLen);
       for (int x = 0; x < sigmaLen; x++) {
         active[q][x] = new StateList();
       }
@@ -98,27 +92,31 @@ final public class MinimizationOperation
     for (int q = 0; q < statesLen; q++) {
       final State qq = states[q];
       final int j = qq.accept ? 0 : 1;
-      partition[j].add(qq);
-      block[qq.number] = j;
+      partition[j].set(q);
+      block[q] = j;
       for (int x = 0; x < sigmaLen; x++) {
-        final LinkedList<State>[] r =
+        final BitSet[] r =
           reverse[qq.step(sigma[x]).number];
         if (r[x] == null)
-          r[x] = new LinkedList<State>();
-        r[x].add(qq);
+          r[x] = new BitSet();
+        r[x].set(q);
       }
     }
     // initialize active sets
-    for (int j = 0; j <= 1; j++)
-      for (int x = 0; x < sigmaLen; x++)
-        for (State qq : partition[j])
-          if (reverse[qq.number][x] != null)
-            active2[qq.number][x] = active[j][x].add(qq);
+    for (int j = 0; j <= 1; j++) {
+      final BitSet part = partition[j];
+      for (int x = 0; x < sigmaLen; x++) {
+        for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
+          if (reverse[i][x] != null)
+            active2[i][x] = active[j][x].add(states[i]);
+        }
+      }
+    }
     // initialize pending
     for (int x = 0; x < sigmaLen; x++) {
       final int j = (active[0][x].size <= active[1][x].size) ? 0 : 1;
       pending.add(new IntPair(j, x));
-      pending2[x][j] = true;
+      pending2.set(x*statesLen + j);
     }
     // process pending until fixed point
     int k = 2;
@@ -126,60 +124,59 @@ final public class MinimizationOperation
       IntPair ip = pending.removeFirst();
       final int p = ip.n1;
       final int x = ip.n2;
-      pending2[x][p] = false;
+      pending2.clear(x*statesLen + p);
       // find states that need to be split off their blocks
       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
-        final LinkedList<State> r = reverse[m.q.number][x];
-        if (r != null) for (State s : r) {
-          if (!split2[s.number]) {
-            split2[s.number] = true;
-            split.add(s);
-            final int j = block[s.number];
-            splitblock[j].add(s);
-            if (!refine2[j]) {
-              refine2[j] = true;
-              refine.add(j);
+        final BitSet r = reverse[m.q.number][x];
+        if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+          if (!split.get(i)) {
+            split.set(i);
+            final int j = block[i];
+            splitblock[j].set(i);
+            if (!refine2.get(j)) {
+              refine2.set(j);
+              refine.set(j);
             }
           }
         }
       }
       // refine blocks
-      for (int j : refine) {
-        if (splitblock[j].size() < partition[j].size()) {
-          final LinkedList<State> b1 = partition[j];
-          final LinkedList<State> b2 = partition[k];
-          for (State s : splitblock[j]) {
-            b1.remove(s);
-            b2.add(s);
-            block[s.number] = k;
+      for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
+        final BitSet sb = splitblock[j];
+        if (sb.cardinality() < partition[j].cardinality()) {
+          final BitSet b1 = partition[j], b2 = partition[k];
+          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
+            b1.clear(i);
+            b2.set(i);
+            block[i] = k;
             for (int c = 0; c < sigmaLen; c++) {
-              final StateListNode sn = active2[s.number][c];
+              final StateListNode sn = active2[i][c];
               if (sn != null && sn.sl == active[j][c]) {
                 sn.remove();
-                active2[s.number][c] = active[k][c].add(s);
+                active2[i][c] = active[k][c].add(states[i]);
               }
             }
           }
           // update pending
           for (int c = 0; c < sigmaLen; c++) {
-            final int aj = active[j][c].size;
-            final int ak = active[k][c].size;
-            if (!pending2[c][j] && 0 < aj && aj <= ak) {
-              pending2[c][j] = true;
+            final int aj = active[j][c].size,
+              ak = active[k][c].size,
+              ofs = c*statesLen;
+            if (!pending2.get(ofs + j) && 0 < aj && aj <= ak) {
+              pending2.set(ofs + j);
               pending.add(new IntPair(j, c));
             } else {
-              pending2[c][k] = true;
+              pending2.set(ofs + k);
               pending.add(new IntPair(k, c));
             }
           }
           k++;
         }
-        for (State s : splitblock[j])
-          split2[s.number] = false;
-        refine2[j] = false;
-        splitblock[j].clear();
+        refine2.clear(j);
+        for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
+          split.clear(i);
+        sb.clear();
       }
-      split.clear();
       refine.clear();
     }
     // make a new state for each equivalence class, set initial state
@@ -187,7 +184,9 @@ final public class MinimizationOperation
     for (int n = 0; n < newstates.length; n++) {
       final State s = new State();
       newstates[n] = s;
-      for (State q : partition[n]) {
+      BitSet part = partition[n];
+      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
+        final State q = states[i];
         if (q == a.initial) a.initial = s;
         s.accept = q.accept;
         s.number = q.number; // select representative