You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/30 18:24:17 UTC

svn commit: r1237809 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/util/fst/FST.java test/org/apache/lucene/util/fst/TestFSTs.java

Author: mikemccand
Date: Mon Jan 30 17:24:16 2012
New Revision: 1237809

URL: http://svn.apache.org/viewvc?rev=1237809&view=rev
Log:
minor FST fixes

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/FST.java?rev=1237809&r1=1237808&r2=1237809&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/FST.java Mon Jan 30 17:24:16 2012
@@ -231,7 +231,7 @@ public final class FST<T> {
         b.append(" hasOutput");
       }
       if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
-        b.append(" hasOutput");
+        b.append(" hasFinalOutput");
       }
       if (bytesPerArc != 0) {
         b.append(" arcArray(idx=" + arcIdx + " of " + numArcs + ")");
@@ -1447,6 +1447,7 @@ public final class FST<T> {
     // Find top nodes with highest number of incoming arcs:
     NodeQueue q = new NodeQueue(topN);
 
+    // TODO: we could use more RAM efficient selection algo here...
     NodeAndInCount bottom = null;
     for(int node=0;node<inCounts.length;node++) {
       if (inCounts[node] >= minInCountDeref) {
@@ -1515,6 +1516,8 @@ public final class FST<T> {
 
       int addressError = 0;
 
+      //int totWasted = 0;
+
       // Since we re-reverse the bytes, we now write the
       // nodes backwards, so that BIT_TARGET_NEXT is
       // unchanged:
@@ -1554,10 +1557,11 @@ public final class FST<T> {
             writer.writeByte(ARCS_AS_FIXED_ARRAY);
             writer.writeVInt(arc.numArcs);
             writer.writeVInt(bytesPerArc);
+            //System.out.println("node " + node + ": " + arc.numArcs + " arcs");
           }
 
           int maxBytesPerArc = 0;
-
+          //int wasted = 0;
           while(true) {  // iterate over all arcs for this node
 
             //System.out.println("    arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
@@ -1680,6 +1684,7 @@ public final class FST<T> {
               // incoming FST did... but in this case we
               // will retry (below) so it's OK to ovewrite
               // bytes:
+              //wasted += bytesPerArc - arcBytes;
               writer.setPosWrite(arcStartPos + bytesPerArc);
             }
 
@@ -1693,6 +1698,8 @@ public final class FST<T> {
           if (useArcArray) {
             if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
               // converged
+              //System.out.println("  bba=" + bytesPerArc + " wasted=" + wasted);
+              //totWasted += wasted;
               break;
             }
           } else {
@@ -1719,6 +1726,7 @@ public final class FST<T> {
         // other nodes because we only produce acyclic FSTs
         // w/ nodes only pointing "forwards":
         assert !negDelta;
+        //System.out.println("TOT wasted=" + totWasted);
         // Converged!
         break;
       }
@@ -1730,7 +1738,7 @@ public final class FST<T> {
     }
 
     fst.startNode = newNodeAddress[startNode];
-    //System.out.println("new startNode=" + startNode);
+    //System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
 
     if (emptyOutput != null) {
       fst.setEmptyOutput(emptyOutput);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1237809&r1=1237808&r2=1237809&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java Mon Jan 30 17:24:16 2012
@@ -530,7 +530,7 @@ public class TestFSTs extends LuceneTest
         if (VERBOSE) {
           System.out.println("TEST: now rewrite");
         }
-        final FST<T> packed =fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
+        final FST<T> packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000));
         if (VERBOSE) {
           System.out.println("TEST: now verify packed FST");
         }
@@ -1308,13 +1308,13 @@ public class TestFSTs extends LuceneTest
           System.out.println("Pack...");
           fst = fst.pack(4, 100000000);
           System.out.println("New size " + fst.sizeInBytes() + " bytes");
-        } else {
-          Directory dir = FSDirectory.open(new File(dirOut));
-          IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
-          fst.save(out);
-          out.close();
-          System.out.println("Saved FST to fst.bin.");
         }
+        
+        Directory dir = FSDirectory.open(new File(dirOut));
+        IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT);
+        fst.save(out);
+        out.close();
+        System.out.println("Saved FST to fst.bin.");
 
         if (!verify) {
           return;