You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/02/26 18:11:08 UTC

svn commit: r1074881 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst: FST.java FSTEnum.java

Author: mikemccand
Date: Sat Feb 26 17:11:08 2011
New Revision: 1074881

URL: http://svn.apache.org/viewvc?rev=1074881&view=rev
Log:
optimize FST.readLastTargetArc (speeds up primary key lookups)

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1074881&r1=1074880&r2=1074881&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Sat Feb 26 17:11:08 2011
@@ -99,7 +99,7 @@ public class FST<T> {
   public int arcWithOutputCount;
 
   // If arc has this label then that arc is final/accepted
-  public static int END_LABEL = -1;
+  public static final int END_LABEL = -1;
 
   public final static class Arc<T> {
     public int label;
@@ -297,9 +297,7 @@ public class FST<T> {
     final int v;
     if (inputType == INPUT_TYPE.BYTE1) {
       v = in.readByte()&0xFF;
-    } else if (inputType == INPUT_TYPE.BYTE2) {
-      v = in.readVInt();
-    } else {
+    } else { 
       v = in.readVInt();
     }
     return v;
@@ -478,6 +476,59 @@ public class FST<T> {
     return arc;
   }
 
+  /** Follows the <code>follow</code> arc and reads the last
+   *  arc of its target; this changes the provided
+   *  <code>arc</code> (2nd arg) in-place and returns it.
+   * 
+   * @return Returns the second argument
+   * (<code>arc</code>). */
+  public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
+    //System.out.println("readLast");
+    if (!targetHasArcs(follow)) {
+      //System.out.println("  end node");
+      assert follow.isFinal();
+      arc.label = -1;
+      arc.output = follow.nextFinalOutput;
+      arc.flags = BIT_LAST_ARC;
+      return arc;
+    } else {
+      final BytesReader in = getBytesReader(follow.target);
+      arc.flags = in.readByte();
+      if (arc.flag(BIT_ARCS_AS_FIXED_ARRAY)) {
+        // array: jump straight to end
+        arc.numArcs = in.readVInt();
+        arc.bytesPerArc = in.readByte() & 0xFF;
+        //System.out.println("  array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc);
+        arc.posArcsStart = in.pos;
+        arc.arcIdx = arc.numArcs - 2;
+      } else {
+        // non-array: linear scan
+        arc.bytesPerArc = 0;
+        //System.out.println("  scan");
+        while(!arc.isLast()) {
+          // skip this arc:
+          readLabel(in);
+          if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
+            outputs.read(in);
+          }
+          if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
+            outputs.read(in);
+          }
+          if (arc.flag(BIT_STOP_NODE)) {
+          } else if (arc.flag(BIT_TARGET_NEXT)) {
+          } else {
+            in.pos -= 4;
+          }
+          arc.flags = in.readByte();
+        }
+        arc.nextArc = in.pos+1;
+      }
+      readNextRealArc(arc);
+      assert arc.isLast();
+      return arc;
+    }
+  }
+
   /**
    * Follow the <code>follow</code> arc and read the first arc of its target;
    * this changes the provided <code>arc</code> (2nd arg) in-place and returns
@@ -518,15 +569,13 @@ public class FST<T> {
       arc.numArcs = in.readVInt();
       arc.bytesPerArc = in.readByte() & 0xFF;
       arc.arcIdx = -1;
-      arc.posArcsStart = in.pos;
+      arc.nextArc = arc.posArcsStart = in.pos;
       //System.out.println("  bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
     } else {
-      in.pos++;
+      arc.nextArc = address;
       arc.bytesPerArc = 0;
     }
-    arc.nextArc = in.pos;
-    arc.label = 0;
-    return readNextArc(arc);
+    return readNextRealArc(arc);
   }
 
   /**
@@ -598,6 +647,7 @@ public class FST<T> {
     if (arc.bytesPerArc != 0) {
       // arcs are at fixed entries
       arc.arcIdx++;
+      assert arc.arcIdx < arc.numArcs;
       in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc);
     } else {
       // arcs are packed

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java?rev=1074881&r1=1074880&r2=1074881&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java Sat Feb 26 17:11:08 2011
@@ -274,7 +274,7 @@ abstract class FSTEnum<T> {
     while(true) {
       //System.out.println("  cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
 
-      if (arc.bytesPerArc != 0 && arc.label != -1) {
+      if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
         // Arcs are fixed array -- use binary search to find
         // the target.
 
@@ -465,12 +465,7 @@ abstract class FSTEnum<T> {
       }
       incr();
 
-      final FST.Arc<T> nextArc = getArc(upto);
-      fst.readFirstTargetArc(arc, nextArc);
-      arc = nextArc;
-      while(!arc.isLast()) {
-        fst.readNextArc(arc);
-      }
+      arc = fst.readLastTargetArc(arc, getArc(upto));
     }
   }