You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/02/26 18:11:08 UTC
svn commit: r1074881 - in
/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst:
FST.java FSTEnum.java
Author: mikemccand
Date: Sat Feb 26 17:11:08 2011
New Revision: 1074881
URL: http://svn.apache.org/viewvc?rev=1074881&view=rev
Log:
optimize FST.readLastTargetArc (speeds up primary key lookups)
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1074881&r1=1074880&r2=1074881&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Sat Feb 26 17:11:08 2011
@@ -99,7 +99,7 @@ public class FST<T> {
public int arcWithOutputCount;
// If arc has this label then that arc is final/accepted
- public static int END_LABEL = -1;
+ public static final int END_LABEL = -1;
public final static class Arc<T> {
public int label;
@@ -297,9 +297,7 @@ public class FST<T> {
final int v;
if (inputType == INPUT_TYPE.BYTE1) {
v = in.readByte()&0xFF;
- } else if (inputType == INPUT_TYPE.BYTE2) {
- v = in.readVInt();
- } else {
+ } else {
v = in.readVInt();
}
return v;
@@ -478,6 +476,59 @@ public class FST<T> {
return arc;
}
+ /** Follows the <code>follow</code> arc and reads the last
+ * arc of its target; this changes the provided
+ * <code>arc</code> (2nd arg) in-place and returns it.
+ *
+ * @return Returns the second argument
+ * (<code>arc</code>). */
+ public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
+ //System.out.println("readLast");
+ if (!targetHasArcs(follow)) {
+ //System.out.println(" end node");
+ assert follow.isFinal();
+ arc.label = -1;
+ arc.output = follow.nextFinalOutput;
+ arc.flags = BIT_LAST_ARC;
+ return arc;
+ } else {
+ final BytesReader in = getBytesReader(follow.target);
+ arc.flags = in.readByte();
+ if (arc.flag(BIT_ARCS_AS_FIXED_ARRAY)) {
+ // array: jump straight to end
+ arc.numArcs = in.readVInt();
+ arc.bytesPerArc = in.readByte() & 0xFF;
+ //System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc);
+ arc.posArcsStart = in.pos;
+ arc.arcIdx = arc.numArcs - 2;
+ } else {
+ // non-array: linear scan
+ arc.bytesPerArc = 0;
+ //System.out.println(" scan");
+ while(!arc.isLast()) {
+ // skip this arc:
+ readLabel(in);
+ if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
+ outputs.read(in);
+ }
+ if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
+ outputs.read(in);
+ }
+ if (arc.flag(BIT_STOP_NODE)) {
+ } else if (arc.flag(BIT_TARGET_NEXT)) {
+ } else {
+ in.pos -= 4;
+ }
+ arc.flags = in.readByte();
+ }
+ arc.nextArc = in.pos+1;
+ }
+ readNextRealArc(arc);
+ assert arc.isLast();
+ return arc;
+ }
+ }
+
/**
* Follow the <code>follow</code> arc and read the first arc of its target;
* this changes the provided <code>arc</code> (2nd arg) in-place and returns
@@ -518,15 +569,13 @@ public class FST<T> {
arc.numArcs = in.readVInt();
arc.bytesPerArc = in.readByte() & 0xFF;
arc.arcIdx = -1;
- arc.posArcsStart = in.pos;
+ arc.nextArc = arc.posArcsStart = in.pos;
//System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
} else {
- in.pos++;
+ arc.nextArc = address;
arc.bytesPerArc = 0;
}
- arc.nextArc = in.pos;
- arc.label = 0;
- return readNextArc(arc);
+ return readNextRealArc(arc);
}
/**
@@ -598,6 +647,7 @@ public class FST<T> {
if (arc.bytesPerArc != 0) {
// arcs are at fixed entries
arc.arcIdx++;
+ assert arc.arcIdx < arc.numArcs;
in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc);
} else {
// arcs are packed
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java?rev=1074881&r1=1074880&r2=1074881&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java Sat Feb 26 17:11:08 2011
@@ -274,7 +274,7 @@ abstract class FSTEnum<T> {
while(true) {
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast());
- if (arc.bytesPerArc != 0 && arc.label != -1) {
+ if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
// Arcs are fixed array -- use binary search to find
// the target.
@@ -465,12 +465,7 @@ abstract class FSTEnum<T> {
}
incr();
- final FST.Arc<T> nextArc = getArc(upto);
- fst.readFirstTargetArc(arc, nextArc);
- arc = nextArc;
- while(!arc.isLast()) {
- fst.readNextArc(arc);
- }
+ arc = fst.readLastTargetArc(arc, getArc(upto));
}
}