You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ha...@apache.org on 2013/07/31 09:15:01 UTC
svn commit: r1508744 -
/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
Author: han
Date: Wed Jul 31 07:15:00 2013
New Revision: 1508744
URL: http://svn.apache.org/r1508744
Log:
LUCENE-3069: introduce intersect() to TempFSTOrd
Modified:
lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
Modified: lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java?rev=1508744&r1=1508743&r2=1508744&view=diff
==============================================================================
--- lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java (original)
+++ lucene/dev/branches/lucene3069/lucene/core/src/java/org/apache/lucene/codecs/temp/TempFSTOrdTermsReader.java Wed Jul 31 07:15:00 2013
@@ -66,7 +66,7 @@ public class TempFSTOrdTermsReader exten
final TempPostingsReaderBase postingsReader;
IndexInput indexIn = null;
IndexInput blockIn = null;
- static final boolean TEST = false;
+ //static final boolean TEST = false;
public TempFSTOrdTermsReader(SegmentReadState state, TempPostingsReaderBase postingsReader) throws IOException {
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TempFSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
@@ -251,7 +251,7 @@ public class TempFSTOrdTermsReader exten
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
- return super.intersect(compiled, startTerm);
+ return new IntersectTermsEnum(compiled, startTerm);
}
// Only wraps common operations for PBF interact
@@ -297,7 +297,6 @@ public class TempFSTOrdTermsReader exten
this.totalTermFreq = new long[INTERVAL];
this.statsBlockOrd = -1;
this.metaBlockOrd = -1;
-
}
/** Decodes stats data into term state */
@@ -394,11 +393,6 @@ public class TempFSTOrdTermsReader exten
}
@Override
- public long ord() {
- return ord;
- }
-
- @Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
decodeMetaData();
return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
@@ -419,6 +413,12 @@ public class TempFSTOrdTermsReader exten
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public long ord() {
+ throw new UnsupportedOperationException();
+ }
+
}
@@ -495,6 +495,317 @@ public class TempFSTOrdTermsReader exten
}
}
}
+
+ // Iterates intersect result with automaton (cannot seek!)
+ private final class IntersectTermsEnum extends BaseTermsEnum {
+ /* True when current term's metadata is decoded */
+ boolean decoded;
+
+ /* True when there is pending term when calling next() */
+ boolean pending;
+
+ /* stack to record how current term is constructed,
+ * used to accumulate metadata or rewind term:
+ * level == term.length + 1,
+ * == 0 when term is null */
+ Frame[] stack;
+ int level;
+
+ /* term dict fst */
+ final FST<Long> fst;
+ final FST.BytesReader fstReader;
+ final Outputs<Long> fstOutputs;
+
+ /* query automaton to intersect with */
+ final ByteRunAutomaton fsa;
+
+ private final class Frame {
+ /* fst stats */
+ FST.Arc<Long> arc;
+
+ /* automaton stats */
+ int state;
+
+ Frame() {
+ this.arc = new FST.Arc<Long>();
+ this.state = -1;
+ }
+
+ public String toString() {
+ return "arc=" + arc + " state=" + state;
+ }
+ }
+
+ IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+ //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
+ this.fst = index;
+ this.fstReader = fst.getBytesReader();
+ this.fstOutputs = index.outputs;
+ this.fsa = compiled.runAutomaton;
+ /*
+ PrintWriter pw1 = new PrintWriter(new File("../temp/fst.txt"));
+ Util.toDot(dict,pw1, false, false);
+ pw1.close();
+ PrintWriter pw2 = new PrintWriter(new File("../temp/fsa.txt"));
+ pw2.write(compiled.toDot());
+ pw2.close();
+ */
+ this.level = -1;
+ this.stack = new Frame[16];
+ for (int i = 0 ; i < stack.length; i++) {
+ this.stack[i] = new Frame();
+ }
+
+ Frame frame;
+ frame = loadVirtualFrame(newFrame());
+ this.level++;
+ frame = loadFirstFrame(newFrame());
+ pushFrame(frame);
+
+ this.decoded = false;
+ this.pending = false;
+
+ if (startTerm == null) {
+ pending = isAccept(topFrame());
+ } else {
+ doSeekCeil(startTerm);
+ pending = !startTerm.equals(term) && isValid(topFrame()) && isAccept(topFrame());
+ }
+ }
+
+ @Override
+ void decodeMetaData() throws IOException {
+ if (!decoded) {
+ super.decodeMetaData();
+ decoded = true;
+ }
+ }
+
+ @Override
+ void decodeStats() throws IOException {
+ final FST.Arc<Long> arc = topFrame().arc;
+ if (arc.isFinal()) {
+ ord = fstOutputs.add(arc.output, arc.nextFinalOutput);
+ } else {
+ ord = arc.output;
+ }
+ super.decodeStats();
+ }
+
+ // nocommit: need testcase for this
+ @Override
+ public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
+ decoded = false;
+ term = doSeekCeil(target);
+ decodeStats();
+ if (term == null) {
+ return SeekStatus.END;
+ } else {
+ return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
+ }
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ //if (TEST) System.out.println("Enum next()");
+ if (pending) {
+ pending = false;
+ decodeStats();
+ return term;
+ }
+ decoded = false;
+ DFS:
+ while (level > 0) {
+ Frame frame = newFrame();
+ if (loadExpandFrame(topFrame(), frame) != null) { // has valid target
+ pushFrame(frame);
+ if (isAccept(frame)) { // gotcha
+ break;
+ }
+ continue; // check next target
+ }
+ frame = popFrame();
+ while(level > 0) {
+ if (loadNextFrame(topFrame(), frame) != null) { // has valid sibling
+ pushFrame(frame);
+ if (isAccept(frame)) { // gotcha
+ break DFS;
+ }
+ continue DFS; // check next target
+ }
+ frame = popFrame();
+ }
+ return null;
+ }
+ decodeStats();
+ return term;
+ }
+
+ BytesRef doSeekCeil(BytesRef target) throws IOException {
+ //if (TEST) System.out.println("Enum doSeekCeil()");
+ Frame frame= null;
+ int label, upto = 0, limit = target.length;
+ while (upto < limit) { // to target prefix, or ceil label (rewind prefix)
+ frame = newFrame();
+ label = target.bytes[upto] & 0xff;
+ frame = loadCeilFrame(label, topFrame(), frame);
+ if (frame == null || frame.arc.label != label) {
+ break;
+ }
+ assert isValid(frame); // target must be fetched from automaton
+ pushFrame(frame);
+ upto++;
+ }
+ if (upto == limit) { // got target
+ return term;
+ }
+ if (frame != null) { // got larger term('s prefix)
+ pushFrame(frame);
+ return isAccept(frame) ? term : next();
+ }
+ while (level > 0) { // got target's prefix, advance to larger term
+ frame = popFrame();
+ while (level > 0 && !canRewind(frame)) {
+ frame = popFrame();
+ }
+ if (loadNextFrame(topFrame(), frame) != null) {
+ pushFrame(frame);
+ return isAccept(frame) ? term : next();
+ }
+ }
+ return null;
+ }
+
+ /** Virtual frame, never pop */
+ Frame loadVirtualFrame(Frame frame) throws IOException {
+ frame.arc.output = fstOutputs.getNoOutput();
+ frame.arc.nextFinalOutput = fstOutputs.getNoOutput();
+ frame.state = -1;
+ return frame;
+ }
+
+ /** Load frame for start arc(node) on fst */
+ Frame loadFirstFrame(Frame frame) throws IOException {
+ frame.arc = fst.getFirstArc(frame.arc);
+ frame.state = fsa.getInitialState();
+ return frame;
+ }
+
+ // nocommit: expected to use readFirstTargetArc here?
+
+ /** Load frame for target arc(node) on fst */
+ Frame loadExpandFrame(Frame top, Frame frame) throws IOException {
+ if (!canGrow(top)) {
+ return null;
+ }
+ frame.arc = fst.readFirstRealTargetArc(top.arc.target, frame.arc, fstReader);
+ frame.state = fsa.step(top.state, frame.arc.label);
+ //if (TEST) System.out.println(" loadExpand frame="+frame);
+ if (frame.state == -1) {
+ return loadNextFrame(top, frame);
+ }
+ return frame;
+ }
+
+ /** Load frame for sibling arc(node) on fst */
+ Frame loadNextFrame(Frame top, Frame frame) throws IOException {
+ if (!canRewind(frame)) {
+ return null;
+ }
+ while (!frame.arc.isLast()) {
+ frame.arc = fst.readNextRealArc(frame.arc, fstReader);
+ frame.state = fsa.step(top.state, frame.arc.label);
+ if (frame.state != -1) {
+ break;
+ }
+ }
+ //if (TEST) System.out.println(" loadNext frame="+frame);
+ if (frame.state == -1) {
+ return null;
+ }
+ return frame;
+ }
+
+ /** Load frame for target arc(node) on fst, so that
+ * arc.label >= label and !fsa.reject(arc.label) */
+ Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
+ FST.Arc<Long> arc = frame.arc;
+ arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
+ if (arc == null) {
+ return null;
+ }
+ frame.state = fsa.step(top.state, arc.label);
+ //if (TEST) System.out.println(" loadCeil frame="+frame);
+ if (frame.state == -1) {
+ return loadNextFrame(top, frame);
+ }
+ return frame;
+ }
+
+ boolean isAccept(Frame frame) { // reach a term both fst&fsa accepts
+ return fsa.isAccept(frame.state) && frame.arc.isFinal();
+ }
+ boolean isValid(Frame frame) { // reach a prefix both fst&fsa won't reject
+ return /*frame != null &&*/ frame.state != -1;
+ }
+ boolean canGrow(Frame frame) { // can walk forward on both fst&fsa
+ return frame.state != -1 && FST.targetHasArcs(frame.arc);
+ }
+ boolean canRewind(Frame frame) { // can jump to sibling
+ return !frame.arc.isLast();
+ }
+
+ // nocommit: need to load ord lazily?
+ void pushFrame(Frame frame) {
+ final FST.Arc<Long> arc = frame.arc;
+ arc.output = fstOutputs.add(topFrame().arc.output, arc.output);
+ term = grow(arc.label);
+ level++;
+ }
+
+ Frame popFrame() {
+ term = shrink();
+ level--;
+ return stack[level+1];
+ }
+
+ Frame newFrame() {
+ if (level+1 == stack.length) {
+ final Frame[] temp = new Frame[ArrayUtil.oversize(level+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(stack, 0, temp, 0, stack.length);
+ for (int i = stack.length; i < temp.length; i++) {
+ temp[i] = new Frame();
+ }
+ stack = temp;
+ }
+ return stack[level+1];
+ }
+
+ Frame topFrame() {
+ return stack[level];
+ }
+
+ BytesRef grow(int label) {
+ if (term == null) {
+ term = new BytesRef(new byte[16], 0, 0);
+ } else {
+ if (term.length == term.bytes.length) {
+ term.grow(term.length+1);
+ }
+ term.bytes[term.length++] = (byte)label;
+ }
+ return term;
+ }
+
+ BytesRef shrink() {
+ if (term.length == 0) {
+ term = null;
+ } else {
+ term.length--;
+ }
+ return term;
+ }
+ }
}
static<T> void walk(FST<T> fst) throws IOException {