You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/10/30 12:17:21 UTC
svn commit: r1029012 - in /lucene/dev/trunk/lucene/src:
java/org/apache/lucene/index/codecs/intblock/
java/org/apache/lucene/index/codecs/sep/
test/org/apache/lucene/index/codecs/mocksep/
Author: mikemccand
Date: Sat Oct 30 10:17:20 2010
New Revision: 1029012
URL: http://svn.apache.org/viewvc?rev=1029012&view=rev
Log:
LUCENE-2722: fix sep codec to store less in the terms dict
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Sat Oct 30 10:17:20 2010
@@ -168,6 +168,25 @@ public abstract class FixedIntBlockIndex
}
@Override
+ public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
+ if (absolute) {
+ fp = indexIn.readVLong();
+ upto = indexIn.next();
+ } else {
+ final long delta = indexIn.readVLong();
+ if (delta == 0) {
+ // same block
+ upto += indexIn.next();
+ } else {
+ // new block
+ fp += delta;
+ upto = indexIn.next();
+ }
+ }
+ assert upto < blockSize;
+ }
+
+ @Override
public void seek(final IntIndexInput.Reader other) throws IOException {
((Reader) other).seek(fp, upto);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Sat Oct 30 10:17:20 2010
@@ -83,11 +83,30 @@ public abstract class FixedIntBlockIndex
// same block
indexOut.writeVLong(0);
assert upto >= lastUpto;
- indexOut.writeVLong(upto - lastUpto);
+ indexOut.writeVInt(upto - lastUpto);
} else {
// new block
indexOut.writeVLong(fp - lastFP);
- indexOut.writeVLong(upto);
+ indexOut.writeVInt(upto);
+ }
+ lastUpto = upto;
+ lastFP = fp;
+ }
+
+ @Override
+ public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
+ if (absolute) {
+ indexOut.writeVLong(fp);
+ indexOut.write(upto);
+ } else if (fp == lastFP) {
+ // same block
+ indexOut.writeVLong(0);
+ assert upto >= lastUpto;
+ indexOut.write(upto - lastUpto);
+ } else {
+ // new block
+ indexOut.writeVLong(fp - lastFP);
+ indexOut.write(upto);
}
lastUpto = upto;
lastFP = fp;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Sat Oct 30 10:17:20 2010
@@ -189,6 +189,24 @@ public abstract class VariableIntBlockIn
}
@Override
+ public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
+ if (absolute) {
+ fp = indexIn.readVLong();
+ upto = indexIn.next()&0xFF;
+ } else {
+ final long delta = indexIn.readVLong();
+ if (delta == 0) {
+ // same block
+ upto = indexIn.next()&0xFF;
+ } else {
+ // new block
+ fp += delta;
+ upto = indexIn.next()&0xFF;
+ }
+ }
+ }
+
+ @Override
public String toString() {
return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Sat Oct 30 10:17:20 2010
@@ -103,6 +103,26 @@ public abstract class VariableIntBlockIn
lastUpto = upto;
lastFP = fp;
}
+
+ @Override
+ public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
+ assert upto >= 0;
+ if (absolute) {
+ indexOut.writeVLong(fp);
+ indexOut.write(upto);
+ } else if (fp == lastFP) {
+ // same block
+ indexOut.writeVLong(0);
+ assert upto >= lastUpto;
+ indexOut.write(upto);
+ } else {
+ // new block
+ indexOut.writeVLong(fp - lastFP);
+ indexOut.write(upto);
+ }
+ lastUpto = upto;
+ lastFP = fp;
+ }
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Sat Oct 30 10:17:20 2010
@@ -41,6 +41,8 @@ public abstract class IntIndexInput impl
public abstract void read(IndexInput indexIn, boolean absolute) throws IOException;
+ public abstract void read(IntIndexInput.Reader indexIn, boolean absolute) throws IOException;
+
/** Seeks primary stream to the last read offset */
public abstract void seek(IntIndexInput.Reader stream) throws IOException;
@@ -54,6 +56,18 @@ public abstract class IntIndexInput impl
/** Reads next single int */
public abstract int next() throws IOException;
+ /** Decodes a long written as 1 or 2 ints; only 61 of the 64
+ * long bits can be used. */
+ public long readVLong() throws IOException {
+ final int v = next();
+ if ((v & 1) == 0) {
+ return v >> 1;
+ } else {
+ final long v2 = next();
+ return (v2 << 30) | (v >> 1);
+ }
+ }
+
/** Reads next chunk of ints */
private IntsRef bulkResult;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Sat Oct 30 10:17:20 2010
@@ -34,9 +34,27 @@ import java.io.Closeable;
* @lucene.experimental */
public abstract class IntIndexOutput implements Closeable {
- /** Write an int to the primary file */
+ /** Write an int to the primary file. The value must be
+ * >= 0. */
public abstract void write(int v) throws IOException;
+ public static final long MAX_SINGLE_INT_VLONG = Integer.MAX_VALUE - (1<<30);
+ public static final long MAX_VLONG = Long.MAX_VALUE - (1L<<62) - (1L<<61);
+
+ /** Encodes as 1 or 2 ints, and can only use 61 of the 64
+ * long bits. */
+ public void writeVLong(long v) throws IOException {
+ assert v >= 0: "v=" + v;
+ assert v < MAX_VLONG: "v=" + v;
+ // we cannot pass a negative int
+ if (v <= MAX_SINGLE_INT_VLONG) {
+ write(((int) v)<<1);
+ } else {
+ write(((int) ((v & MAX_SINGLE_INT_VLONG))<<1) | 1);
+ write(((int) (v >> 30)));
+ }
+ }
+
public abstract static class Index {
/** Internally records the current location */
@@ -46,8 +64,10 @@ public abstract class IntIndexOutput imp
public abstract void set(Index other) throws IOException;
/** Writes "location" of current output pointer of primary
- * output to different output (out) */
+ * output to different output (out) */
public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
+
+ public abstract void write(IntIndexOutput indexOut, boolean absolute) throws IOException;
}
/** If you are indexing the primary output file, call
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Sat Oct 30 10:17:20 2010
@@ -130,21 +130,14 @@ public class SepPostingsReaderImpl exten
}
private static class SepTermState extends TermState {
+ // We store only the seek point to the docs file because
+ // the rest of the info (freqIndex, posIndex, etc.) is
+ // stored in the docs file:
IntIndexInput.Index docIndex;
- IntIndexInput.Index freqIndex;
- IntIndexInput.Index posIndex;
- long skipOffset;
- long payloadOffset;
public Object clone() {
SepTermState other = (SepTermState) super.clone();
other.docIndex = (IntIndexInput.Index) docIndex.clone();
- if (freqIndex != null) {
- other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
- }
- if (posIndex != null) {
- other.posIndex = (IntIndexInput.Index) posIndex.clone();
- }
return other;
}
@@ -152,22 +145,6 @@ public class SepPostingsReaderImpl exten
super.copy(_other);
SepTermState other = (SepTermState) _other;
docIndex.set(other.docIndex);
- if (other.posIndex != null) {
- if (posIndex == null) {
- posIndex = (IntIndexInput.Index) other.posIndex.clone();
- } else {
- posIndex.set(other.posIndex);
- }
- }
- if (other.freqIndex != null) {
- if (freqIndex == null) {
- freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
- } else {
- freqIndex.set(other.freqIndex);
- }
- }
- skipOffset = other.skipOffset;
- payloadOffset = other.payloadOffset;
}
@Override
@@ -184,39 +161,8 @@ public class SepPostingsReaderImpl exten
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
- final SepTermState termState = (SepTermState) _termState;
-
- // read freq index
- if (!fieldInfo.omitTermFreqAndPositions) {
- if (termState.freqIndex == null) {
- assert isIndexTerm;
- termState.freqIndex = freqIn.index();
- termState.posIndex = posIn.index();
- }
- termState.freqIndex.read(termsIn, isIndexTerm);
- }
-
- // read doc index
- termState.docIndex.read(termsIn, isIndexTerm);
-
- // read skip index
- if (isIndexTerm) {
- termState.skipOffset = termsIn.readVLong();
- } else if (termState.docFreq >= skipInterval) {
- termState.skipOffset += termsIn.readVLong();
- }
-
- // read pos, payload index
- if (!fieldInfo.omitTermFreqAndPositions) {
- termState.posIndex.read(termsIn, isIndexTerm);
- final long v = termsIn.readVLong();
- if (isIndexTerm) {
- termState.payloadOffset = v;
- } else {
- termState.payloadOffset += v;
- }
- }
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException {
+ ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
}
@Override
@@ -311,14 +257,18 @@ public class SepPostingsReaderImpl exten
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
- skipOffset = termState.skipOffset;
-
if (!omitTF) {
- freqIndex.set(termState.freqIndex);
+ freqIndex.read(docReader, true);
freqIndex.seek(freqReader);
+
+ posIndex.read(docReader, true);
+ // skip payload offset
+ docReader.readVLong();
} else {
freq = 1;
}
+ skipOffset = docReader.readVLong();
+
docFreq = termState.docFreq;
count = 0;
doc = 0;
@@ -498,17 +448,15 @@ public class SepPostingsReaderImpl exten
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
- freqIndex.set(termState.freqIndex);
+ freqIndex.read(docReader, true);
freqIndex.seek(freqReader);
- posIndex.set(termState.posIndex);
+ posIndex.read(docReader, true);
posSeekPending = true;
- //posIndex.seek(posReader);
payloadPending = false;
- skipOffset = termState.skipOffset;
- payloadOffset = termState.payloadOffset;
- //payloadIn.seek(payloadOffset);
+ payloadOffset = docReader.readVLong();
+ skipOffset = docReader.readVLong();
docFreq = termState.docFreq;
count = 0;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Sat Oct 30 10:17:20 2010
@@ -79,6 +79,7 @@ public final class SepPostingsWriterImpl
long lastPayloadStart;
int lastDocID;
int df;
+ private boolean firstDoc;
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
super();
@@ -147,6 +148,7 @@ public final class SepPostingsWriterImpl
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
+ firstDoc = true;
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
@@ -169,6 +171,20 @@ public final class SepPostingsWriterImpl
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
+ if (firstDoc) {
+ // TODO: we are writing absolute file pointers below,
+ // which is wasteful. It'd be better compression to
+ // write the "baseline" into each indexed term, then
+ // write only the delta here.
+ if (!omitTF) {
+ freqIndex.write(docOut, true);
+ posIndex.write(docOut, true);
+ docOut.writeVLong(payloadStart);
+ }
+ docOut.writeVLong(skipOut.getFilePointer());
+ firstDoc = false;
+ }
+
final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
@@ -229,42 +245,16 @@ public final class SepPostingsWriterImpl
@Override
public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
- long skipPos = skipOut.getFilePointer();
-
// TODO: -- wasteful we are counting this in two places?
assert docCount > 0;
assert docCount == df;
- // TODO: -- only do this if once (consolidate the
- // conditional things that are written)
- if (!omitTF) {
- freqIndex.write(termsOut, isIndexTerm);
- }
docIndex.write(termsOut, isIndexTerm);
if (df >= skipInterval) {
skipListWriter.writeSkip(skipOut);
}
- if (isIndexTerm) {
- termsOut.writeVLong(skipPos);
- lastSkipStart = skipPos;
- } else if (df >= skipInterval) {
- termsOut.writeVLong(skipPos-lastSkipStart);
- lastSkipStart = skipPos;
- }
-
- if (!omitTF) {
- posIndex.write(termsOut, isIndexTerm);
- if (isIndexTerm) {
- // Write absolute at seek points
- termsOut.writeVLong(payloadStart);
- } else {
- termsOut.writeVLong(payloadStart-lastPayloadStart);
- }
- lastPayloadStart = payloadStart;
- }
-
lastDocID = 0;
df = 0;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java Sat Oct 30 10:17:20 2010
@@ -81,6 +81,16 @@ public class MockSingleIntIndexInput ext
}
@Override
+ public void read(IntIndexInput.Reader indexIn, boolean absolute)
+ throws IOException {
+ if (absolute) {
+ fp = indexIn.readVLong();
+ } else {
+ fp += indexIn.readVLong();
+ }
+ }
+
+ @Override
public void set(IntIndexInput.Index other) {
fp = ((Index) other).fp;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java?rev=1029012&r1=1029011&r2=1029012&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java Sat Oct 30 10:17:20 2010
@@ -76,6 +76,18 @@ public class MockSingleIntIndexOutput ex
}
lastFP = fp;
}
+
+ @Override
+ public void write(IntIndexOutput indexOut, boolean absolute)
+ throws IOException {
+ if (absolute) {
+ indexOut.writeVLong(fp);
+ } else {
+ indexOut.writeVLong(fp - lastFP);
+ }
+ lastFP = fp;
+ }
+
@Override
public String toString() {
return Long.toString(fp);