You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 16:10:50 UTC
svn commit: r1069829 - in
/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index: ./
codecs/preflex/ codecs/sep/ codecs/standard/
Author: rmuir
Date: Fri Feb 11 15:10:50 2011
New Revision: 1069829
URL: http://svn.apache.org/viewvc?rev=1069829&view=rev
Log:
LUCENE-2905: make skip variables private to codec, separate skipMinimum from skipInterval, don't skip when close in preflex and sep
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri Feb 11 15:10:50 2011
@@ -53,20 +53,6 @@ public class SegmentWriteState {
* tweaking this is rarely useful.*/
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
- /** Expert: The fraction of TermDocs entries stored in skip tables,
- * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
- * smaller indexes, greater acceleration, but fewer accelerable cases, while
- * smaller values result in bigger indexes, less acceleration and more
- * accelerable cases. More detailed experiments would be useful here. */
- public final int skipInterval = 16;
-
- /** Expert: The maximum number of skip levels. Smaller values result in
- * slightly smaller indexes, but slower skipping in big posting lists.
- */
- public final int maxSkipLevels = 10;
-
-
-
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
this.infoStream = infoStream;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java Fri Feb 11 15:10:50 2011
@@ -209,7 +209,8 @@ public class SegmentTermDocs {
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
- if (df >= skipInterval) { // optimized case
+ // don't skip if the target is close (within skipInterval docs away)
+ if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
if (skipListReader == null)
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri Feb 11 15:10:50 2011
@@ -56,6 +56,7 @@ public class SepPostingsReaderImpl exten
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
@@ -102,6 +103,7 @@ public class SepPostingsReaderImpl exten
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
@Override
@@ -231,7 +233,7 @@ public class SepPostingsReaderImpl exten
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
@@ -344,7 +346,7 @@ public class SepPostingsReaderImpl exten
}
docFreq = termState.docFreq;
- // NOTE: unused if docFreq < skipInterval:
+ // NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
@@ -420,13 +422,10 @@ public class SepPostingsReaderImpl exten
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (docFreq >= skipInterval) {
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it's not too close
if (skipper == null) {
// This DocsEnum has never done any skipping
@@ -599,13 +598,10 @@ public class SepPostingsReaderImpl exten
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (docFreq >= skipInterval) {
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it's not too close
if (skipper == null) {
//System.out.println(" create skipper");
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Fri Feb 11 15:10:50 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: minimum docFreq to write any skip data at all
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
+
final int totalNumDocs;
boolean storePayloads;
@@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl
totalNumDocs = state.numDocs;
- // TODO: -- abstraction violation
- skipListWriter = new SepSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new SepSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -264,7 +277,7 @@ public final class SepPostingsWriterImpl
}
}
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
//System.out.println(" skipFP=" + skipStart);
final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Fri Feb 11 15:10:50 2011
@@ -47,6 +47,7 @@ public class StandardPostingsReader exte
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
//private String segment;
@@ -86,6 +87,7 @@ public class StandardPostingsReader exte
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
// Must keep final because we do non-standard clone
@@ -179,7 +181,7 @@ public class StandardPostingsReader exte
//System.out.println(" freqFP=" + termState.freqOffset);
assert termState.freqOffset < freqIn.length();
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
//System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
@@ -378,7 +380,7 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
@@ -528,7 +530,7 @@ public class StandardPostingsReader exte
//System.out.println("StandardR.D&PE advance target=" + target);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
@@ -725,7 +727,7 @@ public class StandardPostingsReader exte
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Fri Feb 11 15:10:50 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -44,8 +45,22 @@ public final class StandardPostingsWrite
final IndexOutput freqOut;
final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: minimum docFreq to write any skip data at all
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
final int totalNumDocs;
IndexOutput termsOut;
@@ -84,14 +99,11 @@ public final class StandardPostingsWrite
totalNumDocs = state.numDocs;
- skipListWriter = new DefaultSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new DefaultSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -100,6 +112,7 @@ public final class StandardPostingsWrite
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -218,7 +231,7 @@ public final class StandardPostingsWrite
}
lastFreqStart = freqStart;
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}