You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 16:10:50 UTC

svn commit: r1069829 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/index: ./ codecs/preflex/ codecs/sep/ codecs/standard/

Author: rmuir
Date: Fri Feb 11 15:10:50 2011
New Revision: 1069829

URL: http://svn.apache.org/viewvc?rev=1069829&view=rev
Log:
LUCENE-2905: make skip variables private to codec, separate skipMinimum from skipInterval, don't skip when close in preflex and sep

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri Feb 11 15:10:50 2011
@@ -53,20 +53,6 @@ public class SegmentWriteState {
    * tweaking this is rarely useful.*/
   public int termIndexInterval;                   // TODO: this should be private to the codec, not settable here or in IWC
 
-  /** Expert: The fraction of TermDocs entries stored in skip tables,
-   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
-   * smaller indexes, greater acceleration, but fewer accelerable cases, while
-   * smaller values result in bigger indexes, less acceleration and more
-   * accelerable cases. More detailed experiments would be useful here. */
-  public final int skipInterval = 16;
-  
-  /** Expert: The maximum number of skip levels. Smaller values result in 
-   * slightly smaller indexes, but slower skipping in big posting lists.
-   */
-  public final int maxSkipLevels = 10;
-  
-
-
   public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
                            int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
     this.infoStream = infoStream;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java Fri Feb 11 15:10:50 2011
@@ -209,7 +209,8 @@ public class SegmentTermDocs {
 
   /** Optimized implementation. */
   public boolean skipTo(int target) throws IOException {
-    if (df >= skipInterval) {                      // optimized case
+    // don't skip if the target is close (within skipInterval docs away)
+    if ((target - skipInterval) >= doc && df >= skipInterval) {                      // optimized case
       if (skipListReader == null)
         skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri Feb 11 15:10:50 2011
@@ -56,6 +56,7 @@ public class SepPostingsReaderImpl exten
 
   int skipInterval;
   int maxSkipLevels;
+  int skipMinimum;
 
   public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
 
@@ -102,6 +103,7 @@ public class SepPostingsReaderImpl exten
       SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
     skipInterval = termsIn.readInt();
     maxSkipLevels = termsIn.readInt();
+    skipMinimum = termsIn.readInt();
   }
 
   @Override
@@ -231,7 +233,7 @@ public class SepPostingsReaderImpl exten
         //System.out.println("  payloadFP=" + termState.payloadFP);
       }
     }
-    if (termState.docFreq >= skipInterval) {
+    if (termState.docFreq >= skipMinimum) {
       //System.out.println("   readSkip @ " + termState.bytesReader.pos);
       if (isFirstTerm) {
         termState.skipFP = termState.bytesReader.readVLong();
@@ -344,7 +346,7 @@ public class SepPostingsReaderImpl exten
       }
 
       docFreq = termState.docFreq;
-      // NOTE: unused if docFreq < skipInterval:
+      // NOTE: unused if docFreq < skipMinimum:
       skipFP = termState.skipFP;
       count = 0;
       doc = 0;
@@ -420,13 +422,10 @@ public class SepPostingsReaderImpl exten
     @Override
     public int advance(int target) throws IOException {
 
-      // TODO: jump right to next() if target is < X away
-      // from where we are now?
-
-      if (docFreq >= skipInterval) {
+      if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
 
         // There are enough docs in the posting to have
-        // skip data
+        // skip data, and it isn't too close
 
         if (skipper == null) {
           // This DocsEnum has never done any skipping
@@ -599,13 +598,10 @@ public class SepPostingsReaderImpl exten
     public int advance(int target) throws IOException {
       //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
 
-      // TODO: jump right to next() if target is < X away
-      // from where we are now?
-
-      if (docFreq >= skipInterval) {
+      if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
 
         // There are enough docs in the posting to have
-        // skip data
+        // skip data, and it isn't too close
 
         if (skipper == null) {
           //System.out.println("  create skipper");

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Fri Feb 11 15:10:50 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Set;
 
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl
   IndexOutput termsOut;
 
   final SepSkipListWriter skipListWriter;
-  final int skipInterval;
-  final int maxSkipLevels;
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  final int skipInterval = 16;
+  
+  /**
+   * Expert: minimum docFreq to write any skip data at all
+   */
+  final int skipMinimum = skipInterval;
+
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  final int maxSkipLevels = 10;
+
   final int totalNumDocs;
 
   boolean storePayloads;
@@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl
 
     totalNumDocs = state.numDocs;
 
-    // TODO: -- abstraction violation
-    skipListWriter = new SepSkipListWriter(state.skipInterval,
-                                           state.maxSkipLevels,
+    skipListWriter = new SepSkipListWriter(skipInterval,
+                                           maxSkipLevels,
                                            state.numDocs,
                                            freqOut, docOut,
                                            posOut, payloadOut);
-
-    skipInterval = state.skipInterval;
-    maxSkipLevels = state.maxSkipLevels;
   }
 
   @Override
@@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl
     // TODO: -- just ask skipper to "start" here
     termsOut.writeInt(skipInterval);                // write skipInterval
     termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    termsOut.writeInt(skipMinimum);                 // write skipMinimum
   }
 
   @Override
@@ -264,7 +277,7 @@ public final class SepPostingsWriterImpl
       }
     }
 
-    if (df >= skipInterval) {
+    if (df >= skipMinimum) {
       //System.out.println("  skipFP=" + skipStart);
       final long skipFP = skipOut.getFilePointer();
       skipListWriter.writeSkip(skipOut);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Fri Feb 11 15:10:50 2011
@@ -47,6 +47,7 @@ public class StandardPostingsReader exte
 
   int skipInterval;
   int maxSkipLevels;
+  int skipMinimum;
 
   //private String segment;
 
@@ -86,6 +87,7 @@ public class StandardPostingsReader exte
 
     skipInterval = termsIn.readInt();
     maxSkipLevels = termsIn.readInt();
+    skipMinimum = termsIn.readInt();
   }
 
   // Must keep final because we do non-standard clone
@@ -179,7 +181,7 @@ public class StandardPostingsReader exte
     //System.out.println("  freqFP=" + termState.freqOffset);
     assert termState.freqOffset < freqIn.length();
 
-    if (termState.docFreq >= skipInterval) {
+    if (termState.docFreq >= skipMinimum) {
       termState.skipOffset = termState.bytesReader.readVInt();
       //System.out.println("  skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
       assert termState.freqOffset + termState.skipOffset < freqIn.length();
@@ -378,7 +380,7 @@ public class StandardPostingsReader exte
     @Override
     public int advance(int target) throws IOException {
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close.
@@ -528,7 +530,7 @@ public class StandardPostingsReader exte
 
       //System.out.println("StandardR.D&PE advance target=" + target);
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close
@@ -725,7 +727,7 @@ public class StandardPostingsReader exte
 
       //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1069829&r1=1069828&r2=1069829&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Fri Feb 11 15:10:50 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.s
 import java.io.IOException;
 
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
@@ -44,8 +45,22 @@ public final class StandardPostingsWrite
   final IndexOutput freqOut;
   final IndexOutput proxOut;
   final DefaultSkipListWriter skipListWriter;
-  final int skipInterval;
-  final int maxSkipLevels;
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  final int skipInterval = 16;
+  
+  /**
+   * Expert: minimum docFreq to write any skip data at all
+   */
+  final int skipMinimum = skipInterval;
+
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  final int maxSkipLevels = 10;
   final int totalNumDocs;
   IndexOutput termsOut;
 
@@ -84,14 +99,11 @@ public final class StandardPostingsWrite
 
     totalNumDocs = state.numDocs;
 
-    skipListWriter = new DefaultSkipListWriter(state.skipInterval,
-                                               state.maxSkipLevels,
+    skipListWriter = new DefaultSkipListWriter(skipInterval,
+                                               maxSkipLevels,
                                                state.numDocs,
                                                freqOut,
                                                proxOut);
-     
-    skipInterval = state.skipInterval;
-    maxSkipLevels = state.maxSkipLevels;
   }
 
   @Override
@@ -100,6 +112,7 @@ public final class StandardPostingsWrite
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
     termsOut.writeInt(skipInterval);                // write skipInterval
     termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    termsOut.writeInt(skipMinimum);                 // write skipMinimum
   }
 
   @Override
@@ -218,7 +231,7 @@ public final class StandardPostingsWrite
     }
     lastFreqStart = freqStart;
 
-    if (df >= skipInterval) {
+    if (df >= skipMinimum) {
       bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
     }