You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/15 17:46:35 UTC

svn commit: r1059371 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing: PulsingPostingsReaderImpl.java PulsingPostingsWriterImpl.java

Author: rmuir
Date: Sat Jan 15 16:46:35 2011
New Revision: 1059371

URL: http://svn.apache.org/viewvc?rev=1059371&view=rev
Log:
LUCENE-2862: use the stats instead of an extra byte in pulsing

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1059371&r1=1059370&r2=1059371&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Sat Jan 15 16:46:35 2011
@@ -54,6 +54,7 @@ public class PulsingPostingsReaderImpl e
   public void init(IndexInput termsIn) throws IOException {
     CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
       PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
+    maxPositions = termsIn.readVInt();
     wrappedPostingsReader.init(termsIn);
   }
 
@@ -115,8 +116,10 @@ public class PulsingPostingsReaderImpl e
 
     termState.pendingIndexTerm |= isIndexTerm;
 
-    // TODO: wasteful to use whole byte for this (need just a 1 bit);
-    if (termsIn.readByte() == 1) {
+    // total TF, but in the omitTFAP case it's computed based on docFreq.
+    long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
+    
+    if (count <= maxPositions) {
 
       // Inlined into terms dict -- just read the byte[] blob in,
       // but don't decode it now (we only decode when a DocsEnum

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1059371&r1=1059370&r2=1059371&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Sat Jan 15 16:46:35 2011
@@ -27,10 +27,10 @@ import org.apache.lucene.store.RAMOutput
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
-// TODO: we now pulse entirely according to docFreq of the
-// term; it might be better to eg pulse by "net bytes used"
-// so that a term that has only 1 doc but zillions of
-// positions would not be inlined.  Though this is
+// TODO: we pulse based on total TF of the term;
+// it might be better to, e.g., pulse by "net bytes used"
+// so that a term that has only 1 posting but a huge
+// payload would not be inlined.  Though this is
 // presumably rare in practice...
 
 /** @lucene.experimental */
@@ -86,6 +86,7 @@ public final class PulsingPostingsWriter
   public void start(IndexOutput termsOut) throws IOException {
     this.termsOut = termsOut;
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    termsOut.writeVInt(pending.length); // encode maxPositions in header
     wrappedPostingsWriter.start(termsOut);
   }
 
@@ -186,7 +187,6 @@ public final class PulsingPostingsWriter
     pendingIsIndexTerm |= isIndexTerm;
 
     if (pendingCount == -1) {
-      termsOut.writeByte((byte) 0);
       wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
       pendingIsIndexTerm = false;
     } else {
@@ -195,8 +195,6 @@ public final class PulsingPostingsWriter
       // term, so we fully inline our postings data into
       // terms dict, now:
 
-      termsOut.writeByte((byte) 1);
-
       // TODO: it'd be better to share this encoding logic
       // in some inner codec that knows how to write a
       // single doc / single position, etc.  This way if a