You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/15 17:46:35 UTC
svn commit: r1059371 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing: PulsingPostingsReaderImpl.java PulsingPostingsWriterImpl.java
Author: rmuir
Date: Sat Jan 15 16:46:35 2011
New Revision: 1059371
URL: http://svn.apache.org/viewvc?rev=1059371&view=rev
Log:
LUCENE-2862: use the stats instead of an extra byte in pulsing
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1059371&r1=1059370&r2=1059371&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Sat Jan 15 16:46:35 2011
@@ -54,6 +54,7 @@ public class PulsingPostingsReaderImpl e
public void init(IndexInput termsIn) throws IOException {
CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
+ maxPositions = termsIn.readVInt();
wrappedPostingsReader.init(termsIn);
}
@@ -115,8 +116,10 @@ public class PulsingPostingsReaderImpl e
termState.pendingIndexTerm |= isIndexTerm;
- // TODO: wasteful to use whole byte for this (need just a 1 bit);
- if (termsIn.readByte() == 1) {
+ // total TF, but in the omitTFAP case it's computed based on docFreq.
+ long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
+
+ if (count <= maxPositions) {
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1059371&r1=1059370&r2=1059371&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Sat Jan 15 16:46:35 2011
@@ -27,10 +27,10 @@ import org.apache.lucene.store.RAMOutput
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-// TODO: we now pulse entirely according to docFreq of the
-// term; it might be better to eg pulse by "net bytes used"
-// so that a term that has only 1 doc but zillions of
-// positions would not be inlined. Though this is
+// TODO: we pulse based on total TF of the term,
+// it might be better to eg pulse by "net bytes used"
+// so that a term that has only 1 posting but a huge
+// payload would not be inlined. Though this is
// presumably rare in practice...
/** @lucene.experimental */
@@ -86,6 +86,7 @@ public final class PulsingPostingsWriter
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+ termsOut.writeVInt(pending.length); // encode maxPositions in header
wrappedPostingsWriter.start(termsOut);
}
@@ -186,7 +187,6 @@ public final class PulsingPostingsWriter
pendingIsIndexTerm |= isIndexTerm;
if (pendingCount == -1) {
- termsOut.writeByte((byte) 0);
wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
pendingIsIndexTerm = false;
} else {
@@ -195,8 +195,6 @@ public final class PulsingPostingsWriter
// term, so we fully inline our postings data into
// terms dict, now:
- termsOut.writeByte((byte) 1);
-
// TODO: it'd be better to share this encoding logic
// in some inner codec that knows how to write a
// single doc / single position, etc. This way if a