You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/08/09 11:16:09 UTC
svn commit: r1371114 - in
/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked:
BlockPackedPostingsFormat.java BlockPackedPostingsReader.java
BlockPackedSkipWriter.java
Author: jpountz
Date: Thu Aug 9 09:16:09 2012
New Revision: 1371114
URL: http://svn.apache.org/viewvc?rev=1371114&view=rev
Log:
LUCENE-3892: backporting r1371010 and r1371011 to BlockPacked.
I changed the comment on the value of BLOCK_SIZE. Indeed, since PackedInts
encoding/decoding is long-aligned, all numbers of bits per value that are
co-prime with 64 cannot encode/decode less than 64 values per iteration, so the
block size should be at least 64. Switching it to a lower value (eg. 32) should
work but will perform many unnecessary operations in the encoding/decoding step.
Modified:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java Thu Aug 9 09:16:09 2012
@@ -41,6 +41,10 @@ public final class BlockPackedPostingsFo
private final int minTermBlockSize;
private final int maxTermBlockSize;
+
+ // nocommit is this right?:
+ // NOTE: should be at least 64 because of PackedInts long-aligned encoding/decoding
+ // NOTE: must be factor of ... 64?
public final static int BLOCK_SIZE = 128;
public BlockPackedPostingsFormat() {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java Thu Aug 9 09:16:09 2012
@@ -384,7 +384,6 @@ public final class BlockPackedPostingsRe
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@@ -426,7 +425,6 @@ public final class BlockPackedPostingsRe
}
return doc = NO_MORE_DOCS;
}
- //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
@@ -486,15 +484,15 @@ public final class BlockPackedPostingsRe
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -503,6 +501,12 @@ public final class BlockPackedPostingsRe
}
nextSkipDoc = skipper.getNextSkipDoc();
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan... this is an inlined/pared down version
// of nextDoc():
@@ -510,18 +514,6 @@ public final class BlockPackedPostingsRe
if (DEBUG) {
System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
}
- if (docUpto == docFreq) {
- return doc = NO_MORE_DOCS;
- }
-
- // nocommit: in theory we should not hit this? ie
- // skipper should already have moved us to the block
- // containing the doc? yet assert false trips ... i
- // think because if you advance w/o having done a
- // nextDoc yet()... can we assert/remove this?
- if (docBufferUpto == BLOCK_SIZE) {
- refillDocs();
- }
accum += docDeltaBuffer[docBufferUpto];
docUpto++;
@@ -529,6 +521,9 @@ public final class BlockPackedPostingsRe
break;
}
docBufferUpto++;
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
if (liveDocs == null || liveDocs.get(accum)) {
@@ -666,9 +661,9 @@ public final class BlockPackedPostingsRe
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
+
if (left >= BLOCK_SIZE) {
if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
@@ -797,7 +792,7 @@ public final class BlockPackedPostingsRe
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
@@ -805,8 +800,8 @@ public final class BlockPackedPostingsRe
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -817,6 +812,12 @@ public final class BlockPackedPostingsRe
}
nextSkipDoc = skipper.getNextSkipDoc();
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan... this is an inlined/pared down version
// of nextDoc():
@@ -827,16 +828,6 @@ public final class BlockPackedPostingsRe
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
- // nocommit: in theory we should not hit this? ie
- // skipper should already have moved us to the block
- // containing the doc? yet assert false trips ... i
- // think because if you advance w/o having done a
- // nextDoc yet()... can we assert/remove this?
- if (docBufferUpto == BLOCK_SIZE) {
- // nocommit hmm skip freq? but: we don't ever
- // scan over more than one block?
- refillDocs();
- }
accum += docDeltaBuffer[docBufferUpto];
freq = freqBuffer[docBufferUpto];
posPendingCount += freq;
@@ -846,6 +837,9 @@ public final class BlockPackedPostingsRe
if (accum >= target) {
break;
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
if (liveDocs == null || liveDocs.get(accum)) {
@@ -1110,7 +1104,6 @@ public final class BlockPackedPostingsRe
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@@ -1226,7 +1219,6 @@ public final class BlockPackedPostingsRe
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
- //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
@@ -1293,15 +1285,15 @@ public final class BlockPackedPostingsRe
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -1315,24 +1307,51 @@ public final class BlockPackedPostingsRe
}
nextSkipDoc = skipper.getNextSkipDoc();
}
-
- // nocommit inline nextDoc here
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan:
- while (nextDoc() != NO_MORE_DOCS) {
- if (doc >= target) {
- if (DEBUG) {
- System.out.println(" advance return doc=" + doc);
- }
- return doc;
+ // Now scan:
+ while (true) {
+ if (DEBUG) {
+ System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
}
- }
+ accum += docDeltaBuffer[docBufferUpto];
+ freq = freqBuffer[docBufferUpto];
+ posPendingCount += freq;
+ docBufferUpto++;
+ docUpto++;
- if (DEBUG) {
- System.out.println(" advance return doc=END");
+ if (accum >= target) {
+ break;
+ }
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
- return NO_MORE_DOCS;
+ if (liveDocs == null || liveDocs.get(accum)) {
+ if (DEBUG) {
+ System.out.println(" return doc=" + accum);
+ }
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLength;
+ payloadLength = 0;
+ }
+ position = 0;
+ payloadLength = 0;
+ lastStartOffset = 0;
+ return doc = accum;
+ } else {
+ if (DEBUG) {
+ System.out.println(" now do nextDoc()");
+ }
+ return nextDoc();
+ }
}
// nocommit in theory we could avoid loading frq block
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java Thu Aug 9 09:16:09 2012
@@ -37,7 +37,7 @@ import org.apache.lucene.codecs.MultiLev
* block, only record skip data at the start its start point(if it exist).
*
* For each skip point, we will record:
- * 1. lastDocID,
+ * 1. docID in former position, i.e. for position 12, record docID[11], etc.
* 2. its related file points(position, payload),
* 3. related numbers or uptos(position, payload).
* 4. start offset.