You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/09 01:02:05 UTC
svn commit: r1371010 - in
/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block:
BlockPostingsReader.java BlockSkipWriter.java gendecompress.py
Author: mikemccand
Date: Wed Aug 8 23:02:05 2012
New Revision: 1371010
URL: http://svn.apache.org/viewvc?rev=1371010&view=rev
Log:
LUCENE-4283: further optimize scan-after-advance
Modified:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java?rev=1371010&r1=1371009&r2=1371010&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java Wed Aug 8 23:02:05 2012
@@ -412,7 +412,6 @@ public final class BlockPostingsReader e
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@@ -451,7 +450,6 @@ public final class BlockPostingsReader e
}
return doc = NO_MORE_DOCS;
}
- //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
@@ -510,15 +508,15 @@ public final class BlockPostingsReader e
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -527,6 +525,12 @@ public final class BlockPostingsReader e
}
nextSkipDoc = skipper.getNextSkipDoc();
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan... this is an inlined/pared down version
// of nextDoc():
@@ -534,18 +538,6 @@ public final class BlockPostingsReader e
if (DEBUG) {
System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
}
- if (docUpto == docFreq) {
- return doc = NO_MORE_DOCS;
- }
-
- // nocommit: in theory we should not hit this? ie
- // skipper should already have moved us to the block
- // containing the doc? yet assert false trips ... i
- // think because if you advance w/o having done a
- // nextDoc yet()... can we assert/remove this?
- if (docBufferUpto == BLOCK_SIZE) {
- refillDocs();
- }
accum += docDeltaBuffer[docBufferUpto];
docUpto++;
@@ -553,6 +545,9 @@ public final class BlockPostingsReader e
break;
}
docBufferUpto++;
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
if (liveDocs == null || liveDocs.get(accum)) {
@@ -692,9 +687,9 @@ public final class BlockPostingsReader e
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
+
if (left >= BLOCK_SIZE) {
if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
@@ -761,7 +756,6 @@ public final class BlockPostingsReader e
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
- //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
@@ -823,7 +817,7 @@ public final class BlockPostingsReader e
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
@@ -831,8 +825,8 @@ public final class BlockPostingsReader e
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -843,6 +837,12 @@ public final class BlockPostingsReader e
}
nextSkipDoc = skipper.getNextSkipDoc();
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan... this is an inlined/pared down version
// of nextDoc():
@@ -853,16 +853,6 @@ public final class BlockPostingsReader e
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
- // nocommit: in theory we should not hit this? ie
- // skipper should already have moved us to the block
- // containing the doc? yet assert false trips ... i
- // think because if you advance w/o having done a
- // nextDoc yet()... can we assert/remove this?
- if (docBufferUpto == BLOCK_SIZE) {
- // nocommit hmm skip freq? but: we don't ever
- // scan over more than one block?
- refillDocs();
- }
accum += docDeltaBuffer[docBufferUpto];
freq = freqBuffer[docBufferUpto];
posPendingCount += freq;
@@ -872,6 +862,9 @@ public final class BlockPostingsReader e
if (accum >= target) {
break;
}
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
if (liveDocs == null || liveDocs.get(accum)) {
@@ -1138,7 +1131,6 @@ public final class BlockPostingsReader e
}
private void refillDocs() throws IOException {
- //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto;
assert left > 0;
@@ -1254,7 +1246,6 @@ public final class BlockPostingsReader e
if (docUpto == docFreq) {
return doc = NO_MORE_DOCS;
}
- //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
@@ -1320,15 +1311,15 @@ public final class BlockPostingsReader e
skipped = true;
}
- final int newDocUpto = skipper.skipTo(target);
+ final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > docUpto) {
// Skipper moved
if (DEBUG) {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
}
- assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
- docUpto = newDocUpto+1;
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
@@ -1342,24 +1333,50 @@ public final class BlockPostingsReader e
}
nextSkipDoc = skipper.getNextSkipDoc();
}
-
- // nocommit inline nextDoc here
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
// Now scan:
- while (nextDoc() != NO_MORE_DOCS) {
- if (doc >= target) {
- if (DEBUG) {
- System.out.println(" advance return doc=" + doc);
- }
- return doc;
+ while (true) {
+ if (DEBUG) {
+ System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
}
- }
+ accum += docDeltaBuffer[docBufferUpto];
+ freq = freqBuffer[docBufferUpto];
+ posPendingCount += freq;
+ docBufferUpto++;
+ docUpto++;
- if (DEBUG) {
- System.out.println(" advance return doc=END");
+ if (accum >= target) {
+ break;
+ }
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
}
- return NO_MORE_DOCS;
+ if (liveDocs == null || liveDocs.get(accum)) {
+ if (DEBUG) {
+ System.out.println(" return doc=" + accum);
+ }
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLength;
+ payloadLength = 0;
+ }
+ position = 0;
+ payloadLength = 0;
+ lastStartOffset = 0;
+ return doc = accum;
+ } else {
+ if (DEBUG) {
+ System.out.println(" now do nextDoc()");
+ }
+ return nextDoc();
+ }
}
// nocommit in theory we could avoid loading frq block
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java?rev=1371010&r1=1371009&r2=1371010&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java Wed Aug 8 23:02:05 2012
@@ -37,7 +37,7 @@ import org.apache.lucene.codecs.MultiLev
* block, only record skip data at its start point (if it exists).
*
* For each skip point, we will record:
- * 1. lastDocID,
+ * 1. the docID at the preceding position, e.g. for position 12, record docID[11],
* 2. its related file pointers (position, payload),
* 3. related numbers or uptos(position, payload).
* 4. start offset.
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py?rev=1371010&r1=1371009&r2=1371010&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/block/gendecompress.py Wed Aug 8 23:02:05 2012
@@ -81,7 +81,7 @@ def genDecompress():
w(' Arrays.fill(output, compressedBuffer.get());\n')
w(' }\n')
- for numFrameBits in xrange(1, 33):
+ for numFrameBits in xrange(1, 32):
w(' public static void decode%d(final IntBuffer compressedBuffer, final int[] output) {\n' % numFrameBits)
w(' final int numFrameBits = %d;\n' % numFrameBits)
w(' final int mask = (int) ((1L<<numFrameBits) - 1);\n')