You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 20:24:15 UTC
svn commit: r1069925 - in
/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock:
FixedIntBlockIndexInput.java FixedIntBlockIndexOutput.java
VariableIntBlockIndexInput.java VariableIntBlockIndexOutput.java
Author: rmuir
Date: Fri Feb 11 19:24:15 2011
New Revision: 1069925
URL: http://svn.apache.org/viewvc?rev=1069925&view=rev
Log:
LUCENE-2905: write pointers and skip data more efficiently for fixed and variable intblock
Modified:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Fri Feb 11 19:24:15 2011
@@ -144,19 +144,17 @@ public abstract class FixedIntBlockIndex
// nocommit -- somehow we should share the "upto" for
// doc & freq since they will always be "in sync"
if (absolute) {
- fp = indexIn.readVLong();
upto = indexIn.readVInt();
+ fp = indexIn.readVLong();
} else {
- // nocommit -- can't this be more efficient? read a
- // single byte and check a bit? block size is 128...
- final long delta = indexIn.readVLong();
- if (delta == 0) {
+ final int uptoDelta = indexIn.readVInt();
+ if ((uptoDelta & 1) == 1) {
// same block
- upto += indexIn.readVInt();
+ upto += uptoDelta >>> 1;
} else {
// new block
- fp += delta;
- upto = indexIn.readVInt();
+ upto = uptoDelta >>> 1;
+ fp += indexIn.readVLong();
}
}
assert upto < blockSize;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Fri Feb 11 19:24:15 2011
@@ -77,17 +77,17 @@ public abstract class FixedIntBlockIndex
@Override
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
if (absolute) {
- indexOut.writeVLong(fp);
indexOut.writeVInt(upto);
+ indexOut.writeVLong(fp);
} else if (fp == lastFP) {
// same block
- indexOut.writeVLong(0);
assert upto >= lastUpto;
- indexOut.writeVInt(upto - lastUpto);
+ int uptoDelta = upto - lastUpto;
+ indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
+ indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
- indexOut.writeVInt(upto);
}
lastUpto = upto;
lastFP = fp;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Fri Feb 11 19:24:15 2011
@@ -157,17 +157,17 @@ public abstract class VariableIntBlockIn
@Override
public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
+ upto = indexIn.readVInt();
fp = indexIn.readVLong();
- upto = indexIn.readByte()&0xFF;
} else {
- final long delta = indexIn.readVLong();
- if (delta == 0) {
+ final int uptoDelta = indexIn.readVInt();
+ if ((uptoDelta & 1) == 1) {
// same block
- upto = indexIn.readByte()&0xFF;
+ upto += uptoDelta >>> 1;
} else {
// new block
- fp += delta;
- upto = indexIn.readByte()&0xFF;
+ upto = uptoDelta >>> 1;
+ fp += indexIn.readVLong();
}
}
// TODO: we can't do this assert because non-causal
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Fri Feb 11 19:24:15 2011
@@ -42,16 +42,14 @@ public abstract class VariableIntBlockIn
private int upto;
- private static final int MAX_BLOCK_SIZE = 1 << 8;
+ // TODO what Var-Var codecs exist in practice... and what are their blocksizes like?
+ // if it's less than 128 we should set that as max and use byte?
- /** NOTE: maxBlockSize plus the max non-causal lookahead
- * of your codec must be less than 256. EG Simple9
+ /** NOTE: maxBlockSize must be the maximum block size
+ * plus the max non-causal lookahead of your codec. EG Simple9
* requires lookahead=1 because on seeing the Nth value
* it knows it must now encode the N-1 values before it. */
protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) throws IOException {
- if (maxBlockSize > MAX_BLOCK_SIZE) {
- throw new IllegalArgumentException("maxBlockSize must be <= " + MAX_BLOCK_SIZE + "; got " + maxBlockSize);
- }
this.out = out;
out.writeInt(maxBlockSize);
}
@@ -88,17 +86,17 @@ public abstract class VariableIntBlockIn
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
assert upto >= 0;
if (absolute) {
+ indexOut.writeVInt(upto);
indexOut.writeVLong(fp);
- indexOut.writeByte((byte) upto);
} else if (fp == lastFP) {
// same block
- indexOut.writeVLong(0);
assert upto >= lastUpto;
- indexOut.writeByte((byte) upto);
+ int uptoDelta = upto - lastUpto;
+ indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
+ indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
- indexOut.writeByte((byte) upto);
}
lastUpto = upto;
lastFP = fp;