You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 20:24:15 UTC

svn commit: r1069925 - in /lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock: FixedIntBlockIndexInput.java FixedIntBlockIndexOutput.java VariableIntBlockIndexInput.java VariableIntBlockIndexOutput.java

Author: rmuir
Date: Fri Feb 11 19:24:15 2011
New Revision: 1069925

URL: http://svn.apache.org/viewvc?rev=1069925&view=rev
Log:
LUCENE-2905: write pointers and skip data more efficiently for fixed and variable intblock

Modified:
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Fri Feb 11 19:24:15 2011
@@ -144,19 +144,17 @@ public abstract class FixedIntBlockIndex
       // nocommit -- somehow we should share the "upto" for
       // doc & freq since they will always be "in sync"
       if (absolute) {
-        fp = indexIn.readVLong();
         upto = indexIn.readVInt();
+        fp = indexIn.readVLong();
       } else {
-        // nocommit -- can't this be more efficient?  read a
-        // single byte and check a bit?  block size is 128...
-        final long delta = indexIn.readVLong();
-        if (delta == 0) {
+        final int uptoDelta = indexIn.readVInt();
+        if ((uptoDelta & 1) == 1) {
           // same block
-          upto += indexIn.readVInt();
+          upto += uptoDelta >>> 1;
         } else {
           // new block
-          fp += delta;
-          upto = indexIn.readVInt();
+          upto = uptoDelta >>> 1;
+          fp += indexIn.readVLong();
         }
       }
       assert upto < blockSize;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Fri Feb 11 19:24:15 2011
@@ -77,17 +77,17 @@ public abstract class FixedIntBlockIndex
     @Override
     public void write(IndexOutput indexOut, boolean absolute) throws IOException {
       if (absolute) {
-        indexOut.writeVLong(fp);
         indexOut.writeVInt(upto);
+        indexOut.writeVLong(fp);
       } else if (fp == lastFP) {
         // same block
-        indexOut.writeVLong(0);
         assert upto >= lastUpto;
-        indexOut.writeVInt(upto - lastUpto);
+        int uptoDelta = upto - lastUpto;
+        indexOut.writeVInt(uptoDelta << 1 | 1);
       } else {      
         // new block
+        indexOut.writeVInt(upto << 1);
         indexOut.writeVLong(fp - lastFP);
-        indexOut.writeVInt(upto);
       }
       lastUpto = upto;
       lastFP = fp;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Fri Feb 11 19:24:15 2011
@@ -157,17 +157,17 @@ public abstract class VariableIntBlockIn
     @Override
     public void read(final DataInput indexIn, final boolean absolute) throws IOException {
       if (absolute) {
+        upto = indexIn.readVInt();
         fp = indexIn.readVLong();
-        upto = indexIn.readByte()&0xFF;
       } else {
-        final long delta = indexIn.readVLong();
-        if (delta == 0) {
+        final int uptoDelta = indexIn.readVInt();
+        if ((uptoDelta & 1) == 1) {
           // same block
-          upto = indexIn.readByte()&0xFF;
+          upto += uptoDelta >>> 1;
         } else {
           // new block
-          fp += delta;
-          upto = indexIn.readByte()&0xFF;
+          upto = uptoDelta >>> 1;
+          fp += indexIn.readVLong();
         }
       }
       // TODO: we can't do this assert because non-causal

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1069925&r1=1069924&r2=1069925&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Fri Feb 11 19:24:15 2011
@@ -42,16 +42,14 @@ public abstract class VariableIntBlockIn
 
   private int upto;
 
-  private static final int MAX_BLOCK_SIZE = 1 << 8;
+  // TODO: what Var-Var codecs exist in practice... and what are their block sizes like?
+  // if it's less than 128 we should set that as max and use byte?
 
-  /** NOTE: maxBlockSize plus the max non-causal lookahead
-   *  of your codec must be less than 256.  EG Simple9
+  /** NOTE: maxBlockSize must be the maximum block size 
+   *  plus the max non-causal lookahead of your codec.  EG Simple9
    *  requires lookahead=1 because on seeing the Nth value
    *  it knows it must now encode the N-1 values before it. */
   protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) throws IOException {
-    if (maxBlockSize > MAX_BLOCK_SIZE) {
-      throw new IllegalArgumentException("maxBlockSize must be <= " + MAX_BLOCK_SIZE + "; got " + maxBlockSize);
-    }
     this.out = out;
     out.writeInt(maxBlockSize);
   }
@@ -88,17 +86,17 @@ public abstract class VariableIntBlockIn
     public void write(IndexOutput indexOut, boolean absolute) throws IOException {
       assert upto >= 0;
       if (absolute) {
+        indexOut.writeVInt(upto);
         indexOut.writeVLong(fp);
-        indexOut.writeByte((byte) upto);
       } else if (fp == lastFP) {
         // same block
-        indexOut.writeVLong(0);
         assert upto >= lastUpto;
-        indexOut.writeByte((byte) upto);
+        int uptoDelta = upto - lastUpto;
+        indexOut.writeVInt(uptoDelta << 1 | 1);
       } else {      
         // new block
+        indexOut.writeVInt(upto << 1);
         indexOut.writeVLong(fp - lastFP);
-        indexOut.writeByte((byte) upto);
       }
       lastUpto = upto;
       lastFP = fp;