You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2012/11/07 15:17:43 UTC

svn commit: r1406651 - in /lucene/dev/trunk/lucene: ./ codecs/src/java/org/apache/lucene/codecs/compressing/ core/src/java/org/apache/lucene/codecs/lucene41/ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/packed/

Author: jpountz
Date: Wed Nov  7 14:17:42 2012
New Revision: 1406651

URL: http://svn.apache.org/viewvc?rev=1406651&view=rev
Log:
LUCENE-4536: Make PackedInts on-disk format byte-aligned.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Nov  7 14:17:42 2012
@@ -142,6 +142,10 @@ Bug Fixes
 
 Optimizations
 
+* LUCENE-4536: PackedInts on-disk format is now byte-aligned (it used to be
+  long-aligned), saving up to 7 bytes per array of values.
+  (Adrien Grand, Mike McCandless)
+
 * LUCENE-4512: Additional memory savings for CompressingStoredFieldsFormat.
   (Adrien Grand, Robert Muir)
 

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Nov  7 14:17:42 2012
@@ -205,7 +205,7 @@ final class CompressingStoredFieldsReade
     }
     final int length = (int) lengths.next();
     // skip the last values
-    fieldsStream.seek(filePointer + (PackedInts.Format.PACKED.nblocks(bitsPerValue, chunkDocs) << 3));
+    fieldsStream.seek(filePointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, chunkDocs, bitsPerValue));
 
     decompressor.decompress(fieldsStream, offset, length, bytes);
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java Wed Nov  7 14:17:42 2012
@@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene4
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
@@ -83,8 +82,10 @@ final class ForUtil {
    * Compute the number of bytes required to encode a block of values that require
    * <code>bitsPerValue</code> bits per value with format <code>format</code>.
    */
-  private static int encodedSize(PackedInts.Format format, int bitsPerValue) {
-    return format.nblocks(bitsPerValue, BLOCK_SIZE) << 3;
+  private static int encodedSize(PackedInts.Format format, int packedIntsVersion, int bitsPerValue) {
+    final long byteCount = format.byteCount(packedIntsVersion, BLOCK_SIZE, bitsPerValue);
+    assert byteCount >= 0 && byteCount <= Integer.MAX_VALUE : byteCount;
+    return (int) byteCount;
   }
 
   private final int[] encodedSizes;
@@ -107,7 +108,7 @@ final class ForUtil {
           BLOCK_SIZE, bpv, acceptableOverheadRatio);
       assert formatAndBits.format.isSupported(formatAndBits.bitsPerValue);
       assert formatAndBits.bitsPerValue <= 32;
-      encodedSizes[bpv] = encodedSize(formatAndBits.format, formatAndBits.bitsPerValue);
+      encodedSizes[bpv] = encodedSize(formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           formatAndBits.format, PackedInts.VERSION_CURRENT, formatAndBits.bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(
@@ -123,9 +124,7 @@ final class ForUtil {
    */
   ForUtil(DataInput in) throws IOException {
     int packedIntsVersion = in.readVInt();
-    if (packedIntsVersion != PackedInts.VERSION_START) {
-      throw new CorruptIndexException("expected version=" + PackedInts.VERSION_START + " but got version=" + packedIntsVersion);
-    }
+    PackedInts.checkVersion(packedIntsVersion);
     encodedSizes = new int[33];
     encoders = new PackedInts.Encoder[33];
     decoders = new PackedInts.Decoder[33];
@@ -138,7 +137,7 @@ final class ForUtil {
 
       final PackedInts.Format format = PackedInts.Format.byId(formatId);
       assert format.isSupported(bitsPerValue);
-      encodedSizes[bpv] = encodedSize(format, bitsPerValue);
+      encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue);
       encoders[bpv] = PackedInts.getEncoder(
           format, packedIntsVersion, bitsPerValue);
       decoders[bpv] = PackedInts.getDecoder(

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java Wed Nov  7 14:17:42 2012
@@ -2,6 +2,7 @@
 
 package org.apache.lucene.util.packed;
 
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -157,14 +158,19 @@ abstract class BulkOperation implements 
    *  - 50 bits per value -> b=25, v=32
    *  - 63 bits per value -> b=63, v=64
    *  - ...
-   *
+   * <p>
    * A bulk read consists in copying <code>iterations*v</code> values that are
    * contained in <code>iterations*b</code> blocks into a <code>long[]</code>
    * (higher values of <code>iterations</code> are likely to yield a better
    * throughput) => this requires n * (b + v) longs in memory.
-   *
+   * <p>
    * This method computes <code>iterations</code> as
    * <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
+   * <p>
+   * The resulting number of iterations of this method is guaranteed not to
+   * overflow when multiplied by
+   * <tt>8 * {@link PackedInts.Encoder#blockCount()}</tt> or
+   * <tt>8 * {@link PackedInts.Decoder#blockCount()}</tt>.
    */
   public final int computeIterations(int valueCount, int ramBudget) {
     final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java Wed Nov  7 14:17:42 2012
@@ -37,16 +37,15 @@ final class Direct16 extends PackedInts.
     values = new short[valueCount];
   }
 
-  Direct16(DataInput in, int valueCount) throws IOException {
+  Direct16(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
     for (int i = 0; i < valueCount; ++i) {
       values[i] = in.readShort();
     }
-    final int mod = valueCount % 4;
-    if (mod != 0) {
-      for (int i = mod; i < 4; ++i) {
-        in.readShort();
-      }
+    // because packed ints have not always been byte-aligned
+    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 16) - 2L * valueCount);
+    for (int i = 0; i < remaining; ++i) {
+      in.readByte();
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java Wed Nov  7 14:17:42 2012
@@ -37,16 +37,15 @@ final class Direct32 extends PackedInts.
     values = new int[valueCount];
   }
 
-  Direct32(DataInput in, int valueCount) throws IOException {
+  Direct32(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
     for (int i = 0; i < valueCount; ++i) {
       values[i] = in.readInt();
     }
-    final int mod = valueCount % 2;
-    if (mod != 0) {
-      for (int i = mod; i < 2; ++i) {
-        in.readInt();
-      }
+    // because packed ints have not always been byte-aligned
+    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 32) - 4L * valueCount);
+    for (int i = 0; i < remaining; ++i) {
+      in.readByte();
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java Wed Nov  7 14:17:42 2012
@@ -37,7 +37,7 @@ final class Direct64 extends PackedInts.
     values = new long[valueCount];
   }
 
-  Direct64(DataInput in, int valueCount) throws IOException {
+  Direct64(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
     for (int i = 0; i < valueCount; ++i) {
       values[i] = in.readLong();

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java Wed Nov  7 14:17:42 2012
@@ -37,16 +37,13 @@ final class Direct8 extends PackedInts.M
     values = new byte[valueCount];
   }
 
-  Direct8(DataInput in, int valueCount) throws IOException {
+  Direct8(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
-    for (int i = 0; i < valueCount; ++i) {
-      values[i] = in.readByte();
-    }
-    final int mod = valueCount % 8;
-    if (mod != 0) {
-      for (int i = mod; i < 8; ++i) {
-        in.readByte();
-      }
+    in.readBytes(values, 0, valueCount);
+    // because packed ints have not always been byte-aligned
+    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 8) - 1L * valueCount);
+    for (int i = 0; i < remaining; ++i) {
+      in.readByte();
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/DirectPackedReader.java Wed Nov  7 14:17:42 2012
@@ -22,13 +22,10 @@ import org.apache.lucene.store.IndexInpu
 import java.io.IOException;
 
 /* Reads directly from disk on each get */
-final class DirectPackedReader extends PackedInts.ReaderImpl {
+class DirectPackedReader extends PackedInts.ReaderImpl {
   private final IndexInput in;
   private final long startPointer;
 
-  private static final int BLOCK_BITS = Packed64.BLOCK_BITS;
-  private static final int MOD_MASK = Packed64.MOD_MASK;
-
   // masks[n-1] masks for bottom n bits
   private final long[] masks;
 
@@ -49,22 +46,30 @@ final class DirectPackedReader extends P
   @Override
   public long get(int index) {
     final long majorBitPos = (long)index * bitsPerValue;
-    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
-    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
-
-    final long result;
+    final long elementPos = majorBitPos >>> 3;
     try {
-      in.seek(startPointer + (elementPos << 3));
-      final long l1 = in.readLong();
-      final int bits1 = 64 - bitPos;
-      if (bits1 >= bitsPerValue) { // not split
-        result = l1 >> (bits1-bitsPerValue) & masks[bitsPerValue-1];
-      } else {
-        final int bits2 = bitsPerValue - bits1;
-        final long result1 = (l1 & masks[bits1-1]) << bits2;
-        final long l2 = in.readLong();
-        final long result2 = l2 >> (64 - bits2) & masks[bits2-1];
-        result = result1 | result2;
+      in.seek(startPointer + elementPos);
+
+      final byte b0 = in.readByte();
+      final int bitPos = (int) (majorBitPos & 7);
+      if (bitPos + bitsPerValue <= 8) {
+        // special case: all bits are in the first byte
+        return (b0 & ((1L << (8 - bitPos)) - 1)) >>> (8 - bitPos - bitsPerValue);
+      }
+
+      // take bits from the first byte
+      int remainingBits = bitsPerValue - 8 + bitPos;
+      long result = (b0 & ((1L << (8 - bitPos)) - 1)) << remainingBits;
+
+      // add bits from inner bytes
+      while (remainingBits >= 8) {
+        remainingBits -= 8;
+        result |= (in.readByte() & 0xFFL) << remainingBits;
+      }
+
+      // take bits from the last byte
+      if (remainingBits > 0) {
+        result |= (in.readByte() & 0xFFL) >>> (8 - remainingBits);
       }
 
       return result;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java Wed Nov  7 14:17:42 2012
@@ -42,16 +42,15 @@ final class Packed16ThreeBlocks extends 
     blocks = new short[valueCount * 3];
   }
 
-  Packed16ThreeBlocks(DataInput in, int valueCount) throws IOException {
+  Packed16ThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
     for (int i = 0; i < 3 * valueCount; ++i) {
       blocks[i] = in.readShort();
     }
-    final int mod = blocks.length % 4;
-    if (mod != 0) {
-      for (int i = mod; i < 4; ++i) {
-         in.readShort();
-      }
+    // because packed ints have not always been byte-aligned
+    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 48) - 3L * valueCount * 2);
+    for (int i = 0; i < remaining; ++i) {
+       in.readByte();
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java Wed Nov  7 14:17:42 2012
@@ -67,26 +67,10 @@ class Packed64 extends PackedInts.Mutabl
    * @param bitsPerValue the number of bits available for any given value.
    */
   public Packed64(int valueCount, int bitsPerValue) {
-    // NOTE: block-size was previously calculated as
-    // valueCount * bitsPerValue / BLOCK_SIZE + 1
-    // due to memory layout requirements dictated by non-branching code
-    this(new long[size(valueCount, bitsPerValue)],
-            valueCount, bitsPerValue);
-  }
-
-  /**
-   * Creates an array backed by the given blocks.
-   * </p><p>
-   * Note: The blocks are used directly, so changes to the given block will
-   * affect the Packed64-structure.
-   * @param blocks   used as the internal backing array. Not that the last
-   *                 element cannot be addressed directly.
-   * @param valueCount the number of values.
-   * @param bitsPerValue the number of bits available for any given value.
-   */
-  public Packed64(long[] blocks, int valueCount, int bitsPerValue) {
     super(valueCount, bitsPerValue);
-    this.blocks = blocks;
+    final PackedInts.Format format = PackedInts.Format.PACKED;
+    final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
+    this.blocks = new long[longCount];
     maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> (BLOCK_SIZE-bitsPerValue);
     bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
   }
@@ -99,23 +83,30 @@ class Packed64 extends PackedInts.Mutabl
    * @throws java.io.IOException if the values for the backing array could not
    *                             be retrieved.
    */
-  public Packed64(DataInput in, int valueCount, int bitsPerValue)
+  public Packed64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
                                                             throws IOException {
     super(valueCount, bitsPerValue);
-    int size = size(valueCount, bitsPerValue);
-    blocks = new long[size]; // Previously +1 due to non-conditional tricks
-    for(int i=0;i<size;i++) {
+    final PackedInts.Format format = PackedInts.Format.PACKED;
+    final long byteCount = format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
+    final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
+    blocks = new long[longCount];
+    // read as many longs as we can
+    for (int i = 0; i < byteCount / 8; ++i) {
       blocks[i] = in.readLong();
     }
+    final int remaining = (int) (byteCount % 8);
+    if (remaining != 0) {
+      // read the last bytes
+      long lastLong = 0;
+      for (int i = 0; i < remaining; ++i) {
+        lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
+      }
+      blocks[blocks.length - 1] = lastLong;
+    }
     maskRight = ~0L << (BLOCK_SIZE-bitsPerValue) >>> (BLOCK_SIZE-bitsPerValue);
     bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
   }
 
-  private static int size(int valueCount, int bitsPerValue) {
-    final long totBitCount = (long) valueCount * bitsPerValue;
-    return (int)(totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1));
-  }
-
   /**
    * @param index the position of the value.
    * @return the value at the given index.

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java Wed Nov  7 14:17:42 2012
@@ -42,16 +42,13 @@ final class Packed8ThreeBlocks extends P
     blocks = new byte[valueCount * 3];
   }
 
-  Packed8ThreeBlocks(DataInput in, int valueCount) throws IOException {
+  Packed8ThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
     this(valueCount);
-    for (int i = 0; i < 3 * valueCount; ++i) {
-      blocks[i] = in.readByte();
-    }
-    final int mod = blocks.length % 8;
-    if (mod != 0) {
-      for (int i = mod; i < 8; ++i) {
-         in.readByte();
-      }
+    in.readBytes(blocks, 0, 3 * valueCount);
+    // because packed ints have not always been byte-aligned
+    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 24) - 3L * valueCount * 1);
+    for (int i = 0; i < remaining; ++i) {
+       in.readByte();
     }
   }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Wed Nov  7 14:17:42 2012
@@ -62,10 +62,14 @@ public class PackedInts {
   public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
 
   public final static String CODEC_NAME = "PackedInts";
-  public final static int VERSION_START = 0;
-  public final static int VERSION_CURRENT = VERSION_START;
+  public final static int VERSION_START = 0; // PackedInts were long-aligned
+  public final static int VERSION_BYTE_ALIGNED = 1;
+  public final static int VERSION_CURRENT = VERSION_BYTE_ALIGNED;
 
-  private static void checkVersion(int version) {
+  /**
+   * Check the validity of a version number.
+   */
+  public static void checkVersion(int version) {
     if (version < VERSION_START) {
       throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")");
     } else if (version > VERSION_CURRENT) {
@@ -85,8 +89,12 @@ public class PackedInts {
     PACKED(0) {
 
       @Override
-      public int nblocks(int bitsPerValue, int values) {
-        return (int) Math.ceil((double) values * bitsPerValue / 64);
+      public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+        if (packedIntsVersion < VERSION_BYTE_ALIGNED) {
+          return 8L *  (long) Math.ceil((double) valueCount * bitsPerValue / 64);
+        } else {
+          return (long) Math.ceil((double) valueCount * bitsPerValue / 8);
+        }
       }
 
     },
@@ -101,9 +109,9 @@ public class PackedInts {
     PACKED_SINGLE_BLOCK(1) {
 
       @Override
-      public int nblocks(int bitsPerValue, int values) {
+      public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
         final int valuesPerBlock = 64 / bitsPerValue;
-        return (int) Math.ceil((double) values / valuesPerBlock);
+        return (int) Math.ceil((double) valueCount / valuesPerBlock);
       }
 
       @Override
@@ -147,10 +155,29 @@ public class PackedInts {
     }
 
     /**
-     * Computes how many blocks are needed to store <code>values</code> values
-     * of size <code>bitsPerValue</code>.
+     * Computes how many byte blocks are needed to store <code>values</code>
+     * values of size <code>bitsPerValue</code>.
      */
-    public abstract int nblocks(int bitsPerValue, int values);
+    public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+      assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue;
+      // assume long-aligned
+      return 8L * longCount(packedIntsVersion, valueCount, bitsPerValue);
+    }
+
+    /**
+     * Computes how many long blocks are needed to store <code>values</code>
+     * values of size <code>bitsPerValue</code>.
+     */
+    public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
+      assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue;
+      final long byteCount = byteCount(packedIntsVersion, valueCount, bitsPerValue);
+      assert byteCount < 8L * Integer.MAX_VALUE;
+      if ((byteCount % 8) == 0) {
+        return (int) (byteCount / 8);
+      } else {
+        return (int) (byteCount / 8 + 1);
+      }
+    }
 
     /**
      * Tests whether the provided number of bits per value is supported by the
@@ -725,25 +752,25 @@ public class PackedInts {
       case PACKED:
         switch (bitsPerValue) {
           case 8:
-            return new Direct8(in, valueCount);
+            return new Direct8(version, in, valueCount);
           case 16:
-            return new Direct16(in, valueCount);
+            return new Direct16(version, in, valueCount);
           case 32:
-            return new Direct32(in, valueCount);
+            return new Direct32(version, in, valueCount);
           case 64:
-            return new Direct64(in, valueCount);
+            return new Direct64(version, in, valueCount);
           case 24:
             if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) {
-              return new Packed8ThreeBlocks(in, valueCount);
+              return new Packed8ThreeBlocks(version, in, valueCount);
             }
             break;
           case 48:
             if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) {
-              return new Packed16ThreeBlocks(in, valueCount);
+              return new Packed16ThreeBlocks(version, in, valueCount);
             }
             break;
         }
-        return new Packed64(in, valueCount, bitsPerValue);
+        return new Packed64(version, in, valueCount, bitsPerValue);
       default:
         throw new AssertionError("Unknown Writer format: " + format);
     }
@@ -786,7 +813,7 @@ public class PackedInts {
   public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version,
       int valueCount, int bitsPerValue, int mem) {
     checkVersion(version);
-    return new PackedReaderIterator(format, valueCount, bitsPerValue, in, mem);
+    return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem);
   }
 
   /**
@@ -823,12 +850,37 @@ public class PackedInts {
    * @return a direct Reader
    * @lucene.internal
    */
-  public static Reader getDirectReaderNoHeader(IndexInput in, Format format,
+  public static Reader getDirectReaderNoHeader(final IndexInput in, Format format,
       int version, int valueCount, int bitsPerValue) {
     checkVersion(version);
     switch (format) {
       case PACKED:
-        return new DirectPackedReader(bitsPerValue, valueCount, in);
+        final long byteCount = format.byteCount(version, valueCount, bitsPerValue);
+        if (byteCount != format.byteCount(VERSION_CURRENT, valueCount, bitsPerValue)) {
+          assert version == VERSION_START;
+          final long endPointer = in.getFilePointer() + byteCount;
+          // Some consumers of direct readers assume that reading the last value
+          // will make the underlying IndexInput go to the end of the packed
+          // stream, but this is not true because packed ints storage used to be
+          // long-aligned and is now byte-aligned, hence this additional
+          // condition when reading the last value
+          return new DirectPackedReader(bitsPerValue, valueCount, in) {
+            @Override
+            public long get(int index) {
+              final long result = super.get(index);
+              if (index == valueCount - 1) {
+                try {
+                  in.seek(endPointer);
+                } catch (IOException e) {
+                  throw new IllegalStateException("failed", e);
+                }
+              }
+              return result;
+            }
+          };
+        } else {
+          return new DirectPackedReader(bitsPerValue, valueCount, in);
+        }
       case PACKED_SINGLE_BLOCK:
         return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
       default:

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Wed Nov  7 14:17:42 2012
@@ -19,29 +19,30 @@ package org.apache.lucene.util.packed;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.LongsRef;
 
 final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
 
+  final int packedIntsVersion;
   final PackedInts.Format format;
   final BulkOperation bulkOperation;
-  final long[] nextBlocks;
+  final byte[] nextBlocks;
   final LongsRef nextValues;
   final int iterations;
   int position;
 
-  PackedReaderIterator(PackedInts.Format format, int valueCount, int bitsPerValue, DataInput in, int mem) {
+  PackedReaderIterator(PackedInts.Format format, int packedIntsVersion, int valueCount, int bitsPerValue, DataInput in, int mem) {
     super(valueCount, bitsPerValue, in);
     this.format = format;
+    this.packedIntsVersion = packedIntsVersion;
     bulkOperation = BulkOperation.of(format, bitsPerValue);
     iterations = bulkOperation.computeIterations(valueCount, mem);
     assert valueCount == 0 || iterations > 0;
-    nextBlocks = new long[iterations * bulkOperation.blockCount()];
+    nextBlocks = new byte[8 * iterations * bulkOperation.blockCount()];
     nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
-    assert iterations * bulkOperation.valueCount() == nextValues.longs.length;
-    assert iterations * bulkOperation.blockCount() == nextBlocks.length;
     nextValues.offset = nextValues.longs.length;
     position = -1;
   }
@@ -61,13 +62,11 @@ final class PackedReaderIterator extends
     count = Math.min(remaining, count);
 
     if (nextValues.offset == nextValues.longs.length) {
-      final int remainingBlocks = format.nblocks(bitsPerValue, remaining);
-      final int blocksToRead = Math.min(remainingBlocks, nextBlocks.length);
-      for (int i = 0; i < blocksToRead; ++i) {
-        nextBlocks[i] = in.readLong();
-      }
-      for (int i = blocksToRead; i < nextBlocks.length; ++i) {
-        nextBlocks[i] = 0L;
+      final long remainingBlocks = format.byteCount(packedIntsVersion, remaining, bitsPerValue);
+      final int blocksToRead = (int) Math.min(remainingBlocks, nextBlocks.length);
+      in.readBytes(nextBlocks, 0, blocksToRead);
+      if (blocksToRead < nextBlocks.length) {
+        Arrays.fill(nextBlocks, blocksToRead, nextBlocks.length, (byte) 0);
       }
 
       bulkOperation.decode(nextBlocks, 0, nextValues.longs, 0, iterations);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java Wed Nov  7 14:17:42 2012
@@ -31,7 +31,7 @@ final class PackedWriter extends PackedI
   boolean finished;
   final PackedInts.Format format;
   final BulkOperation encoder;
-  final long[] nextBlocks;
+  final byte[] nextBlocks;
   final long[] nextValues;
   final int iterations;
   int off;
@@ -42,7 +42,7 @@ final class PackedWriter extends PackedI
     this.format = format;
     encoder = BulkOperation.of(format, bitsPerValue);
     iterations = encoder.computeIterations(valueCount, mem);
-    nextBlocks = new long[iterations * encoder.blockCount()];
+    nextBlocks = new byte[8 * iterations * encoder.blockCount()];
     nextValues = new long[iterations * encoder.valueCount()];
     off = 0;
     written = 0;
@@ -82,10 +82,8 @@ final class PackedWriter extends PackedI
 
   private void flush() throws IOException {
     encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
-    final int blocks = format.nblocks(bitsPerValue, off);
-    for (int i = 0; i < blocks; ++i) {
-      out.writeLong(nextBlocks[i]);
-    }
+    final int blockCount = (int) format.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
+    out.writeBytes(nextBlocks, blockCount);
     Arrays.fill(nextValues, 0L);
     off = 0;
   }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_Direct.py Wed Nov  7 14:17:42 2012
@@ -65,17 +65,19 @@ if __name__ == '__main__':
     f.write("    values = new %s[valueCount];\n" %TYPES[bpv])
     f.write("  }\n\n")
 
-    f.write("  Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
+    f.write("  Direct%d(int packedIntsVersion, DataInput in, int valueCount) throws IOException {\n" %bpv)
     f.write("    this(valueCount);\n")
-    f.write("    for (int i = 0; i < valueCount; ++i) {\n")
-    f.write("      values[i] = in.read%s();\n" %TYPES[bpv].title())
-    f.write("    }\n")
+    if bpv == 8:
+      f.write("    in.readBytes(values, 0, valueCount);\n")
+    else:
+      f.write("    for (int i = 0; i < valueCount; ++i) {\n")
+      f.write("      values[i] = in.read%s();\n" %TYPES[bpv].title())
+      f.write("    }\n")
     if bpv != 64:
-      f.write("    final int mod = valueCount %% %d;\n" %(64 / bpv))
-      f.write("    if (mod != 0) {\n")
-      f.write("      for (int i = mod; i < %d; ++i) {\n" %(64 / bpv))
-      f.write("        in.read%s();\n" %TYPES[bpv].title())
-      f.write("      }\n")
+      f.write("    // because packed ints have not always been byte-aligned\n")
+      f.write("    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, %d) - %dL * valueCount);\n" %(bpv, bpv / 8))
+      f.write("    for (int i = 0; i < remaining; ++i) {\n")
+      f.write("      in.readByte();\n")
       f.write("    }\n")
     f.write("  }\n")
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py Wed Nov  7 14:17:42 2012
@@ -70,16 +70,18 @@ if __name__ == '__main__':
     f.write("    blocks = new %s[valueCount * 3];\n" %TYPES[bpv])
     f.write("  }\n\n")
 
-    f.write("  Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
+    f.write("  Packed%dThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {\n" %bpv)
     f.write("    this(valueCount);\n")
-    f.write("    for (int i = 0; i < 3 * valueCount; ++i) {\n")
-    f.write("      blocks[i] = in.read%s();\n" %TYPES[bpv].title())
-    f.write("    }\n")
-    f.write("    final int mod = blocks.length %% %d;\n" %(64 / bpv))
-    f.write("    if (mod != 0) {\n")
-    f.write("      for (int i = mod; i < %d; ++i) {\n" %(64 / bpv))
-    f.write("         in.read%s();\n" %TYPES[bpv].title())
-    f.write("      }\n")
+    if bpv == 8:
+      f.write("    in.readBytes(blocks, 0, 3 * valueCount);\n")
+    else:
+      f.write("    for (int i = 0; i < 3 * valueCount; ++i) {\n")
+      f.write("      blocks[i] = in.read%s();\n" %TYPES[bpv].title())
+      f.write("    }\n")
+    f.write("    // because packed ints have not always been byte-aligned\n")
+    f.write("    final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, %d) - 3L * valueCount * %d);\n" %(3 * bpv, bpv / 8))
+    f.write("    for (int i = 0; i < remaining; ++i) {\n")
+    f.write("       in.readByte();\n")
     f.write("    }\n")
     f.write("  }\n")
 

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1406651&r1=1406650&r2=1406651&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Wed Nov  7 14:17:42 2012
@@ -40,8 +40,28 @@ import org.apache.lucene.util.packed.Pac
 
 import org.junit.Ignore;
 
+import com.carrotsearch.randomizedtesting.generators.RandomInts;
+
 @Slow
 public class TestPackedInts extends LuceneTestCase {
+
+  public void testByteCount() {
+    final int iters = atLeast(3);
+    for (int i = 0; i < iters; ++i) {
+      final int valueCount = RandomInts.randomIntBetween(random(), 1, Integer.MAX_VALUE);
+      for (PackedInts.Format format : PackedInts.Format.values()) {
+        for (int bpv = 1; bpv <= 64; ++bpv) {
+          final long byteCount = format.byteCount(PackedInts.VERSION_CURRENT, valueCount, bpv);
+          String msg = "format=" + format + ", byteCount=" + byteCount + ", valueCount=" + valueCount + ", bpv=" + bpv;
+          assertTrue(msg, byteCount * 8 >= (long) valueCount * bpv);
+          if (format == PackedInts.Format.PACKED) {
+            assertTrue(msg, (byteCount - 1) * 8 < (long) valueCount * bpv);
+          }
+        }
+      }
+    }
+  }
+
   public void testBitsRequired() {
     assertEquals(61, PackedInts.bitsRequired((long)Math.pow(2, 61)-1));
     assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL));
@@ -88,21 +108,8 @@ public class TestPackedInts extends Luce
         final long fp = out.getFilePointer();
         out.close();
 
-        // packed writers should only write longs
-        assertEquals(0, (fp - startFp) % 8);
         // ensure that finish() added the (valueCount-actualValueCount) missing values
-        final long bytes;
-        switch (w.getFormat()) {
-          case PACKED:
-            bytes = (long) Math.ceil((double) valueCount * w.bitsPerValue / 64) << 3;
-            break;
-          case PACKED_SINGLE_BLOCK:
-            final int valuesPerBlock = 64 / w.bitsPerValue;
-            bytes = (long) Math.ceil((double) valueCount / valuesPerBlock) << 3;
-            break;
-          default:
-            bytes = -1;
-        }
+        final long bytes = w.getFormat().byteCount(PackedInts.VERSION_CURRENT, valueCount, w.bitsPerValue);
         assertEquals(bytes, fp - startFp);
 
         {// test header
@@ -170,13 +177,58 @@ public class TestPackedInts extends Luce
             long value = intsEnum.get(index);
             assertEquals(msg, value, values[index]);
           }
+          intsEnum.get(intsEnum.size() - 1);
+          assertEquals(fp, in.getFilePointer());
           in.close();
         }
         d.close();
       }
     }
   }
-  
+
+  public void testEndPointer() throws IOException {
+    final Directory dir = newDirectory();
+    final int valueCount = RandomInts.randomIntBetween(random(), 1, 1000);
+    final IndexOutput out = dir.createOutput("tests.bin", newIOContext(random()));
+    for (int i = 0; i < valueCount; ++i) {
+      out.writeLong(0);
+    }
+    out.close();
+    final IndexInput in = dir.openInput("tests.bin", newIOContext(random()));
+    for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; ++version) {
+      for (int bpv = 1; bpv <= 64; ++bpv) {
+        for (PackedInts.Format format : PackedInts.Format.values()) {
+          if (!format.isSupported(bpv)) {
+            continue;
+          }
+          final long byteCount = format.byteCount(version, valueCount, bpv); 
+          String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;
+
+          // test iterator
+          in.seek(0L);
+          final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(in, format, version, valueCount, bpv, RandomInts.randomIntBetween(random(), 1, 1<<16));
+          for (int i = 0; i < valueCount; ++i) {
+            it.next();
+          }
+          assertEquals(msg, byteCount, in.getFilePointer());
+
+          // test direct reader
+          in.seek(0L);
+          final PackedInts.Reader directReader = PackedInts.getDirectReaderNoHeader(in, format, version, valueCount, bpv);
+          directReader.get(valueCount - 1);
+          assertEquals(msg, byteCount, in.getFilePointer());
+
+          // test reader
+          in.seek(0L);
+          PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
+          assertEquals(msg, byteCount, in.getFilePointer());
+         }
+      }
+    }
+    in.close();
+    dir.close();
+  }
+
   public void testControlledEquality() {
     final int VALUE_COUNT = 255;
     final int BITS_PER_VALUE = 8;
@@ -629,6 +681,7 @@ public class TestPackedInts extends Luce
         in.close();
         directory.deleteFile("packed-ints.bin");
       }
+      directory.close();
     }
   }