You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/01/22 00:47:32 UTC

svn commit: r1436692 - in /lucene/dev/branches/lucene4547/lucene/core/src: java/org/apache/lucene/codecs/compressing/ java/org/apache/lucene/codecs/lucene42/ java/org/apache/lucene/util/packed/ test/org/apache/lucene/util/packed/

Author: jpountz
Date: Mon Jan 21 23:47:31 2013
New Revision: 1436692

URL: http://svn.apache.org/viewvc?rev=1436692&view=rev
Log:
NumericDocValues: Prevent rare extreme values from raising the number of bits per value for everyone.

Added:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java   (with props)
Modified:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Mon Jan 21 23:47:31 2013
@@ -57,7 +57,7 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.BlockPackedReaderIterator;
 import org.apache.lucene.util.packed.PackedInts;
 
 
@@ -76,7 +76,7 @@ public final class CompressingTermVector
   private final int chunkSize;
   private final int numDocs;
   private boolean closed;
-  private final BlockPackedReader reader;
+  private final BlockPackedReaderIterator reader;
 
   // used by clone
   private CompressingTermVectorsReader(CompressingTermVectorsReader reader) {
@@ -88,7 +88,7 @@ public final class CompressingTermVector
     this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
-    this.reader = new BlockPackedReader(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
     this.closed = false;
   }
 
@@ -119,7 +119,7 @@ public final class CompressingTermVector
       packedIntsVersion = vectorsStream.readVInt();
       chunkSize = vectorsStream.readVInt();
       decompressor = compressionMode.newDecompressor();
-      this.reader = new BlockPackedReader(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
 
       success = true;
     } finally {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Mon Jan 21 23:47:31 2013
@@ -36,6 +36,7 @@ import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -53,7 +54,9 @@ class Lucene42DocValuesConsumer extends 
   static final byte NUMBER = 0;
   static final byte BYTES = 1;
   static final byte FST = 2;
-  
+
+  static final int BLOCK_SIZE = 4096;
+
   final IndexOutput data, meta;
   
   Lucene42DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@@ -97,15 +100,10 @@ class Lucene42DocValuesConsumer extends 
       }
     }
 
-    long delta = maxValue - minValue;
-    final int bitsPerValue;
-    if (delta < 0) {
-      bitsPerValue = 64;
-      meta.writeByte((byte)0); // delta-compressed
-    } else if (uniqueValues != null && PackedInts.bitsRequired(uniqueValues.size()-1) < PackedInts.bitsRequired(delta)) {
+    final long delta = maxValue - minValue;
+    if (uniqueValues != null && (delta < 0 || PackedInts.bitsRequired(uniqueValues.size()-1) < PackedInts.bitsRequired(delta))) {
       // smaller to tableize
-      bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
-      minValue = 0; // we will write indexes into the table instead of values
+      final int bitsPerValue = PackedInts.bitsRequired(uniqueValues.size()-1);
       meta.writeByte((byte)1); // table-compressed
       Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
       final HashMap<Long,Integer> encode = new HashMap<Long,Integer>();
@@ -114,39 +112,29 @@ class Lucene42DocValuesConsumer extends 
         data.writeLong(decode[i]);
         encode.put(decode[i], i);
       }
-      final Iterable<Number> original = values;
-      values = new Iterable<Number>() {
-        @Override
-        public Iterator<Number> iterator() {
-          final Iterator<Number> inner = original.iterator();
-          return new Iterator<Number>() {
-            @Override
-            public boolean hasNext() {
-              return inner.hasNext();
-            }
 
-            @Override
-            public Number next() {
-              return encode.get(inner.next());
-            }
-
-            @Override
-            public void remove() { throw new UnsupportedOperationException(); }
-          };
-        }
-      };
+      data.writeVInt(PackedInts.VERSION_CURRENT);
+      data.writeVInt(count);
+      data.writeVInt(bitsPerValue);
+
+      final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, count, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
+      for(Number nv : values) {
+        writer.add(encode.get(nv));
+      }
+      writer.finish();
     } else {
-      bitsPerValue = PackedInts.bitsRequired(delta);
       meta.writeByte((byte)0); // delta-compressed
-    }
-
-    data.writeLong(minValue);
 
-    final PackedInts.Writer writer = PackedInts.getWriter(data, count, bitsPerValue, PackedInts.COMPACT);
-    for(Number nv : values) {
-      writer.add(nv.longValue() - minValue);
+      data.writeVInt(PackedInts.VERSION_CURRENT);
+      data.writeVInt(count);
+      data.writeVInt(BLOCK_SIZE);
+
+      final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
+      for (Number nv : values) {
+        writer.add(nv.longValue());
+      }
+      writer.finish();
     }
-    writer.finish();
   }
   
   @Override

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Mon Jan 21 23:47:31 2013
@@ -42,6 +42,7 @@ import org.apache.lucene.util.fst.FST.Ar
 import org.apache.lucene.util.fst.FST.BytesReader;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.BlockPackedReader;
 import org.apache.lucene.util.packed.PackedInts;
 
 class Lucene42DocValuesProducer extends DocValuesProducer {
@@ -139,9 +140,10 @@ class Lucene42DocValuesProducer extends 
       for (int i = 0; i < decode.length; i++) {
         decode[i] = data.readLong();
       }
-      final long minValue = data.readLong();
-      assert minValue == 0;
-      final PackedInts.Reader reader = PackedInts.getReader(data);
+      final int packedIntsVersion = data.readVInt();
+      final int count = data.readVInt();
+      final int bitsPerValue = data.readVInt();
+      final PackedInts.Reader reader = PackedInts.getReaderNoHeader(data, PackedInts.Format.PACKED, packedIntsVersion, count, bitsPerValue);
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
@@ -149,12 +151,14 @@ class Lucene42DocValuesProducer extends 
         }
       };
     } else {
-      final long minValue = data.readLong();
-      final PackedInts.Reader reader = PackedInts.getReader(data);
+      final int packedIntsVersion = data.readVInt();
+      final int count = data.readVInt();
+      final int blockSize = data.readVInt();
+      final BlockPackedReader reader = new BlockPackedReader(data, packedIntsVersion, blockSize, count, false);
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
-          return minValue + reader.get(docID);
+          return reader.get(docID);
         }
       };
     }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java Mon Jan 21 23:47:31 2013
@@ -17,225 +17,74 @@ package org.apache.lucene.util.packed;
  * limitations under the License.
  */
 
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
 import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
 import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
 import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
 
-import java.io.EOFException;
 import java.io.IOException;
-import java.util.Arrays;
 
-import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.LongsRef;
 
 /**
- * Reader for sequences of longs written with {@link BlockPackedWriter}.
- * @see BlockPackedWriter
+ * Provides random access to a stream written with {@link BlockPackedWriter}.
  * @lucene.internal
  */
 public final class BlockPackedReader {
 
-  static long zigZagDecode(long n) {
-    return ((n >>> 1) ^ -(n & 1));
-  }
-
-  // same as DataInput.readVLong but supports negative values
-  static long readVLong(DataInput in) throws IOException {
-    byte b = in.readByte();
-    if (b >= 0) return b;
-    long i = b & 0x7FL;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 7;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 14;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 21;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 28;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 35;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 42;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0x7FL) << 49;
-    if (b >= 0) return i;
-    b = in.readByte();
-    i |= (b & 0xFFL) << 56;
-    return i;
-  }
+  private final int blockShift, blockMask;
+  private final long valueCount;
+  private final long[] minValues;
+  private final PackedInts.Reader[] subReaders;
 
-  DataInput in;
-  final int packedIntsVersion;
-  long valueCount;
-  final int blockSize;
-  final long[] values;
-  final LongsRef valuesRef;
-  byte[] blocks;
-  int off;
-  long ord;
-
-  /** Sole constructor.
-   * @param blockSize the number of values of a block, must be equal to the
-   *                  block size of the {@link BlockPackedWriter} which has
-   *                  been used to write the stream
-   */
-  public BlockPackedReader(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
+  /** Sole constructor. */
+  public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
     checkBlockSize(blockSize);
-    this.packedIntsVersion = packedIntsVersion;
-    this.blockSize = blockSize;
-    this.values = new long[blockSize];
-    this.valuesRef = new LongsRef(this.values, 0, 0);
-    reset(in, valueCount);
-  }
-
-  /** Reset the current reader to wrap a stream of <code>valueCount</code>
-   * values contained in <code>in</code>. The block size remains unchanged. */
-  public void reset(DataInput in, long valueCount) {
-    this.in = in;
-    assert valueCount >= 0;
     this.valueCount = valueCount;
-    off = blockSize;
-    ord = 0;
-  }
-
-  /** Skip exactly <code>count</code> values. */
-  public void skip(long count) throws IOException {
-    assert count >= 0;
-    if (ord + count > valueCount || ord + count < 0) {
-      throw new EOFException();
-    }
-
-    // 1. skip buffered values
-    final int skipBuffer = (int) Math.min(count, blockSize - off);
-    off += skipBuffer;
-    ord += skipBuffer;
-    count -= skipBuffer;
-    if (count == 0L) {
-      return;
-    }
-
-    // 2. skip as many blocks as necessary
-    assert off == blockSize;
-    while (count >= blockSize) {
+    blockShift = Long.numberOfTrailingZeros(blockSize);
+    blockMask = blockSize - 1;
+    final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
+    if (numBlocks * blockSize < valueCount) {
+      throw new IllegalArgumentException("valueCount is too large for this block size");
+    }
+    long[] minValues = null;
+    subReaders = new PackedInts.Reader[numBlocks];
+    for (int i = 0; i < numBlocks; ++i) {
       final int token = in.readByte() & 0xFF;
       final int bitsPerValue = token >>> BPV_SHIFT;
       if (bitsPerValue > 64) {
         throw new IOException("Corrupted");
       }
       if ((token & MIN_VALUE_EQUALS_0) == 0) {
-        readVLong(in);
-      }
-      final long blockBytes = PackedInts.Format.PACKED.byteCount(packedIntsVersion, blockSize, bitsPerValue);
-      skipBytes(blockBytes);
-      ord += blockSize;
-      count -= blockSize;
-    }
-    if (count == 0L) {
-      return;
-    }
-
-    // 3. skip last values
-    assert count < blockSize;
-    refill();
-    ord += count;
-    off += count;
-  }
-
-  private void skipBytes(long count) throws IOException {
-    if (in instanceof IndexInput) {
-      final IndexInput iin = (IndexInput) in;
-      iin.seek(iin.getFilePointer() + count);
-    } else {
-      if (blocks == null) {
-        blocks = new byte[blockSize];
-      }
-      long skipped = 0;
-      while (skipped < count) {
-        final int toSkip = (int) Math.min(blocks.length, count - skipped);
-        in.readBytes(blocks, 0, toSkip);
-        skipped += toSkip;
-      }
-    }
-  }
-
-  /** Read the next value. */
-  public long next() throws IOException {
-    if (ord == valueCount) {
-      throw new EOFException();
-    }
-    if (off == blockSize) {
-      refill();
-    }
-    final long value = values[off++];
-    ++ord;
-    return value;
-  }
-
-  /** Read between <tt>1</tt> and <code>count</code> values. */
-  public LongsRef next(int count) throws IOException {
-    assert count > 0;
-    if (ord == valueCount) {
-      throw new EOFException();
-    }
-    if (off == blockSize) {
-      refill();
-    }
-
-    count = Math.min(count, blockSize - off);
-    count = (int) Math.min(count, valueCount - ord);
-
-    valuesRef.offset = off;
-    valuesRef.length = count;
-    off += count;
-    ord += count;
-    return valuesRef;
-  }
-
-  private void refill() throws IOException {
-    final int token = in.readByte() & 0xFF;
-    final boolean minEquals0 = (token & MIN_VALUE_EQUALS_0) != 0;
-    final int bitsPerValue = token >>> BPV_SHIFT;
-    if (bitsPerValue > 64) {
-      throw new IOException("Corrupted");
-    }
-    final long minValue = minEquals0 ? 0L : zigZagDecode(1L + readVLong(in));
-    assert minEquals0 || minValue != 0;
-
-    if (bitsPerValue == 0) {
-      Arrays.fill(values, minValue);
-    } else {
-      final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
-      final int iterations = blockSize / decoder.valueCount();
-      final int blocksSize = iterations * 8 * decoder.blockCount();
-      if (blocks == null || blocks.length < blocksSize) {
-        blocks = new byte[blocksSize];
+        if (minValues == null) {
+          minValues = new long[numBlocks];
+        }
+        minValues[i] = zigZagDecode(1L + readVLong(in));
       }
-
-      final int valueCount = (int) Math.min(this.valueCount - ord, blockSize);
-      final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
-      in.readBytes(blocks, 0, blocksCount);
-
-      decoder.decode(blocks, 0, values, 0, iterations);
-
-      if (minValue != 0) {
-        for (int i = 0; i < valueCount; ++i) {
-          values[i] += minValue;
+      if (bitsPerValue != 0) {
+        final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
+        if (direct) {
+          final long pointer = in.getFilePointer();
+          subReaders[i] = PackedInts.getDirectReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
+          in.seek(pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
+        } else {
+          subReaders[i] = PackedInts.getReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
         }
       }
     }
-    off = 0;
+    this.minValues = minValues;
   }
 
-  /** Return the offset of the next value to read. */
-  public long ord() {
-    return ord;
+  /** Get value at <code>index</code>. */
+  public long get(long index) {
+    assert index >= 0 && index < valueCount;
+    final int block = (int) (index >>> blockShift);
+    if (subReaders[block] == null) {
+      return minValues == null ? 0 : minValues[block];
+    }
+    final int idx = (int) (index & blockMask);
+    return (minValues == null ? 0 : minValues[block]) + subReaders[block].get(idx);
   }
 
 }

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java?rev=1436692&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java Mon Jan 21 23:47:31 2013
@@ -0,0 +1,241 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
+import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.LongsRef;
+
+/**
+ * Reader for sequences of longs written with {@link BlockPackedWriter}.
+ * @see BlockPackedWriter
+ * @lucene.internal
+ */
+public final class BlockPackedReaderIterator {
+
+  static long zigZagDecode(long n) {
+    return ((n >>> 1) ^ -(n & 1));
+  }
+
+  // same as DataInput.readVLong but supports negative values
+  static long readVLong(DataInput in) throws IOException {
+    byte b = in.readByte();
+    if (b >= 0) return b;
+    long i = b & 0x7FL;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 7;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 14;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 21;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 28;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 35;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 42;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0x7FL) << 49;
+    if (b >= 0) return i;
+    b = in.readByte();
+    i |= (b & 0xFFL) << 56;
+    return i;
+  }
+
+  DataInput in;
+  final int packedIntsVersion;
+  long valueCount;
+  final int blockSize;
+  final long[] values;
+  final LongsRef valuesRef;
+  byte[] blocks;
+  int off;
+  long ord;
+
+  /** Sole constructor.
+   * @param blockSize the number of values of a block, must be equal to the
+   *                  block size of the {@link BlockPackedWriter} which has
+   *                  been used to write the stream
+   */
+  public BlockPackedReaderIterator(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
+    checkBlockSize(blockSize);
+    this.packedIntsVersion = packedIntsVersion;
+    this.blockSize = blockSize;
+    this.values = new long[blockSize];
+    this.valuesRef = new LongsRef(this.values, 0, 0);
+    reset(in, valueCount);
+  }
+
+  /** Reset the current reader to wrap a stream of <code>valueCount</code>
+   * values contained in <code>in</code>. The block size remains unchanged. */
+  public void reset(DataInput in, long valueCount) {
+    this.in = in;
+    assert valueCount >= 0;
+    this.valueCount = valueCount;
+    off = blockSize;
+    ord = 0;
+  }
+
+  /** Skip exactly <code>count</code> values. */
+  public void skip(long count) throws IOException {
+    assert count >= 0;
+    if (ord + count > valueCount || ord + count < 0) {
+      throw new EOFException();
+    }
+
+    // 1. skip buffered values
+    final int skipBuffer = (int) Math.min(count, blockSize - off);
+    off += skipBuffer;
+    ord += skipBuffer;
+    count -= skipBuffer;
+    if (count == 0L) {
+      return;
+    }
+
+    // 2. skip as many blocks as necessary
+    assert off == blockSize;
+    while (count >= blockSize) {
+      final int token = in.readByte() & 0xFF;
+      final int bitsPerValue = token >>> BPV_SHIFT;
+      if (bitsPerValue > 64) {
+        throw new IOException("Corrupted");
+      }
+      if ((token & MIN_VALUE_EQUALS_0) == 0) {
+        readVLong(in);
+      }
+      final long blockBytes = PackedInts.Format.PACKED.byteCount(packedIntsVersion, blockSize, bitsPerValue);
+      skipBytes(blockBytes);
+      ord += blockSize;
+      count -= blockSize;
+    }
+    if (count == 0L) {
+      return;
+    }
+
+    // 3. skip last values
+    assert count < blockSize;
+    refill();
+    ord += count;
+    off += count;
+  }
+
+  private void skipBytes(long count) throws IOException {
+    if (in instanceof IndexInput) {
+      final IndexInput iin = (IndexInput) in;
+      iin.seek(iin.getFilePointer() + count);
+    } else {
+      if (blocks == null) {
+        blocks = new byte[blockSize];
+      }
+      long skipped = 0;
+      while (skipped < count) {
+        final int toSkip = (int) Math.min(blocks.length, count - skipped);
+        in.readBytes(blocks, 0, toSkip);
+        skipped += toSkip;
+      }
+    }
+  }
+
+  /** Read the next value. */
+  public long next() throws IOException {
+    if (ord == valueCount) {
+      throw new EOFException();
+    }
+    if (off == blockSize) {
+      refill();
+    }
+    final long value = values[off++];
+    ++ord;
+    return value;
+  }
+
+  /** Read between <tt>1</tt> and <code>count</code> values. */
+  public LongsRef next(int count) throws IOException {
+    assert count > 0;
+    if (ord == valueCount) {
+      throw new EOFException();
+    }
+    if (off == blockSize) {
+      refill();
+    }
+
+    count = Math.min(count, blockSize - off);
+    count = (int) Math.min(count, valueCount - ord);
+
+    valuesRef.offset = off;
+    valuesRef.length = count;
+    off += count;
+    ord += count;
+    return valuesRef;
+  }
+
+  private void refill() throws IOException {
+    final int token = in.readByte() & 0xFF;
+    final boolean minEquals0 = (token & MIN_VALUE_EQUALS_0) != 0;
+    final int bitsPerValue = token >>> BPV_SHIFT;
+    if (bitsPerValue > 64) {
+      throw new IOException("Corrupted");
+    }
+    final long minValue = minEquals0 ? 0L : zigZagDecode(1L + readVLong(in));
+    assert minEquals0 || minValue != 0;
+
+    if (bitsPerValue == 0) {
+      Arrays.fill(values, minValue);
+    } else {
+      final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
+      final int iterations = blockSize / decoder.valueCount();
+      final int blocksSize = iterations * 8 * decoder.blockCount();
+      if (blocks == null || blocks.length < blocksSize) {
+        blocks = new byte[blocksSize];
+      }
+
+      final int valueCount = (int) Math.min(this.valueCount - ord, blockSize);
+      final int blocksCount = (int) PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, bitsPerValue);
+      in.readBytes(blocks, 0, blocksCount);
+
+      decoder.decode(blocks, 0, values, 0, iterations);
+
+      if (minValue != 0) {
+        for (int i = 0; i < valueCount; ++i) {
+          values[i] += minValue;
+        }
+      }
+    }
+    off = 0;
+  }
+
+  /** Return the offset of the next value to read. */
+  public long ord() {
+    return ord;
+  }
+
+}

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java Mon Jan 21 23:47:31 2013
@@ -30,7 +30,7 @@ import org.apache.lucene.store.DataOutpu
  * using as few bits as possible. Memory usage of this class is proportional to
  * the block size. Each block has an overhead between 1 and 10 bytes to store
  * the minimum value and the number of bits per value of the block.
- * @see BlockPackedReader
+ * @see BlockPackedReaderIterator
  * @lucene.internal
  */
 public final class BlockPackedWriter {
@@ -43,8 +43,11 @@ public final class BlockPackedWriter {
     if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
       throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
     }
-    if (blockSize % 64 != 0) {
-      throw new IllegalArgumentException("blockSize must be a multiple of 64, got " + blockSize);
+    if (blockSize < 64) {
+      throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
+    }
+    if ((blockSize & (blockSize - 1)) != 0) {
+      throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
     }
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1436692&r1=1436691&r2=1436692&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Mon Jan 21 23:47:31 2013
@@ -877,10 +877,11 @@ public class TestPackedInts extends Luce
     in.close();
     dir.close();
   }
+
   public void testBlockPackedReaderWriter() throws IOException {
     final int iters = atLeast(2);
     for (int iter = 0; iter < iters; ++iter) {
-      final int blockSize = 64 * _TestUtil.nextInt(random(), 1, 1 << 12);
+      final int blockSize = 1 << _TestUtil.nextInt(random(), 6, 18);
       final int valueCount = random().nextInt(1 << 18);
       final long[] values = new long[valueCount];
       long minValue = 0;
@@ -912,30 +913,29 @@ public class TestPackedInts extends Luce
       final long fp = out.getFilePointer();
       out.close();
 
-      DataInput in = dir.openInput("out.bin", IOContext.DEFAULT);
-      if (random().nextBoolean()) {
-        byte[] buf = new byte[(int) fp];
-        in.readBytes(buf, 0, (int) fp);
-        ((IndexInput) in).close();
-        in = new ByteArrayDataInput(buf);
-      }
-      final BlockPackedReader reader = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+      IndexInput in1 = dir.openInput("out.bin", IOContext.DEFAULT);
+      byte[] buf = new byte[(int) fp];
+      in1.readBytes(buf, 0, (int) fp);
+      in1.seek(0L);
+      ByteArrayDataInput in2 = new ByteArrayDataInput(buf);
+      final DataInput in = random().nextBoolean() ? in1 : in2;
+      final BlockPackedReaderIterator it = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
       for (int i = 0; i < valueCount; ) {
         if (random().nextBoolean()) {
-          assertEquals("" + i, values[i], reader.next());
+          assertEquals("" + i, values[i], it.next());
           ++i;
         } else {
-          final LongsRef nextValues = reader.next(_TestUtil.nextInt(random(), 1, 1024));
+          final LongsRef nextValues = it.next(_TestUtil.nextInt(random(), 1, 1024));
           for (int j = 0; j < nextValues.length; ++j) {
             assertEquals("" + (i + j), values[i + j], nextValues.longs[nextValues.offset + j]);
           }
           i += nextValues.length;
         }
-        assertEquals(i, reader.ord());
+        assertEquals(i, it.ord());
       }
       assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
       try {
-        reader.next();
+        it.next();
         assertTrue(false);
       } catch (IOException e) {
         // OK
@@ -946,31 +946,35 @@ public class TestPackedInts extends Luce
       } else {
         ((IndexInput) in).seek(0L);
       }
-      final BlockPackedReader reader2 = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+      final BlockPackedReaderIterator it2 = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
       int i = 0;
       while (true) {
         final int skip = _TestUtil.nextInt(random(), 0, valueCount - i);
-        reader2.skip(skip);
+        it2.skip(skip);
         i += skip;
-        assertEquals(i, reader2.ord());
+        assertEquals(i, it2.ord());
         if (i == valueCount) {
           break;
         } else {
-          assertEquals(values[i], reader2.next());
+          assertEquals(values[i], it2.next());
           ++i;
         }
       }
       assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
       try {
-        reader2.skip(1);
+        it2.skip(1);
         assertTrue(false);
       } catch (IOException e) {
         // OK
       }
 
-      if (in instanceof IndexInput) {
-        ((IndexInput) in).close();
+      in1.seek(0L);
+      final BlockPackedReader reader = new BlockPackedReader(in1, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
+      for (i = 0; i < valueCount; ++i) {
+        assertEquals("i=" + i, values[i], reader.get(i));
       }
+      assertEquals(in1.getFilePointer(), in1.length());
+      in1.close();
       dir.close();
     }
   }