You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/01/22 15:54:56 UTC

svn commit: r1436983 - in /lucene/dev/branches/lucene4547/lucene: codecs/src/java/org/apache/lucene/codecs/diskdv/ core/src/java/org/apache/lucene/codecs/lucene42/ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/packed/

Author: jpountz
Date: Tue Jan 22 14:54:56 2013
New Revision: 1436983

URL: http://svn.apache.org/viewvc?rev=1436983&view=rev
Log:
Improved memory efficiency for the bytes index.

Added:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java   (with props)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java   (with props)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java   (with props)
Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesConsumer.java Tue Jan 22 14:54:56 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.diskdv;
  */
 
 import java.io.IOException;
-import java.util.Iterator;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -29,6 +28,7 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 class DiskDocValuesConsumer extends DocValuesConsumer {
@@ -99,32 +99,18 @@ class DiskDocValuesConsumer extends DocV
     
     // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
     // otherwise, we need to record the length fields...
-    // TODO: make this more efficient. this is just as inefficient as 4.0 codec.... we can do much better.
     if (minLength != maxLength) {
-      addNumericField(field, new Iterable<Number>() {
-        @Override
-        public Iterator<Number> iterator() {
-          final Iterator<BytesRef> inner = values.iterator();
-          return new Iterator<Number>() {
-            long addr = 0;
-
-            @Override
-            public boolean hasNext() {
-              return inner.hasNext();
-            }
-
-            @Override
-            public Number next() {
-              BytesRef b = inner.next();
-              addr += b.length;
-              return Long.valueOf(addr);
-            }
-
-            @Override
-            public void remove() { throw new UnsupportedOperationException(); } 
-          };
-        }
-      });
+      meta.writeLong(data.getFilePointer());
+      meta.writeVInt(PackedInts.VERSION_CURRENT);
+      meta.writeVInt(BLOCK_SIZE);
+
+      final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
+      long addr = 0;
+      for (BytesRef v : values) {
+        addr += v.length;
+        writer.add(addr);
+      }
+      writer.finish();
     }
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/diskdv/DiskDocValuesProducer.java Tue Jan 22 14:54:56 2013
@@ -33,9 +33,11 @@ import org.apache.lucene.index.NumericDo
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
 
 class DiskDocValuesProducer extends DocValuesProducer {
   private final Map<Integer,NumericEntry> numerics;
@@ -81,28 +83,14 @@ class DiskDocValuesProducer extends DocV
       } else if (type == DocValuesType.BINARY) {
         BinaryEntry b = readBinaryEntry(meta);
         binaries.put(fieldNumber, b);
-        if (b.minLength != b.maxLength) {
-          if (meta.readVInt() != fieldNumber) {
-            throw new CorruptIndexException("binary entry for field: " + fieldNumber + " is corrupt");
-          }
-          // variable length byte[]: read addresses as a numeric dv field
-          numerics.put(fieldNumber, readNumericEntry(meta));
-        }
       } else if (type == DocValuesType.SORTED) {
         BinaryEntry b = readBinaryEntry(meta);
         binaries.put(fieldNumber, b);
-        if (b.minLength != b.maxLength) {
-          if (meta.readVInt() != fieldNumber) {
-            throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
-          }
-          // variable length byte[]: read addresses as a numeric dv field
-          numerics.put(fieldNumber, readNumericEntry(meta));
-        }
-        // sorted byte[]: read ords as a numeric dv field
         if (meta.readVInt() != fieldNumber) {
           throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt");
         }
-        ords.put(fieldNumber, readNumericEntry(meta));
+        NumericEntry n = readNumericEntry(meta);
+        ords.put(fieldNumber, n);
       }
       fieldNumber = meta.readVInt();
     }
@@ -123,6 +111,11 @@ class DiskDocValuesProducer extends DocV
     entry.maxLength = meta.readVInt();
     entry.count = meta.readVInt();
     entry.offset = meta.readLong();
+    if (entry.minLength != entry.maxLength) {
+      entry.addressesOffset = meta.readLong();
+      entry.packedIntsVersion = meta.readVInt();
+      entry.blockSize = meta.readVInt();
+    }
     return entry;
   }
 
@@ -157,6 +150,7 @@ class DiskDocValuesProducer extends DocV
   
   private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
     final IndexInput data = this.data.clone();
+
     return new BinaryDocValues() {
       @Override
       public void get(int docID, BytesRef result) {
@@ -178,18 +172,20 @@ class DiskDocValuesProducer extends DocV
   
   private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
     final IndexInput data = this.data.clone();
-    final NumericDocValues addresses = getNumeric(field);
+    data.seek(bytes.addressesOffset);
+
+    final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true);
     return new BinaryDocValues() {
       @Override
       public void get(int docID, BytesRef result) {
-        long startAddress = docID == 0 ? bytes.offset : bytes.offset + addresses.get(docID-1);
+        long startAddress = bytes.offset + (docID == 0 ? 0 : + addresses.get(docID-1));
         long endAddress = bytes.offset + addresses.get(docID);
         int length = (int) (endAddress - startAddress);
         try {
           data.seek(startAddress);
           if (result.bytes.length < length) {
             result.offset = 0;
-            result.bytes = new byte[length];
+            result.bytes = new byte[ArrayUtil.oversize(length, 1)];
           }
           data.readBytes(result.bytes, result.offset, length);
           result.length = length;
@@ -243,5 +239,8 @@ class DiskDocValuesProducer extends DocV
     int count;
     int minLength;
     int maxLength;
+    long addressesOffset;
+    int packedIntsVersion;
+    int blockSize;
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Tue Jan 22 14:54:56 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene4
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -37,6 +36,7 @@ import org.apache.lucene.util.fst.FST.IN
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -113,8 +113,8 @@ class Lucene42DocValuesConsumer extends 
         encode.put(decode[i], i);
       }
 
-      data.writeVInt(PackedInts.VERSION_CURRENT);
-      data.writeVInt(count);
+      meta.writeVInt(PackedInts.VERSION_CURRENT);
+      meta.writeVInt(count);
       data.writeVInt(bitsPerValue);
 
       final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, PackedInts.Format.PACKED, count, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
@@ -125,8 +125,8 @@ class Lucene42DocValuesConsumer extends 
     } else {
       meta.writeByte((byte)0); // delta-compressed
 
-      data.writeVInt(PackedInts.VERSION_CURRENT);
-      data.writeVInt(count);
+      meta.writeVInt(PackedInts.VERSION_CURRENT);
+      meta.writeVInt(count);
       data.writeVInt(BLOCK_SIZE);
 
       final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
@@ -164,11 +164,14 @@ class Lucene42DocValuesConsumer extends 
     int minLength = Integer.MAX_VALUE;
     int maxLength = Integer.MIN_VALUE;
     final long startFP = data.getFilePointer();
+    int count = 0;
     for(BytesRef v : values) {
       minLength = Math.min(minLength, v.length);
       maxLength = Math.max(maxLength, v.length);
       data.writeBytes(v.bytes, v.offset, v.length);
+      ++count;
     }
+    meta.writeVInt(count);
     meta.writeLong(startFP);
     meta.writeLong(data.getFilePointer() - startFP);
     meta.writeVInt(minLength);
@@ -176,32 +179,17 @@ class Lucene42DocValuesConsumer extends 
     
     // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
     // otherwise, we need to record the length fields...
-    // TODO: make this more efficient. this is just as inefficient as 4.0 codec.... we can do much better.
     if (minLength != maxLength) {
-      addNumericField(field, new Iterable<Number>() {
-        @Override
-        public Iterator<Number> iterator() {
-          final Iterator<BytesRef> inner = values.iterator();
-          return new Iterator<Number>() {
-            long addr = 0;
-
-            @Override
-            public boolean hasNext() {
-              return inner.hasNext();
-            }
-
-            @Override
-            public Number next() {
-              BytesRef b = inner.next();
-              addr += b.length;
-              return Long.valueOf(addr);
-            }
-
-            @Override
-            public void remove() { throw new UnsupportedOperationException(); } 
-          };
-        }
-      });
+      meta.writeVInt(PackedInts.VERSION_CURRENT);
+      meta.writeVInt(BLOCK_SIZE);
+
+      final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
+      long addr = 0;
+      for (BytesRef v : values) {
+        addr += v.length;
+        writer.add(addr);
+      }
+      writer.finish();
     }
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Tue Jan 22 14:54:56 2013
@@ -43,6 +43,7 @@ import org.apache.lucene.util.fst.FST.By
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedReader;
+import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
 import org.apache.lucene.util.packed.PackedInts;
 
 class Lucene42DocValuesProducer extends DocValuesProducer {
@@ -101,13 +102,20 @@ class Lucene42DocValuesProducer extends 
         NumericEntry entry = new NumericEntry();
         entry.offset = meta.readLong();
         entry.tableized = meta.readByte() != 0;
+        entry.packedIntsVersion = meta.readVInt();
+        entry.count = meta.readVInt();
         numerics.put(fieldNumber, entry);
       } else if (fieldType == Lucene42DocValuesConsumer.BYTES) {
         BinaryEntry entry = new BinaryEntry();
+        entry.count = meta.readVInt();
         entry.offset = meta.readLong();
         entry.numBytes = meta.readLong();
         entry.minLength = meta.readVInt();
         entry.maxLength = meta.readVInt();
+        if (entry.minLength != entry.maxLength) {
+          entry.packedIntsVersion = meta.readVInt();
+          entry.blockSize = meta.readVInt();
+        }
         binaries.put(fieldNumber, entry);
       } else if (fieldType == Lucene42DocValuesConsumer.FST) {
         FSTEntry entry = new FSTEntry();
@@ -140,10 +148,8 @@ class Lucene42DocValuesProducer extends 
       for (int i = 0; i < decode.length; i++) {
         decode[i] = data.readLong();
       }
-      final int packedIntsVersion = data.readVInt();
-      final int count = data.readVInt();
       final int bitsPerValue = data.readVInt();
-      final PackedInts.Reader reader = PackedInts.getReaderNoHeader(data, PackedInts.Format.PACKED, packedIntsVersion, count, bitsPerValue);
+      final PackedInts.Reader reader = PackedInts.getReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, entry.count, bitsPerValue);
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
@@ -151,10 +157,8 @@ class Lucene42DocValuesProducer extends 
         }
       };
     } else {
-      final int packedIntsVersion = data.readVInt();
-      final int count = data.readVInt();
       final int blockSize = data.readVInt();
-      final BlockPackedReader reader = new BlockPackedReader(data, packedIntsVersion, blockSize, count, false);
+      final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, entry.count, false);
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
@@ -191,15 +195,15 @@ class Lucene42DocValuesProducer extends 
         }
       };
     } else {
-      final NumericDocValues addresses = getNumeric(field);
+      final MonotonicBlockPackedReader addresses = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false);
       return new BinaryDocValues() {
         @Override
         public void get(int docID, BytesRef result) {
-          int startAddress = docID == 0 ? 0 : (int) addresses.get(docID-1);
-          int endAddress = (int)addresses.get(docID); 
+          long startAddress = docID == 0 ? 0 : addresses.get(docID-1);
+          long endAddress = addresses.get(docID); 
           result.bytes = bytes;
-          result.offset = startAddress;
-          result.length = endAddress - startAddress;
+          result.offset = (int) startAddress;
+          result.length = (int) (endAddress - startAddress);
         }
       };
     }
@@ -275,13 +279,18 @@ class Lucene42DocValuesProducer extends 
   static class NumericEntry {
     long offset;
     boolean tableized;
+    int packedIntsVersion;
+    int count;
   }
   
   static class BinaryEntry {
+    int count;
     long offset;
     long numBytes;
     int minLength;
     int maxLength;
+    int packedIntsVersion;
+    int blockSize;
   }
   
   static class FSTEntry {

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java?rev=1436983&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java Tue Jan 22 14:54:56 2013
@@ -0,0 +1,132 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataOutput;
+
+abstract class AbstractBlockPackedWriter {
+
+  static final int MAX_BLOCK_SIZE = 1 << (30 - 3);
+  static final int MIN_VALUE_EQUALS_0 = 1 << 0;
+  static final int BPV_SHIFT = 1;
+
+  static void checkBlockSize(int blockSize) {
+    if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
+      throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
+    }
+    if (blockSize < 64) {
+      throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
+    }
+    if ((blockSize & (blockSize - 1)) != 0) {
+      throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
+    }
+  }
+
+  static long zigZagEncode(long n) {
+    return (n >> 63) ^ (n << 1);
+  }
+
+  // same as DataOutput.writeVLong but accepts negative values
+  static void writeVLong(DataOutput out, long i) throws IOException {
+    int k = 0;
+    while ((i & ~0x7FL) != 0L && k++ < 8) {
+      out.writeByte((byte)((i & 0x7FL) | 0x80L));
+      i >>>= 7;
+    }
+    out.writeByte((byte) i);
+  }
+
+  protected DataOutput out;
+  protected final long[] values;
+  protected byte[] blocks;
+  protected int off;
+  protected long ord;
+  protected boolean finished;
+
+  /**
+   * Sole constructor.
+   * @param blockSize the number of values of a single block, must be a multiple of <tt>64</tt>
+   */
+  public AbstractBlockPackedWriter(DataOutput out, int blockSize) {
+    checkBlockSize(blockSize);
+    reset(out);
+    values = new long[blockSize];
+  }
+
+  /** Reset this writer to wrap <code>out</code>. The block size remains unchanged. */
+  public void reset(DataOutput out) {
+    assert out != null;
+    this.out = out;
+    off = 0;
+    ord = 0L;
+    finished = false;
+  }
+
+  private void checkNotFinished() {
+    if (finished) {
+      throw new IllegalStateException("Already finished");
+    }
+  }
+
+  /** Append a new long. */
+  public void add(long l) throws IOException {
+    checkNotFinished();
+    if (off == values.length) {
+      flush();
+    }
+    values[off++] = l;
+    ++ord;
+  }
+
+  /** Flush all buffered data to disk. This instance is not usable anymore
+   *  after this method has been called until {@link #reset(DataOutput)} has
+   *  been called. */
+  public void finish() throws IOException {
+    checkNotFinished();
+    if (off > 0) {
+      flush();
+    }
+    finished = true;
+  }
+
+  /** Return the number of values which have been added. */
+  public long ord() {
+    return ord;
+  }
+
+  protected abstract void flush() throws IOException;
+
+  protected final void writeValues(int bitsRequired) throws IOException {
+    final PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsRequired);
+    final int iterations = values.length / encoder.valueCount();
+    final int blockSize = encoder.blockCount() * 8 * iterations;
+    if (blocks == null || blocks.length < blockSize) {
+      blocks = new byte[blockSize];
+    }
+    if (off < values.length) {
+      Arrays.fill(values, off, values.length, 0L);
+    }
+    encoder.encode(values, 0, blocks, 0, iterations);
+    final int blockCount = (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsRequired);
+    out.writeBytes(blocks, blockCount);
+  }
+
+}

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java Tue Jan 22 14:54:56 2013
@@ -42,7 +42,7 @@ public final class BlockPackedReader {
   public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
     checkBlockSize(blockSize);
     this.valueCount = valueCount;
-    blockShift = Long.numberOfTrailingZeros(blockSize);
+    blockShift = Integer.numberOfTrailingZeros(blockSize);
     blockMask = blockSize - 1;
     final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
     if (numBlocks * blockSize < valueCount) {
@@ -62,7 +62,9 @@ public final class BlockPackedReader {
         }
         minValues[i] = zigZagDecode(1L + readVLong(in));
       }
-      if (bitsPerValue != 0) {
+      if (bitsPerValue == 0) {
+        subReaders[i] = new PackedInts.NullReader(blockSize);
+      } else {
         final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
         if (direct) {
           final long pointer = in.getFilePointer();
@@ -80,9 +82,6 @@ public final class BlockPackedReader {
   public long get(long index) {
     assert index >= 0 && index < valueCount;
     final int block = (int) (index >>> blockShift);
-    if (subReaders[block] == null) {
-      return minValues == null ? 0 : minValues[block];
-    }
     final int idx = (int) (index & blockMask);
     return (minValues == null ? 0 : minValues[block]) + subReaders[block].get(idx);
   }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java Tue Jan 22 14:54:56 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.util.packed;
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 
 import org.apache.lucene.store.DataOutput;
 
@@ -33,92 +32,17 @@ import org.apache.lucene.store.DataOutpu
  * @see BlockPackedReaderIterator
  * @lucene.internal
  */
-public final class BlockPackedWriter {
-
-  static final int MAX_BLOCK_SIZE = 1 << (30 - 3);
-  static final int MIN_VALUE_EQUALS_0 = 1 << 0;
-  static final int BPV_SHIFT = 1;
-
-  static void checkBlockSize(int blockSize) {
-    if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
-      throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
-    }
-    if (blockSize < 64) {
-      throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
-    }
-    if ((blockSize & (blockSize - 1)) != 0) {
-      throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
-    }
-  }
-
-  static long zigZagEncode(long n) {
-    return (n >> 63) ^ (n << 1);
-  }
-
-  // same as DataOutput.writeVLong but accepts negative values
-  static void writeVLong(DataOutput out, long i) throws IOException {
-    int k = 0;
-    while ((i & ~0x7FL) != 0L && k++ < 8) {
-      out.writeByte((byte)((i & 0x7FL) | 0x80L));
-      i >>>= 7;
-    }
-    out.writeByte((byte) i);
-  }
-
-  DataOutput out;
-  final long[] values;
-  byte[] blocks;
-  int off;
-  long ord;
-  boolean finished;
+public final class BlockPackedWriter extends AbstractBlockPackedWriter {
 
   /**
    * Sole constructor.
-   * @param blockSize the number of values of a single block, must be a multiple of <tt>64</tt>
+   * @param blockSize the number of values of a single block, must be a power of 2
    */
   public BlockPackedWriter(DataOutput out, int blockSize) {
-    checkBlockSize(blockSize);
-    reset(out);
-    values = new long[blockSize];
-  }
-
-  /** Reset this writer to wrap <code>out</code>. The block size remains unchanged. */
-  public void reset(DataOutput out) {
-    assert out != null;
-    this.out = out;
-    off = 0;
-    ord = 0L;
-    finished = false;
+    super(out, blockSize);
   }
 
-  private void checkNotFinished() {
-    if (finished) {
-      throw new IllegalStateException("Already finished");
-    }
-  }
-
-  /** Append a new long. */
-  public void add(long l) throws IOException {
-    checkNotFinished();
-    if (off == values.length) {
-      flush();
-    }
-    values[off++] = l;
-    ++ord;
-  }
-
-  /** Flush all buffered data to disk. This instance is not usable anymore
-   *  after this method has been called until {@link #reset(DataOutput)} has
-   *  been called. */
-  public void finish() throws IOException {
-    checkNotFinished();
-    if (off > 0) {
-      flush();
-    }
-    finished = true;
-  }
-
-  private void flush() throws IOException {
+  protected void flush() throws IOException {
     assert off > 0;
     long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
     for (int i = 0; i < off; ++i) {
@@ -149,26 +73,10 @@ public final class BlockPackedWriter {
           values[i] -= min;
         }
       }
-      final PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsRequired);
-      final int iterations = values.length / encoder.valueCount();
-      final int blockSize = encoder.blockCount() * 8 * iterations;
-      if (blocks == null || blocks.length < blockSize) {
-        blocks = new byte[blockSize];
-      }
-      if (off < values.length) {
-        Arrays.fill(values, off, values.length, 0L);
-      }
-      encoder.encode(values, 0, blocks, 0, iterations);
-      final int blockCount = (int) PackedInts.Format.PACKED.byteCount(PackedInts.VERSION_CURRENT, off, bitsRequired);
-      out.writeBytes(blocks, blockCount);
+      writeValues(bitsRequired);
     }
 
     off = 0;
   }
 
-  /** Return the number of values which have been added. */
-  public long ord() {
-    return ord;
-  }
-
 }

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java?rev=1436983&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java Tue Jan 22 14:54:56 2013
@@ -0,0 +1,83 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Provides random access to a stream written with
+ * {@link MonotonicBlockPackedWriter}.
+ * @lucene.internal
+ */
+public final class MonotonicBlockPackedReader {
+
+  private final int blockShift, blockMask;
+  private final long valueCount;
+  private final long[] minValues;
+  private final float[] averages;
+  private final PackedInts.Reader[] subReaders;
+
+  /** Sole constructor. */
+  public MonotonicBlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
+    checkBlockSize(blockSize);
+    this.valueCount = valueCount;
+    blockShift = Integer.numberOfTrailingZeros(blockSize);
+    blockMask = blockSize - 1;
+    final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
+    if (numBlocks * blockSize < valueCount) {
+      throw new IllegalArgumentException("valueCount is too large for this block size");
+    }
+    minValues = new long[numBlocks];
+    averages = new float[numBlocks];
+    subReaders = new PackedInts.Reader[numBlocks];
+    for (int i = 0; i < numBlocks; ++i) {
+      minValues[i] = in.readVLong();
+      averages[i] = Float.intBitsToFloat(in.readInt());
+      final int bitsPerValue = in.readVInt();
+      if (bitsPerValue > 64) {
+        throw new IOException("Corrupted");
+      }
+      if (bitsPerValue == 0) {
+        subReaders[i] = new PackedInts.NullReader(blockSize);
+      } else {
+        final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
+        if (direct) {
+          final long pointer = in.getFilePointer();
+          subReaders[i] = PackedInts.getDirectReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
+          in.seek(pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
+        } else {
+          subReaders[i] = PackedInts.getReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
+        }
+      }
+    }
+  }
+
+  /** Get value at <code>index</code>. */
+  public long get(long index) {
+    assert index >= 0 && index < valueCount;
+    final int block = (int) (index >>> blockShift);
+    final int idx = (int) (index & blockMask);
+    return minValues[block] + (long) (idx * averages[block]) + zigZagDecode(subReaders[block].get(idx));
+  }
+
+}

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java?rev=1436983&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedWriter.java Tue Jan 22 14:54:56 2013
@@ -0,0 +1,76 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * A writer for large monotonically increasing sequences of positive longs.
+ * <p>
+ * The sequence is divided into fixed-size blocks and for each block, the
+ * average value per ord is computed, followed by the delta from the expected
+ * value for every ord, using as few bits as possible. Each block has an
+ * overhead between 6 and 14 bytes.
+ * @see MonotonicBlockPackedReader
+ * @lucene.internal
+ */
+public final class MonotonicBlockPackedWriter extends AbstractBlockPackedWriter {
+
+  /**
+   * Sole constructor.
+   * @param blockSize the number of values of a single block, must be a power of 2
+   */
+  public MonotonicBlockPackedWriter(DataOutput out, int blockSize) {
+    super(out, blockSize);
+  }
+
+  @Override
+  public void add(long l) throws IOException {
+    assert l >= 0;
+    super.add(l);
+  }
+
+  protected void flush() throws IOException {
+    assert off > 0;
+
+    // TODO: perform a true linear regression?
+    final long min = values[0];
+    final float avg = off == 1 ? 0f : (float) (values[off - 1] - min) / (off - 1);
+
+    long maxZigZagDelta = 0;
+    for (int i = 0; i < off; ++i) {
+      values[i] = zigZagEncode(values[i] - min - (long) (avg * i));
+      maxZigZagDelta = Math.max(maxZigZagDelta, values[i]);
+    }
+
+    out.writeVLong(min);
+    out.writeInt(Float.floatToIntBits(avg));
+    if (maxZigZagDelta == 0) {
+      out.writeVInt(0);
+    } else {
+      final int bitsRequired = PackedInts.bitsRequired(maxZigZagDelta);
+      out.writeVInt(bitsRequired);
+      writeValues(bitsRequired);
+    }
+
+    off = 0;
+  }
+
+}

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Tue Jan 22 14:54:56 2013
@@ -24,7 +24,6 @@ import org.apache.lucene.store.DataInput
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.packed.PackedInts.Format;
 
 /**
  * Simplistic compression for array of unsigned long values.
@@ -656,6 +655,53 @@ public class PackedInts {
     }
   }
 
+  /** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
+  public static final class NullReader implements Reader {
+
+    private final int valueCount;
+
+    /** Sole constructor. */
+    public NullReader(int valueCount) {
+      this.valueCount = valueCount;
+    }
+
+    @Override
+    public long get(int index) {
+      return 0;
+    }
+
+    @Override
+    public int get(int index, long[] arr, int off, int len) {
+      return 0;
+    }
+
+    @Override
+    public int getBitsPerValue() {
+      return 0;
+    }
+
+    @Override
+    public int size() {
+      return valueCount;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return 0;
+    }
+
+    @Override
+    public Object getArray() {
+      return null;
+    }
+
+    @Override
+    public boolean hasArray() {
+      return false;
+    }
+
+  }
+
   /** A write-once Writer.
    * @lucene.internal
    */

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1436983&r1=1436982&r2=1436983&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Tue Jan 22 14:54:56 2013
@@ -979,4 +979,45 @@ public class TestPackedInts extends Luce
     }
   }
 
+  public void testMonotonicBlockPackedReaderWriter() throws IOException {
+    final int iters = atLeast(2);
+    for (int iter = 0; iter < iters; ++iter) {
+      final int blockSize = 1 << _TestUtil.nextInt(random(), 6, 18);
+      final int valueCount = random().nextInt(1 << 18);
+      final long[] values = new long[valueCount];
+      if (valueCount > 0) {
+        values[0] = random().nextBoolean() ? random().nextInt(10) : random().nextInt(Integer.MAX_VALUE);
+        int maxDelta = random().nextInt(64);
+        for (int i = 1; i < valueCount; ++i) {
+          if (random().nextDouble() < 0.1d) {
+            maxDelta = random().nextInt(64);
+          }
+          values[i] = Math.max(0, values[i-1] + _TestUtil.nextInt(random(), -16, maxDelta));
+        }
+      }
+
+      final Directory dir = newDirectory();
+      final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
+      final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(out, blockSize);
+      for (int i = 0; i < valueCount; ++i) {
+        assertEquals(i, writer.ord());
+        writer.add(values[i]);
+      }
+      assertEquals(valueCount, writer.ord());
+      writer.finish();
+      assertEquals(valueCount, writer.ord());
+      final long fp = out.getFilePointer();
+      out.close();
+
+      final IndexInput in = dir.openInput("out.bin", IOContext.DEFAULT);
+      final MonotonicBlockPackedReader reader = new MonotonicBlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount, random().nextBoolean());
+      assertEquals(fp, in.getFilePointer());
+      for (int i = 0; i < valueCount; ++i) {
+        assertEquals("i=" +i, values[i], reader.get(i));
+      }
+      in.close();
+      dir.close();
+    }
+  }
+
 }