You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/07/09 23:49:05 UTC

svn commit: r1501576 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/packed/

Author: jpountz
Date: Tue Jul  9 21:49:04 2013
New Revision: 1501576

URL: http://svn.apache.org/r1501576
Log:
LUCENE-5084: EliasFanoDocIdSet.

Added:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java   (with props)
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java   (with props)
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java   (with props)
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java   (with props)
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoSequence.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1501576&r1=1501575&r2=1501576&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jul  9 21:49:04 2013
@@ -298,6 +298,9 @@ New Features
 * LUCENE-5013: Added ScandinavianFoldingFilterFactory and
   ScandinavianNormalizationFilterFactory (Karl Wettin via janhoy)
 
+* LUCENE-5084: Added new Elias-Fano encoder, decoder and DocIdSet
+  implementations. (Paul Elschot via Adrien Grand)
+
 API Changes
 
 * LUCENE-5077: Make it easier to use compressed norms. Lucene42NormsFormat takes

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java?rev=1501576&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java Tue Jul  9 21:49:04 2013
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.packed;
+
+/** A decoder for an {@link EliasFanoEncoder}.
+ * @lucene.internal
+ */
+public class EliasFanoDecoder {
+  private static final int LOG2_LONG_SIZE = Long.numberOfTrailingZeros(Long.SIZE);
+
+  private final EliasFanoEncoder efEncoder;
+  final long numEncoded;
+  private long efIndex = -1; // the decoding index.
+  private long setBitForIndex = -1; // the index of the high bit at the decoding index.
+
+  public final static long NO_MORE_VALUES = -1L;
+
+  /** Construct a decoder for a given {@link EliasFanoEncoder}.
+   * The decoding index is set to just before the first encoded value.
+   */
+  public EliasFanoDecoder(EliasFanoEncoder efEncoder) {
+    this.efEncoder = efEncoder;
+    this.numEncoded = efEncoder.numEncoded; // numEncoded is not final in EliasFanoEncoder
+  }
+
+  /** @return The Elias-Fano encoder that is decoded. */
+  public EliasFanoEncoder getEliasFanoEncoder() {
+    return efEncoder;
+  }
+
+
+  /** @return The index of the last decoded value.
+   * The first value encoded by {@link EliasFanoEncoder#encodeNext} has index 0.
+   * Only valid directly after
+   * {@link #nextValue}, {@link #advanceToValue},
+   * {@link #previousValue}, or {@link #backToValue}
+   * returned another value than {@link #NO_MORE_VALUES}.
+   */
+  public long index() {
+    if (efIndex < 0) {
+      throw new IllegalStateException("index before sequence");
+    }
+    if (efIndex >= numEncoded) {
+      throw new IllegalStateException("index after sequence");
+    }
+    return efIndex;
+  }
+
+  /**  @return The high value for the current decoding index. */
+  private long currentHighValue() {
+    return setBitForIndex - efIndex; // sequence of unary gaps
+  }
+
+  /**  @return The low value for the current decoding index. */
+  private long currentLowValue() {
+    assert efIndex >= 0;
+    assert efIndex < numEncoded;
+    if (efEncoder.numLowBits == 0) {
+      return 0;
+    }
+    long bitPos = efIndex * efEncoder.numLowBits;
+    int lowIndex = (int) (bitPos >>> LOG2_LONG_SIZE);
+    int bitPosAtIndex = (int) (bitPos & (Long.SIZE-1));
+    long lowValue = efEncoder.lowerLongs[lowIndex] >>> bitPosAtIndex;
+    if ((bitPosAtIndex + efEncoder.numLowBits) > Long.SIZE) {
+      lowValue |= (efEncoder.lowerLongs[lowIndex + 1] << (Long.SIZE - bitPosAtIndex));
+    }
+    lowValue &= efEncoder.lowerBitsMask;
+    return lowValue;
+  }
+
+  /**  @return The given highValue shifted left by the number of low bits from by the EliasFanoSequence,
+   *           logically OR-ed with the given lowValue.
+   */
+  private long combineHighLowValues(long highValue, long lowValue) {
+    return (highValue << efEncoder.numLowBits) | lowValue;
+  }
+
+  private long curHighLong;
+
+
+  /* The implementation of forward decoding and backward decoding is done by the following method pairs.
+   *
+   * toBeforeSequence - toAfterSequence
+   * getCurrentRightShift - getCurrentLeftShift
+   * toAfterCurrentHighBit - toBeforeCurrentHighBit
+   * toNextHighLong - toPreviousHighLong
+   * nextHighValue - previousHighValue
+   * nextValue - previousValue
+   * advanceToValue - backToValue
+   *
+   */
+
+  /* Forward decoding section */
+
+
+  /** Set the decoding index to just before the first encoded value.
+   */
+  public void toBeforeSequence() {
+    efIndex = -1;
+    setBitForIndex = -1;
+  }
+
+  /** @return the number of bits in a long after (setBitForIndex modulo Long.SIZE) */
+  private int getCurrentRightShift() {
+    int s = (int) (setBitForIndex & (Long.SIZE-1));
+    return s;
+  }
+
+  /** Increment efIndex and setBitForIndex and
+   * shift curHighLong so that it does not contain the high bits before setBitForIndex.
+   * @return true iff efIndex still smaller than numEncoded.
+   */
+  private boolean toAfterCurrentHighBit() {
+    efIndex += 1;
+    if (efIndex >= numEncoded) {
+      return false;
+    }
+    setBitForIndex += 1;
+    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
+    curHighLong = efEncoder.upperLongs[highIndex] >>> getCurrentRightShift();
+    return true;
+  }
+
+  /** The current high long has been determined to not contain the set bit that is needed.
+   *  Increment setBitForIndex to the next high long and set curHighLong accordingly.
+   */
+  private void toNextHighLong() {
+    setBitForIndex += Long.SIZE - (setBitForIndex & (Long.SIZE-1));
+    //assert getCurrentRightShift() == 0;
+    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
+    curHighLong = efEncoder.upperLongs[highIndex];
+  }
+
+  /** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
+   *  by incrementing setBitForIndex, and by setting curHighLong accordingly.
+   */
+  private void toNextHighValue() {
+    while (curHighLong == 0L) {
+      toNextHighLong(); // inlining and unrolling would simplify somewhat
+    }
+    setBitForIndex += Long.numberOfTrailingZeros(curHighLong);
+  }
+
+  /** setBitForIndex and efIndex have just been incremented, scan to the next high set bit
+   *  by incrementing setBitForIndex, and by setting curHighLong accordingly.
+   *  @return the next encoded high value.
+   */
+  private long nextHighValue() {
+    toNextHighValue();
+    return currentHighValue();
+  }
+
+  /** If another value is available after the current decoding index, return this value and
+   * and increase the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
+   */
+  public long nextValue() {
+    if (! toAfterCurrentHighBit()) {
+      return NO_MORE_VALUES;
+    }
+    long highValue = nextHighValue();
+    return combineHighLowValues(highValue, currentLowValue());
+  }
+
+  /** Advance the decoding index to a given index.
+   * and return <code>true</code> iff it is available.
+   */
+  public boolean advanceToIndex(long index) {
+    assert index > efIndex;
+    if (index >= numEncoded) {
+      efIndex = numEncoded;
+      return false;
+    }
+    if (! toAfterCurrentHighBit()) {
+      assert false;
+    }
+    int curSetBits = Long.bitCount(curHighLong);
+    while ((efIndex + curSetBits) < index) { // curHighLong has not enough set bits to reach index
+      efIndex += curSetBits;
+      toNextHighLong();
+      curSetBits = Long.bitCount(curHighLong);
+    }
+    // curHighLong has enough set bits to reach index
+    while (efIndex < index) {
+      /* CHECKME: Instead of the linear search here, use (forward) broadword selection from
+       * "Broadword Implementation of Rank/Select Queries", Sebastiano Vigna, January 30, 2012.
+       */
+      if (! toAfterCurrentHighBit()) {
+        assert false;
+      }
+      toNextHighValue();
+    }
+    return true;
+  }
+
+
+  /** setBitForIndex and efIndex have just been incremented, scan forward to the high set bit
+   *  of at least a given high value
+   *  by incrementing setBitForIndex, and by setting curHighLong accordingly.
+   *  @return the smallest encoded high value that is at least the given one.
+   */
+  private long advanceToHighValue(long highTarget) {
+    int curSetBits = Long.bitCount(curHighLong); // is shifted by getCurrentRightShift()
+    int curClearBits = Long.SIZE - curSetBits - getCurrentRightShift();
+    while ((currentHighValue() + curClearBits) < highTarget) {
+      // curHighLong has not enough clear bits to reach highTarget
+      efIndex += curSetBits;
+      if (efIndex >= numEncoded) {
+        return NO_MORE_VALUES;
+      }
+      toNextHighLong();
+      // assert getCurrentRightShift() == 0;
+      curSetBits = Long.bitCount(curHighLong);
+      curClearBits = Long.SIZE - curSetBits;
+    }
+    // curHighLong has enough clear bits to reach highTarget, but may not have enough set bits.
+    long highValue = nextHighValue();
+    while (highValue < highTarget) {
+      /* CHECKME: Instead of the linear search here, use (forward) broadword selection from
+       * "Broadword Implementation of Rank/Select Queries", Sebastiano Vigna, January 30, 2012.
+       */
+      if (! toAfterCurrentHighBit()) {
+        return NO_MORE_VALUES;
+      }
+      highValue = nextHighValue();
+    }
+    return highValue;
+  }
+
+  /** Given a target value, advance the decoding index to the first bigger or equal value
+   * and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
+   */
+  public long advanceToValue(long target) {
+    if (! toAfterCurrentHighBit()) {
+      return NO_MORE_VALUES;
+    }
+    long highTarget = target >>> efEncoder.numLowBits;
+    long highValue = advanceToHighValue(highTarget);
+    if (highValue == NO_MORE_VALUES) {
+      return NO_MORE_VALUES;
+    }
+    // Linear search with low values:
+    long currentValue = combineHighLowValues(highValue, currentLowValue());
+    while (currentValue < target) {
+      currentValue = nextValue();
+      if (currentValue == NO_MORE_VALUES) {
+        return NO_MORE_VALUES;
+      }
+    }
+    return currentValue;
+  }
+
+
+  /* Backward decoding section */
+
+  /** Set the decoding index to just after the last encoded value.
+   */
+  public void toAfterSequence() {
+    efIndex = numEncoded; // just after last index
+    setBitForIndex = (efEncoder.lastEncoded >>> efEncoder.numLowBits) + numEncoded;
+  }
+
+  /** @return the number of bits in a long before (setBitForIndex modulo Long.SIZE) */
+  private int getCurrentLeftShift() {
+    int s = Long.SIZE - 1 - (int) (setBitForIndex & (Long.SIZE-1));
+    return s;
+  }
+
+  /** Decrement efindex and setBitForIndex and
+   * shift curHighLong so that it does not contain the high bits after setBitForIndex.
+   * @return true iff efindex still >= 0
+   */
+  private boolean toBeforeCurrentHighBit() {
+    efIndex -= 1;
+    if (efIndex < 0) {
+      return false;
+    }
+    setBitForIndex -= 1;
+    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
+    curHighLong = efEncoder.upperLongs[highIndex] << getCurrentLeftShift();
+    return true;
+  }
+
+  /** The current high long has been determined to not contain the set bit that is needed.
+   *  Decrement setBitForIndex to the previous high long and set curHighLong accordingly.
+   */
+  private void toPreviousHighLong() {
+    setBitForIndex -= (setBitForIndex & (Long.SIZE-1)) + 1;
+    //assert getCurrentLeftShift() == 0;
+    int highIndex = (int)(setBitForIndex >>> LOG2_LONG_SIZE);
+    curHighLong = efEncoder.upperLongs[highIndex];
+  }
+
+  /** setBitForIndex and efIndex have just been decremented, scan to the previous high set bit
+   *  by decrementing setBitForIndex and by setting curHighLong accordingly.
+   *  @return the previous encoded high value.
+   */
+  private long previousHighValue() {
+    while (curHighLong == 0L) {
+      toPreviousHighLong(); // inlining and unrolling would simplify somewhat
+    }
+    setBitForIndex -= Long.numberOfLeadingZeros(curHighLong);
+    return currentHighValue();
+  }
+
+  /** If another value is available before the current decoding index, return this value and
+   * and decrease the decoding index by 1. Otherwise return {@link #NO_MORE_VALUES}.
+   */
+  public long previousValue() {
+    if (! toBeforeCurrentHighBit()) {
+      return NO_MORE_VALUES;
+    }
+    long highValue = previousHighValue();
+    return combineHighLowValues(highValue, currentLowValue());
+  }
+
+
+  /** setBitForIndex and efIndex have just been decremented, scan backward to the high set bit
+   *  of at most a given high value
+   *  by decrementing setBitForIndex and by setting curHighLong accordingly.
+   *  @return the largest encoded high value that is at most the given one.
+   */
+  private long backToHighValue(long highTarget) {
+    int curSetBits = Long.bitCount(curHighLong); // is shifted by getCurrentLeftShift()
+    int curClearBits = Long.SIZE - curSetBits - getCurrentLeftShift();
+    while ((currentHighValue() - curClearBits) > highTarget) {
+      // curHighLong has not enough clear bits to reach highTarget
+      efIndex -= curSetBits;
+      if (efIndex < 0) {
+        return NO_MORE_VALUES;
+      }
+      toPreviousHighLong();
+      //assert getCurrentLeftShift() == 0;
+      curSetBits = Long.bitCount(curHighLong);
+      curClearBits = Long.SIZE - curSetBits;
+    }
+    // curHighLong has enough clear bits to reach highTarget, but may not have enough set bits.
+    long highValue = previousHighValue();
+    while (highValue > highTarget) {
+      /* CHECKME: See at advanceToHighValue. */
+      if (! toBeforeCurrentHighBit()) {
+        return NO_MORE_VALUES;
+      }
+      highValue = previousHighValue();
+    }
+    return highValue;
+  }
+
+  /** Given a target value, go back to the first smaller or equal value
+   * and return it if it is available. Otherwise return {@link #NO_MORE_VALUES}.
+   */
+  public long backToValue(long target) {
+    if (! toBeforeCurrentHighBit()) {
+      return NO_MORE_VALUES;
+    }
+    long highTarget = target >>> efEncoder.numLowBits;
+    long highValue = backToHighValue(highTarget);
+    if (highValue == NO_MORE_VALUES) {
+      return NO_MORE_VALUES;
+    }
+    // Linear search with low values:
+    long currentValue = combineHighLowValues(highValue, currentLowValue());
+    while (currentValue > target) {
+      currentValue = previousValue();
+      if (currentValue == NO_MORE_VALUES) {
+        return NO_MORE_VALUES;
+      }
+    }
+    return currentValue;
+  }
+}
+

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java?rev=1501576&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java Tue Jul  9 21:49:04 2013
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.packed;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/** A DocIdSet in Elias-Fano encoding.
+ * @lucene.internal
+ */
+public class EliasFanoDocIdSet extends DocIdSet {
+  final EliasFanoEncoder efEncoder;
+  /*
+   * Construct an EliasFanoDocIdSet.
+   * @param numValues The number of values that can be encoded.
+   * @param upperBound  At least the highest value that will be encoded.
+   */
+  public EliasFanoDocIdSet(long numValues, long upperBound) {
+    efEncoder = new EliasFanoEncoder(numValues, upperBound);
+  }
+
+  public void encodeFromDisi(DocIdSetIterator disi) throws IOException {
+    while (efEncoder.numEncoded < efEncoder.numValues) {
+      int x = disi.nextDoc();
+      if (x == DocIdSetIterator.NO_MORE_DOCS) {
+        throw new IllegalArgumentException("disi: " + disi.toString()
+            + "\nhas " + efEncoder.numEncoded
+            + " docs, but at least " + efEncoder.numValues + " are required.");
+      }
+      efEncoder.encodeNext(x);
+    }
+  }
+
+  /**
+   * Provides a {@link DocIdSetIterator} to access encoded document ids.
+   */
+  @Override
+  public DocIdSetIterator iterator() {
+    if (efEncoder.lastEncoded >= DocIdSetIterator.NO_MORE_DOCS) {
+      throw new UnsupportedOperationException(
+          "Highest encoded value too high for DocIdSetIterator.NO_MORE_DOCS: " + efEncoder.lastEncoded);
+    }
+    return new DocIdSetIterator() {
+      private int curDocId = -1;
+      private final EliasFanoDecoder efDecoder = efEncoder.getDecoder();
+
+      @Override
+      public int docID() {
+        return curDocId;
+      }
+
+      private int setCurDocID(long nextValue) {
+        curDocId = (nextValue == EliasFanoDecoder.NO_MORE_VALUES)
+            ?  NO_MORE_DOCS
+                : (int) nextValue;
+        return curDocId;
+      }
+
+      @Override
+      public int nextDoc() {
+        return setCurDocID(efDecoder.nextValue());
+      }
+
+      @Override
+      public int advance(int target) {
+        return setCurDocID(efDecoder.advanceToValue(target));
+      }
+
+      @Override
+      public long cost() {
+        return efDecoder.numEncoded;
+      }
+    };
+  }
+
+  /** This DocIdSet implementation is cacheable. @return <code>true</code> */
+  @Override
+  public boolean isCacheable() {
+    return true;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return ((other instanceof EliasFanoDocIdSet))
+        && efEncoder.equals(((EliasFanoDocIdSet) other).efEncoder);
+  }
+
+  @Override
+  public int hashCode() {
+    return efEncoder.hashCode() ^ getClass().hashCode();
+  }
+}
+

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java?rev=1501576&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java Tue Jul  9 21:49:04 2013
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.packed;
+
+import java.util.Arrays;
+
+import org.apache.lucene.util.FixedBitSet; // for javadocs
+
+
+/** Encode a non decreasing sequence of non negative whole numbers in the Elias-Fano encoding
+ * that was introduced in the 1970's by Peter Elias and Robert Fano.
+ * <p>
+ * The Elias-Fano encoding is a high bits / low bits representation of
+ * a monotonically increasing sequence of <code>numValues > 0</code> natural numbers <code>x[i]</code>
+ * <p>
+ * <code>0 <= x[0] <= x[1] <= ... <= x[numValues-2] <= x[numValues-1] <= upperBound</code>
+ * <p>
+ * where <code>upperBound > 0</code> is an upper bound on the last value.
+ * <br>
+ * The Elias-Fano encoding uses less than half a bit per encoded number more
+ * than the smallest representation
+ * that can encode any monotone sequence with the same bounds.
+ * <p>
+ * The lower <code>L</code> bits of each <code>x[i]</code> are stored explicitly and contiguously
+ * in the lower-bits array, with <code>L</code> chosen as (<code>log()</code> base 2):
+ * <p>
+ * <code>L = max(0, floor(log(upperBound/numValues)))</code>
+ * <p>
+ * The upper bits are stored in the upper-bits array as a sequence of unary-coded gaps (<code>x[-1] = 0</code>):
+ * <p>
+ * <code>(x[i]/2**L) - (x[i-1]/2**L)</code>
+ * <p>
+ * The unary code encodes a natural number <code>n</code> by <code>n</code> 0 bits followed by a 1 bit:
+ * <code>0...01</code>. <br>
+ * In the upper bits the total the number of 1 bits is <code>numValues</code>
+ * and the total number of 0 bits is:<p>
+ * <code>floor(x[numValues-1]/2**L) <= upperBound/(2**max(0, floor(log(upperBound/numValues)))) <= 2*numValues</code>
+ * <p>
+ * The Elias-Fano encoding uses at most
+ * <p>
+ * <code>2 + ceil(log(upperBound/numValues))</code>
+ * <p>
+ * bits per encoded number. With <code>upperBound</code> in these bounds (<code>p</code> is an integer):
+ * <p>
+ * <code>2**p < x[numValues-1] <= upperBound <= 2**(p+1)</code>
+ * <p>
+ * the number of bits per encoded number is minimized.
+ * <p>
+ * In this implementation the values in the sequence can be given as <code>long</code>,
+ * <code>numValues = 0</code> and <code>upperBound = 0</code> are allowed,
+ * and each of the upper and lower bit arrays should fit in a <code>long[]</code>.
+ * <p>
+ * This implementation is based on this article:
+ * <br>
+ * Sebastiano Vigna, "Quasi Succinct Indices", June 19, 2012, sections 3 and 4.
+ * Retrieved from http://arxiv.org/pdf/1206.4300 .
+ *
+ * <p>The articles originally describing the Elias-Fano representation are:
+ * <br>Peter Elias, "Efficient storage and retrieval by content and address of static files",
+ * J. Assoc. Comput. Mach., 21(2):246–260, 1974.
+ * <br>Robert M. Fano, "On the number of bits required to implement an associative memory",
+ *  Memorandum 61, Computer Structures Group, Project MAC, MIT, Cambridge, Mass., 1971.
+ *
+ * @lucene.internal
+ */
+
+public class EliasFanoEncoder {
+  final long numValues;
+  private final long upperBound;
+  final int numLowBits;
+  final long lowerBitsMask;
+  final long[] upperLongs;
+  final long[] lowerLongs;
+  private static final int LOG2_LONG_SIZE = Long.numberOfTrailingZeros(Long.SIZE);
+
+  long numEncoded = 0L;
+  long lastEncoded = 0L;
+
+  /**
+   * Construct an Elias-Fano encoder.
+   * After construction, call {@link #encodeNext} <code>numValues</code> times to encode
+   * a non decreasing sequence of non negative numbers.
+   * @param numValues The number of values that is to be encoded.
+   * @param upperBound  At least the highest value that will be encoded.
+   *                For space efficiency this should not exceed the power of two that equals
+   *                or is the first higher than the actual maximum.
+   *                <br>When <code>numValues >= (upperBound/3)</code>
+   *                a {@link FixedBitSet} will take less space.
+   * @throws IllegalArgumentException when:
+   *         <ul>
+   *         <li><code>numValues</code> is negative, or
+   *         <li><code>numValues</code> is non negative and <code>upperBound</code> is negative, or
+   *         <li>the low bits do not fit in a <code>long[]</code>:
+   *             <code>(L * numValues / 64) > Integer.MAX_VALUE</code>, or
+   *         <li>the high bits do not fit in a <code>long[]</code>:
+   *             <code>(2 * numValues / 64) > Integer.MAX_VALUE</code>.
+   *         </ul>
+   */
+  public EliasFanoEncoder(long numValues, long upperBound) {
+    if (numValues < 0L) {
+      throw new IllegalArgumentException("numValues should not be negative: " + numValues);
+    }
+    this.numValues = numValues;
+    if ((numValues > 0L) && (upperBound < 0L)) {
+      throw new IllegalArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0");
+    }
+    this.upperBound = upperBound;
+    int nLowBits = 0;
+    if (this.numValues > 0) { // nLowBits = max(0; floor(2log(upperBound/numValues)))
+      long lowBitsFac = this.upperBound / this.numValues;
+      if (lowBitsFac > 0) {
+        nLowBits = 63 - Long.numberOfLeadingZeros(lowBitsFac); // see Long.numberOfLeadingZeros javadocs
+      }
+    }
+    this.numLowBits = nLowBits;
+    this.lowerBitsMask = Long.MAX_VALUE >>> (Long.SIZE - 1 - this.numLowBits);
+
+    long numLongsForLowBits = numLongsForBits(numValues * numLowBits);
+    if (numLongsForLowBits > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("numLongsForLowBits too large to index a long array: " + numLongsForLowBits);
+    }
+    this.lowerLongs = new long[(int) numLongsForLowBits];
+
+    long numHighBitsClear = ((this.upperBound > 0) ? this.upperBound : 0) >>> this.numLowBits;
+    assert numHighBitsClear <= (2 * this.numValues);
+    long numHighBitsSet = this.numValues;
+
+    long numLongsForHighBits = numLongsForBits(numHighBitsClear + numHighBitsSet);
+    if (numLongsForHighBits > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("numLongsForHighBits too large to index a long array: " + numLongsForHighBits);
+    }
+    this.upperLongs = new long[(int) numLongsForHighBits];
+  }
+
+  private static long numLongsForBits(long numBits) {
+    assert numBits >= 0 : numBits;
+    return (numBits + (Long.SIZE-1)) >>> LOG2_LONG_SIZE;
+  }
+
+  /** Call at most <code>numValues</code> times to encode a non decreasing sequence of non negative numbers.
+   * @param x The next number to be encoded.
+   * @throws IllegalArgumentException when:
+   *         <ul>
+   *         <li>called more than <code>numValues</code> times, or
+   *         <li><code>x</code> is smaller than an earlier encoded value, or
+   *         <li><code>x</code> is larger than <code>upperBound</code>.
+   *         </ul>
+   */
+  public void encodeNext(long x) {
+    if (numEncoded >= numValues) {
+      throw new IllegalStateException("encodeNext called more than " + numValues + " times.");
+    }
+    if (lastEncoded > x) {
+      throw new IllegalArgumentException(x + " smaller than previous " + lastEncoded);
+    }
+    if (x > upperBound) {
+      throw new IllegalArgumentException(x + " larger than upperBound " + upperBound);
+    }
+    encodeUpperBits(x >>> numLowBits);
+    encodeLowerBits(x & lowerBitsMask);
+    numEncoded++;
+    lastEncoded = x;
+  }
+
+  private void encodeUpperBits(long highValue) {
+    long nextHighBitNum = numEncoded + highValue; // sequence of unary gaps
+    upperLongs[(int)(nextHighBitNum >>> LOG2_LONG_SIZE)] |= (1L << (nextHighBitNum & (Long.SIZE-1)));
+  }
+
+  private void encodeLowerBits(long lowValue) {
+    packValue(lowValue, lowerLongs, numLowBits, numEncoded);
+  }
+
+  private static void packValue(long value, long[] longArray, int numBits, long packIndex) {
+    if (numBits != 0) {
+      long bitPos = numBits * packIndex;
+      int index = (int) (bitPos >>> LOG2_LONG_SIZE);
+      int bitPosAtIndex = (int) (bitPos & (Long.SIZE-1));
+      longArray[index] |= (value << bitPosAtIndex);
+      if ((bitPosAtIndex + numBits) > Long.SIZE) {
+        longArray[index+1] = (value >>> (Long.SIZE - bitPosAtIndex));
+      }
+    }
+  }
+
+  /** Provide an indication that is better to use an {@link EliasFanoEncoder} than a {@link FixedBitSet}
+   *  to encode document identifiers.
+   *  This indication is not precise and may change in the future.
+   *  <br>An EliasFanoEncoder is favoured when the size of the encoding by the EliasFanoEncoder
+   *  is at most 5/6 of the size of the FixedBitSet.
+   *  <br>This condition is the same as comparing estimates of the number of bits accessed by a pair of FixedBitSets and
+   *  by a pair of non indexed EliasFanoDocIdSets when determining the intersections of the pairs.
+   *  @param numValues The number of document identifiers that is to be encoded. Should be non negative.
+   *  @param upperBound The maximum possible value for a document identifier. Should be at least numValues.
+   */
+  public static boolean sufficientlySmallerThanBitSet(long numValues, long upperBound) {
+    /* When (upperBound / 6) == numValues,
+     * the number of bits per entry for the EliasFanoEncoder is 2 + ceil(2log(upperBound/numValues)) == 5.
+     */
+    /* For intersecting two bit sets upperBound bits are accessed, roughly half of one, half of the other.
+     * For intersecting two EliasFano sequences without index on the upper bits,
+     * all (2 * 3 * numValues) upper bits are accessed.
+     */
+    return (upperBound / 6) > numValues;
+  }
+
+  /**
+   * @return An {@link EliasFanoDecoder} to access the encoded values.
+   * Perform all calls to {@link #encodeNext} before calling {@link #getDecoder}.
+   */
+  public EliasFanoDecoder getDecoder() {
+    // decode as far as currently encoded as determined by numEncoded.
+    return new EliasFanoDecoder(this);
+  }
+
+  /** Expert. The low bits. */
+  public long[] getLowerBits() {
+    return lowerLongs;
+  }
+
+  /** Expert. The high bits. */
+  public long[] getUpperBits() {
+    return upperLongs;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder s = new StringBuilder("EliasFanoSequence");
+    s.append(" numValues " + numValues);
+    s.append(" numEncoded " + numEncoded);
+    s.append(" upperBound " + upperBound);
+    s.append(" lastEncoded " + lastEncoded);
+    s.append(" numLowBits " + numLowBits);
+    s.append("\nupperLongs[" + upperLongs.length + "]");
+    for (int i = 0; i < upperLongs.length; i++) {
+      s.append(" " + longHex(upperLongs[i]));
+    }
+    s.append("\nlowerLongs[" + lowerLongs.length + "]");
+    for (int i = 0; i < lowerLongs.length; i++) {
+      s.append(" " + longHex(lowerLongs[i]));
+    }
+    return s.toString();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (! (other instanceof EliasFanoEncoder)) {
+      return false;
+    }
+    EliasFanoEncoder oefs = (EliasFanoEncoder) other;
+    // no equality needed for upperBound
+    return (this.numValues == oefs.numValues)
+        && (this.numEncoded == oefs.numEncoded)
+        && (this.numLowBits == oefs.numLowBits)
+        && Arrays.equals(this.upperLongs, oefs.upperLongs)
+        && Arrays.equals(this.lowerLongs, oefs.lowerLongs);
+  }
+
+  @Override
+  public int hashCode() {
+    int h = ((int) (numValues + numEncoded))
+        ^ numLowBits
+        ^ Arrays.hashCode(upperLongs)
+        ^ Arrays.hashCode(lowerLongs);
+    return h;
+  }
+
+  public static String longHex(long x) {
+    String hx = Long.toHexString(x);
+    StringBuilder sb = new StringBuilder("0x");
+    int l = 16 - hx.length();
+    while (l > 0) {
+      sb.append('0');
+      l--;
+    }
+    sb.append(hx);
+    return sb.toString();
+  }
+}
+

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java?rev=1501576&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java Tue Jul  9 21:49:04 2013
@@ -0,0 +1,156 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestEliasFanoDocIdSet extends LuceneTestCase {
+  private static DocIdSetIterator makeDisi(final int[] docIds) {
+    class IntArrayDisi extends DocIdSetIterator {
+      int i = 0;
+      int docId = -1;
+
+      @Override
+      public int docID() {
+        return docId;
+      }
+
+      @Override
+      public int nextDoc() {
+        if (i >= docIds.length) {
+          docId = NO_MORE_DOCS;
+          return docId;
+        }
+        if (docIds[i] < docId) { // Elias-Fano sequence should be non decreasing.
+          // The non decreasing condition for Elias-Fano is weaker than normal increasing for DocIdSetIterator
+          throw new AssertionError("docIds[] out of order");
+        }
+        docId = docIds[i++]; // increase i to just after current
+        return docId;
+      }
+
+      @Override
+      public int advance(int target) {
+        // ( ((i == 0) and (docId == -1)) or
+        //   ((i > 0) and (docIds.length > 0) and (i <= docIds.length) and (docId == docIds[i-1])) )
+
+        // The behavior of this method is undefined when called with target ≤ current, or after the iterator has exhausted.
+        // Both cases may result in unpredicted behavior, and may throw an assertion error or an IOOBE here.
+        // So when nextDoc() or advance() were called earlier, the target should be bigger than current docId:
+        assert (docId == -1) || (docId < target);
+
+
+        // Do a binary search for the index j for which:
+        // ((j >= i)
+        //  and ((j < docIds.length) implies (docIds[j] >= target))
+        //  and ((j >= 1) implies (docIds[j-1] < target)) )
+        int j = docIds.length;
+        while (i < j) {
+          // ((0 <= i) and (i < j) and (j <= docIds.length)) so (docIds.length > 0)
+          int m = i + (j - i) / 2; // (i <= m) and (m < j); avoid overflow for (i + j)
+          if (docIds[m] < target) {
+            i = m + 1; // (docIds[i-1] <  target) and (i <= j)
+          } else {
+            j = m; //     (docIds[j] >= target)   and (i <= j)
+          }
+        } // (i == j)
+        docId = (i >= docIds.length)
+            ? NO_MORE_DOCS // exhausted
+                : docIds[i++]; // increase i to just after current
+        return docId;
+      }
+
+      @Override
+      public long cost() {
+        return docIds.length;
+      }
+    };
+    return new IntArrayDisi();
+  }
+
+  public void tstEqualDisisNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
+    assertEquals(disi0.docID(), disi1.docID());
+    int d0 = disi0.nextDoc();
+    int d1 = disi1.nextDoc();
+    int i = 0;
+    while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
+      assertEquals("index " + i, d0, d1);
+      i++;
+      d0 = disi0.nextDoc();
+      d1 = disi1.nextDoc();
+    }
+    assertEquals("at end", d0, d1);
+  }
+
+  public void tstEqualDisisAdvanceAsNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
+    assertEquals(disi0.docID(), disi1.docID());
+    int d0 = disi0.advance(0);
+    int d1 = disi1.advance(0);
+    int i = 0;
+    while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
+      assertEquals("index " + i, d0, d1);
+      i++;
+      d0 = disi0.advance(d1+1);
+      d1 = disi1.advance(d1+1);
+    }
+    assertEquals("at end disi0 " + disi0 + ", disi1 " + disi1, d0, d1);
+  }
+
+  public void tstEF(int[] docIds) {
+    int maxDoc = -1;
+    for (int docId: docIds) {
+      assert docId >= maxDoc; // non decreasing
+      maxDoc = docId;
+    }
+    try {
+      EliasFanoDocIdSet efd = new EliasFanoDocIdSet(docIds.length, maxDoc);
+      efd.encodeFromDisi(makeDisi(docIds));
+      tstEqualDisisNext(         makeDisi(docIds), efd.iterator());
+      tstEqualDisisAdvanceAsNext(makeDisi(docIds), efd.iterator());
+    } catch (IOException ioe) {
+      throw new Error(ioe);
+    }
+  }
+
+  public void testEmpty() { tstEF(new int[] {}); }
+
+  public void testOneElementZero() { tstEF(new int[] {0}); }
+
+  public void testTwoElements() { tstEF(new int[] {0,1}); }
+
+  public void testOneElementOneBit() {
+    for (int i = 0; i < (Integer.SIZE-1); i++) {
+      tstEF(new int[] {1 << i});
+    }
+  }
+
+  public void testIncreasingSequences() {
+    final int TEST_NUMDOCS = 129;
+    int[] docIds = new int[TEST_NUMDOCS];
+    for (int f = 1; f <= 1025; f++) {
+      for (int i = 0; i < TEST_NUMDOCS; i++) {
+        docIds[i] = i*f;
+      }
+      tstEF(docIds);
+    }
+  }
+}
+

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoSequence.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoSequence.java?rev=1501576&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoSequence.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoSequence.java Tue Jul  9 21:49:04 2013
@@ -0,0 +1,281 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestEliasFanoSequence extends LuceneTestCase {
+
+  private static EliasFanoEncoder makeEncoder(long[] values) {
+    long upperBound = -1L;
+    for (long value: values) {
+      assertTrue(value >= upperBound); // test data ok
+      upperBound = value;
+    }
+    EliasFanoEncoder efEncoder = new EliasFanoEncoder(values.length, upperBound);
+    for (long value: values) {
+      efEncoder.encodeNext(value);
+    }
+    return efEncoder;
+  }
+
+  private static void tstDecodeAllNext(long[] values, EliasFanoDecoder efd) {
+    efd.toBeforeSequence();
+    long nextValue = efd.nextValue();
+    for (long expValue: values) {
+      assertFalse("nextValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == nextValue);
+      assertEquals(expValue, nextValue);
+      nextValue = efd.nextValue();
+    }
+    assertEquals(EliasFanoDecoder.NO_MORE_VALUES, nextValue);
+  }
+
+  private static void tstDecodeAllPrev(long[] values, EliasFanoDecoder efd) {
+    efd.toAfterSequence();
+    for (int i = values.length - 1; i >= 0; i--) {
+      long previousValue = efd.previousValue();
+      assertFalse("previousValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == previousValue);
+      assertEquals(values[i], previousValue);
+    }
+    assertEquals(EliasFanoDecoder.NO_MORE_VALUES, efd.previousValue());
+  }
+
+  private static void tstDecodeAllAdvanceToExpected(long[] values, EliasFanoDecoder efd) {
+    efd.toBeforeSequence();
+    long previousValue = -1L;
+    long index = 0;
+    for (long expValue: values) {
+      if (expValue > previousValue) {
+        long advanceValue = efd.advanceToValue(expValue);
+        assertFalse("advanceValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == advanceValue);
+        assertEquals(expValue, advanceValue);
+        assertEquals(index, efd.index());
+        previousValue = expValue;
+      }
+      index++;
+    }
+    long advanceValue = efd.advanceToValue(previousValue+1);
+    assertEquals(EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
+  }
+
+  private static void tstDecodeAdvanceToMultiples(long[] values, EliasFanoDecoder efd, final long m) {
+    // test advancing to multiples of m
+    assert m > 0;
+    long previousValue = -1L;
+    long index = 0;
+    long mm = m;
+    efd.toBeforeSequence();
+    for (long expValue: values) {
+      // mm > previousValue
+      if (expValue >= mm) {
+        long advanceValue = efd.advanceToValue(mm);
+        assertFalse("advanceValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == advanceValue);
+        assertEquals(expValue, advanceValue);
+        assertEquals(index, efd.index());
+        previousValue = expValue;
+        do {
+          mm += m;
+        } while (mm <= previousValue);
+      }
+      index++;
+    }
+    long advanceValue = efd.advanceToValue(mm);
+    assertEquals(EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
+  }
+
+  private static void tstDecodeBackToMultiples(long[] values, EliasFanoDecoder efd, final long m) {
+    // test backing to multiples of m
+    assert m > 0;
+    efd.toAfterSequence();
+    int index = values.length - 1;
+    if (index < 0) {
+      long advanceValue = efd.backToValue(0);
+      assertEquals(EliasFanoDecoder.NO_MORE_VALUES, advanceValue);
+      return; // empty values, nothing to go back to/from
+    }
+    long expValue = values[index];
+    long previousValue = expValue + 1;
+    long mm = (expValue / m) * m;
+    while (index >= 0) {
+      expValue = values[index];
+      assert mm < previousValue;
+      if (expValue <= mm) {
+        long backValue = efd.backToValue(mm);
+        assertFalse("backToValue at end too early", EliasFanoDecoder.NO_MORE_VALUES == backValue);
+        assertEquals(expValue, backValue);
+        assertEquals(index, efd.index());
+        previousValue = expValue;
+        do {
+          mm -= m;
+        } while (mm >= previousValue);
+      }
+      index--;
+    }
+    long backValue = efd.backToValue(mm);
+    assertEquals(EliasFanoDecoder.NO_MORE_VALUES, backValue);
+  }
+
+  private static void tstEqual(String mes, long[] exp, long[] act) {
+    assertEquals(mes + ".length", exp.length, act.length);
+    for (int i = 0; i < exp.length; i++) {
+      if (exp[i] != act[i]) {
+        fail(mes + "[" + i + "] " + exp[i] + " != " + act[i]);
+      }
+    }
+  }
+
+  private static void tstDecodeAll(EliasFanoEncoder efEncoder, long[] values) {
+    tstDecodeAllNext(values, efEncoder.getDecoder());
+    tstDecodeAllPrev(values, efEncoder.getDecoder());
+    tstDecodeAllAdvanceToExpected(values, efEncoder.getDecoder());
+  }
+
+  private static void tstEFS(long[] values, long[] expHighLongs, long[] expLowLongs) {
+    EliasFanoEncoder efEncoder = makeEncoder(values);
+    tstEqual("upperBits", expHighLongs, efEncoder.getUpperBits());
+    tstEqual("lowerBits", expLowLongs, efEncoder.getLowerBits());
+    tstDecodeAll(efEncoder, values);
+  }
+
+  private static void tstEFS2(long[] values) {
+    EliasFanoEncoder efEncoder = makeEncoder(values);
+    tstDecodeAll(efEncoder, values);
+  }
+
+  private static void tstEFSadvanceToAndBackToMultiples(long[] values, long maxValue, long minAdvanceMultiple) {
+    EliasFanoEncoder efEncoder = makeEncoder(values);
+    for (long m = minAdvanceMultiple; m <= maxValue; m += 1) {
+      tstDecodeAdvanceToMultiples(values, efEncoder.getDecoder(), m);
+      tstDecodeBackToMultiples(values, efEncoder.getDecoder(), m);
+    }
+  }
+
+  public void testEmpty() {
+    long[] values = new long[0];
+    long[] expHighBits = new long[0];
+    long[] expLowBits = new long[0];
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testOneValue1() {
+    long[] values = new long[] {0};
+    long[] expHighBits = new long[] {0x1L};
+    long[] expLowBits = new long[] {};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testTwoValues1() {
+    long[] values = new long[] {0,0};
+    long[] expHighBits = new long[] {0x3L};
+    long[] expLowBits = new long[] {};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testOneValue2() {
+    long[] values = new long[] {63};
+    long[] expHighBits = new long[] {2};
+    long[] expLowBits = new long[] {31};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testOneMaxValue() {
+    long[] values = new long[] {Long.MAX_VALUE};
+    long[] expHighBits = new long[] {2};
+    long[] expLowBits = new long[] {Long.MAX_VALUE/2};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testTwoMinMaxValues() {
+    long[] values = new long[] {0, Long.MAX_VALUE};
+    long[] expHighBits = new long[] {0x11};
+    long[] expLowBits = new long[] {0xE000000000000000L, 0x03FFFFFFFFFFFFFFL};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testTwoMaxValues() {
+    long[] values = new long[] {Long.MAX_VALUE, Long.MAX_VALUE};
+    long[] expHighBits = new long[] {0x18};
+    long[] expLowBits = new long[] {-1L, 0x03FFFFFFFFFFFFFFL};
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testExample1() { // Figure 1 from Vigna 2012 paper
+    long[] values = new long[] {5,8,8,15,32};
+    long[] expLowBits = new long[] {Long.parseLong("0011000001", 2)}; // reverse block and bit order
+    long[] expHighBits = new long[] {Long.parseLong("1000001011010", 2)}; // reverse block and bit order
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testHashCodeEquals() {
+    long[] values = new long[] {5,8,8,15,32};
+    EliasFanoEncoder efEncoder1 = makeEncoder(values);
+    EliasFanoEncoder efEncoder2 = makeEncoder(values);
+    assertEquals(efEncoder1, efEncoder2);
+    assertEquals(efEncoder1.hashCode(), efEncoder2.hashCode());
+
+    EliasFanoEncoder efEncoder3 = makeEncoder(new long[] {1,2,3});
+    assertFalse(efEncoder1.equals(efEncoder3));
+    assertFalse(efEncoder3.equals(efEncoder1));
+    assertFalse(efEncoder1.hashCode() == efEncoder3.hashCode()); // implementation ok for these.
+  }
+
+  public void testMonotoneSequences() {
+    for (int s = 2; s < 1222; s++) {
+      long[] values = new long[s];
+      for (int i = 0; i < s; i++) {
+        values[i] = (i/2);
+      }
+      tstEFS2(values);
+    }
+  }
+
+  public void testStrictMonotoneSequences() {
+    for (int s = 2; s < 1222; s++) {
+      long[] values = new long[s];
+      for (int i = 0; i < s; i++) {
+        values[i] = i * ((long) i - 1) / 2; // Add a gap of (s-1) to previous
+        // s = (s*(s+1) - (s-1)*s)/2
+      }
+      tstEFS2(values);
+    }
+  }
+
+  public void testHighBitLongZero() {
+    final int s = 65;
+    long[] values = new long[s];
+    for (int i = 0; i < s-1; i++) {
+      values[i] = 0;
+    }
+    values[s-1] = 128;
+    long[] expHighBits = new long[] {-1,0,0,1};
+    long[] expLowBits = new long[0];
+    tstEFS(values, expHighBits, expLowBits);
+  }
+
+  public void testAdvanceToAndBackToMultiples() {
+    for (int s = 2; s < 130; s++) {
+      long[] values = new long[s];
+      for (int i = 0; i < s; i++) {
+        values[i] = i * ((long) i + 1) / 2; // Add a gap of s to previous
+        // s = (s*(s+1) - (s-1)*s)/2
+      }
+      tstEFSadvanceToAndBackToMultiples(values, values[s-1], 10);
+    }
+  }
+}
+



Re: svn commit: r1501576 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/packed/

Posted by Adrien Grand <jp...@gmail.com>.
Good point Robert, I will fix it...

--
Adrien

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Re: svn commit: r1501576 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/util/packed/ core/src/test/org/apache/lucene/util/packed/

Posted by Robert Muir <rc...@gmail.com>.
On Tue, Jul 9, 2013 at 5:49 PM, <jp...@apache.org> wrote:

>
> +
> +  /** @return The Elias-Fano encoder that is decoded. */
> +  public EliasFanoEncoder getEliasFanoEncoder() {
> +    return efEncoder;
> +  }
>

For methods like this, can we just state "Returns the ..."

The problem is that if there is only an @return, the "Method summary" for
the class will have a blank summary. this makes it hard on the user...