You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/07/12 09:15:45 UTC

svn commit: r1502450 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/packed/ lucene/core/src/test/org/apache/lucene/util/ lucene/core/src/test/org/apache/lucene/util/packed/ lucene/test-framework...

Author: jpountz
Date: Fri Jul 12 07:15:44 2013
New Revision: 1502450

URL: http://svn.apache.org/r1502450
Log:
LUCENE-5100: BaseDocIdSetTestCase (merged from r1502448).

Added:
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java
      - copied unchanged from r1502448, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java
      - copied unchanged from r1502448, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestWAH8DocIdSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java Fri Jul 12 07:15:44 2013
@@ -33,7 +33,7 @@ public class EliasFanoDocIdSet extends D
    * @param numValues The number of values that can be encoded.
    * @param upperBound  At least the highest value that will be encoded.
    */
-  public EliasFanoDocIdSet(long numValues, long upperBound) {
+  public EliasFanoDocIdSet(int numValues, int upperBound) {
     efEncoder = new EliasFanoEncoder(numValues, upperBound);
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java Fri Jul 12 07:15:44 2013
@@ -119,7 +119,7 @@ public class EliasFanoEncoder {
     if ((numValues > 0L) && (upperBound < 0L)) {
       throw new IllegalArgumentException("upperBound should not be negative: " + upperBound + " when numValues > 0");
     }
-    this.upperBound = upperBound;
+    this.upperBound = numValues > 0 ? upperBound : -1L; // if there is no value, -1 is the best upper bound
     int nLowBits = 0;
     if (this.numValues > 0) { // nLowBits = max(0; floor(2log(upperBound/numValues)))
       long lowBitsFac = this.upperBound / this.numValues;

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java Fri Jul 12 07:15:44 2013
@@ -22,7 +22,16 @@ import java.util.BitSet;
 
 import org.apache.lucene.search.DocIdSetIterator;
 
-public class TestFixedBitSet extends LuceneTestCase {
+public class TestFixedBitSet extends BaseDocIdSetTestCase<FixedBitSet> {
+
+  @Override
+  public FixedBitSet copyOf(BitSet bs, int length) throws IOException {
+    final FixedBitSet set = new FixedBitSet(length);
+    for (int doc = bs.nextSetBit(0); doc != -1; doc = bs.nextSetBit(doc + 1)) {
+      set.set(doc);
+    }
+    return set;
+  }
 
   void doGet(BitSet a, FixedBitSet b) {
     int max = b.length();

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java Fri Jul 12 07:15:44 2013
@@ -17,11 +17,21 @@
 
 package org.apache.lucene.util;
 
+import java.io.IOException;
 import java.util.BitSet;
 
 import org.apache.lucene.search.DocIdSetIterator;
 
-public class TestOpenBitSet extends LuceneTestCase {
+public class TestOpenBitSet extends BaseDocIdSetTestCase<OpenBitSet> {
+
+  @Override
+  public OpenBitSet copyOf(BitSet bs, int length) throws IOException {
+    final OpenBitSet set = new OpenBitSet(length);
+    for (int doc = bs.nextSetBit(0); doc != -1; doc = bs.nextSetBit(doc + 1)) {
+      set.set(doc);
+    }
+    return set;
+  }
 
   void doGet(BitSet a, OpenBitSet b) {
     int max = a.size();
@@ -320,6 +330,7 @@ public class TestOpenBitSet extends Luce
     checkPrevSetBitArray(new int[] {0});
     checkPrevSetBitArray(new int[] {0,2});
   }
+
 }
 
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestWAH8DocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestWAH8DocIdSet.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestWAH8DocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestWAH8DocIdSet.java Fri Jul 12 07:15:44 2013
@@ -19,93 +19,44 @@ package org.apache.lucene.util;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.BitSet;
 import java.util.List;
 
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
+public class TestWAH8DocIdSet extends BaseDocIdSetTestCase<WAH8DocIdSet> {
 
-public class TestWAH8DocIdSet extends LuceneTestCase {
-
-  private static FixedBitSet randomSet(int numBits, int numBitsSet) {
-    assert numBitsSet <= numBits;
-    final FixedBitSet set = new FixedBitSet(numBits);
-    if (numBitsSet == numBits) {
-      set.set(0, set.length());
-    } else {
-      for (int i = 0; i < numBitsSet; ++i) {
-        while (true) {
-          final int o = random().nextInt(numBits);
-          if (!set.get(o)) {
-            set.set(o);
-            break;
-          }
-        }
-      }
-    }
-    return set;
-  }
-
-  private static FixedBitSet randomSet(int numBits, float percentSet) {
-    return randomSet(numBits, (int) (percentSet * numBits));
-  }
-
-  public void testAgainstFixedBitSet() throws IOException {
-    final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
-    for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat() / 2, 0.9f, 1f}) {
-      final FixedBitSet set = randomSet(numBits, percentSet);
-      final WAH8DocIdSet copy = WAH8DocIdSet.copyOf(set.iterator());
-      assertEquals(numBits, set, copy);
-    }
-  }
-
-  public void assertEquals(int numBits, FixedBitSet ds1, WAH8DocIdSet ds2) throws IOException {
+  @Override
+  public WAH8DocIdSet copyOf(BitSet bs, int length) throws IOException {
+    final int indexInterval = _TestUtil.nextInt(random(), 8, 256);
+    final WAH8DocIdSet.Builder builder = new WAH8DocIdSet.Builder().setIndexInterval(indexInterval);
+    for (int i = bs.nextSetBit(0); i != -1; i = bs.nextSetBit(i + 1)) {
+      builder.add(i);
+    }
+    return builder.build();
+  }
+
+  @Override
+  public void assertEquals(int numBits, BitSet ds1, WAH8DocIdSet ds2)
+      throws IOException {
+    super.assertEquals(numBits, ds1, ds2);
     assertEquals(ds1.cardinality(), ds2.cardinality());
-
-    // nextDoc
-    DocIdSetIterator it1 = ds1.iterator();
-    DocIdSetIterator it2 = ds2.iterator();
-    assertEquals(it1.docID(), it2.docID());
-    for (int doc = it1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it1.nextDoc()) {
-      assertEquals(doc, it2.nextDoc());
-      assertEquals(it1.docID(), it2.docID());
-    }
-    assertEquals(DocIdSetIterator.NO_MORE_DOCS, it2.nextDoc());
-    assertEquals(it1.docID(), it2.docID());
-
-    // nextDoc / advance
-    it1 = ds1.iterator();
-    it2 = ds2.iterator();
-    for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;) {
-      if (random().nextBoolean()) {
-        doc = it1.nextDoc();
-        assertEquals(doc, it2.nextDoc());
-        assertEquals(it1.docID(), it2.docID());
-      } else {
-        final int target = doc + 1 + random().nextInt(random().nextBoolean() ? 64 : numBits / 64);
-        doc = it1.advance(target);
-        assertEquals(doc, it2.advance(target));
-        assertEquals(it1.docID(), it2.docID());
-      }
-    }
   }
 
   public void testUnion() throws IOException {
     final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
     final int numDocIdSets = _TestUtil.nextInt(random(), 0, 4);
-    final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
+    final List<BitSet> fixedSets = new ArrayList<BitSet>(numDocIdSets);
     for (int i = 0; i < numDocIdSets; ++i) {
       fixedSets.add(randomSet(numBits, random().nextFloat() / 16));
     }
     final List<WAH8DocIdSet> compressedSets = new ArrayList<WAH8DocIdSet>(numDocIdSets);
-    for (FixedBitSet set : fixedSets) {
-      compressedSets.add(WAH8DocIdSet.copyOf(set.iterator()));
+    for (BitSet set : fixedSets) {
+      compressedSets.add(copyOf(set, numBits));
     }
 
     final WAH8DocIdSet union = WAH8DocIdSet.union(compressedSets);
-    final FixedBitSet expected = new FixedBitSet(numBits);
-    for (DocIdSet set : fixedSets) {
-      final DocIdSetIterator it = set.iterator();
-      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+    final BitSet expected = new BitSet(numBits);
+    for (BitSet set : fixedSets) {
+      for (int doc = set.nextSetBit(0); doc != -1; doc = set.nextSetBit(doc + 1)) {
         expected.set(doc);
       }
     }
@@ -115,27 +66,26 @@ public class TestWAH8DocIdSet extends Lu
   public void testIntersection() throws IOException {
     final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
     final int numDocIdSets = _TestUtil.nextInt(random(), 1, 4);
-    final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
+    final List<BitSet> fixedSets = new ArrayList<BitSet>(numDocIdSets);
     for (int i = 0; i < numDocIdSets; ++i) {
       fixedSets.add(randomSet(numBits, random().nextFloat()));
     }
     final List<WAH8DocIdSet> compressedSets = new ArrayList<WAH8DocIdSet>(numDocIdSets);
-    for (FixedBitSet set : fixedSets) {
-      compressedSets.add(WAH8DocIdSet.copyOf(set.iterator()));
+    for (BitSet set : fixedSets) {
+      compressedSets.add(copyOf(set, numBits));
     }
 
     final WAH8DocIdSet union = WAH8DocIdSet.intersect(compressedSets);
-    final FixedBitSet expected = new FixedBitSet(numBits);
-    expected.set(0, expected.length());
-    for (DocIdSet set : fixedSets) {
-      final DocIdSetIterator it = set.iterator();
-      int lastDoc = -1;
-      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
-        expected.clear(lastDoc + 1, doc);
-        lastDoc = doc;
-      }
-      if (lastDoc + 1 < expected.length()) {
-        expected.clear(lastDoc + 1, expected.length());
+    final BitSet expected = new BitSet(numBits);
+    expected.set(0, expected.size());
+    for (BitSet set : fixedSets) {
+      for (int previousDoc = -1, doc = set.nextSetBit(0); ; previousDoc = doc, doc = set.nextSetBit(doc + 1)) {
+        if (doc == -1) {
+          expected.clear(previousDoc + 1, set.size());
+          break;
+        } else {
+          expected.clear(previousDoc + 1, doc);
+        }
       }
     }
     assertEquals(numBits, expected, union);

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java?rev=1502450&r1=1502449&r2=1502450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/packed/TestEliasFanoDocIdSet.java Fri Jul 12 07:15:44 2013
@@ -18,139 +18,45 @@ package org.apache.lucene.util.packed;
  */
 
 import java.io.IOException;
+import java.util.BitSet;
 
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BaseDocIdSetTestCase;
 
-public class TestEliasFanoDocIdSet extends LuceneTestCase {
-  private static DocIdSetIterator makeDisi(final int[] docIds) {
-    class IntArrayDisi extends DocIdSetIterator {
-      int i = 0;
-      int docId = -1;
+public class TestEliasFanoDocIdSet extends BaseDocIdSetTestCase<EliasFanoDocIdSet> {
 
-      @Override
-      public int docID() {
-        return docId;
-      }
+  @Override
+  public EliasFanoDocIdSet copyOf(final BitSet bs, final int numBits) throws IOException {
+    final EliasFanoDocIdSet set = new EliasFanoDocIdSet(bs.cardinality(), numBits - 1);
+    set.encodeFromDisi(new DocIdSetIterator() {
+      int doc = -1;
 
       @Override
-      public int nextDoc() {
-        if (i >= docIds.length) {
-          docId = NO_MORE_DOCS;
-          return docId;
+      public int nextDoc() throws IOException {
+        doc = bs.nextSetBit(doc + 1);
+        if (doc == -1) {
+          doc = NO_MORE_DOCS;
         }
-        if (docIds[i] < docId) { // Elias-Fano sequence should be non decreasing.
-          // The non decreasing condition for Elias-Fano is weaker than normal increasing for DocIdSetIterator
-          throw new AssertionError("docIds[] out of order");
-        }
-        docId = docIds[i++]; // increase i to just after current
-        return docId;
+        assert doc < numBits;
+        return doc;
       }
-
+      
       @Override
-      public int advance(int target) {
-        // ( ((i == 0) and (docId == -1)) or
-        //   ((i > 0) and (docIds.length > 0) and (i <= docIds.length) and (docId == docIds[i-1])) )
-
-        // The behavior of this method is undefined when called with target ≤ current, or after the iterator has exhausted.
-        // Both cases may result in unpredicted behavior, and may throw an assertion error or an IOOBE here.
-        // So when nextDoc() or advance() were called earlier, the target should be bigger than current docId:
-        assert (docId == -1) || (docId < target);
-
-
-        // Do a binary search for the index j for which:
-        // ((j >= i)
-        //  and ((j < docIds.length) implies (docIds[j] >= target))
-        //  and ((j >= 1) implies (docIds[j-1] < target)) )
-        int j = docIds.length;
-        while (i < j) {
-          // ((0 <= i) and (i < j) and (j <= docIds.length)) so (docIds.length > 0)
-          int m = i + (j - i) / 2; // (i <= m) and (m < j); avoid overflow for (i + j)
-          if (docIds[m] < target) {
-            i = m + 1; // (docIds[i-1] <  target) and (i <= j)
-          } else {
-            j = m; //     (docIds[j] >= target)   and (i <= j)
-          }
-        } // (i == j)
-        docId = (i >= docIds.length)
-            ? NO_MORE_DOCS // exhausted
-                : docIds[i++]; // increase i to just after current
-        return docId;
+      public int docID() {
+        return doc;
       }
-
+      
       @Override
       public long cost() {
-        return docIds.length;
+        return bs.cardinality();
       }
-    };
-    return new IntArrayDisi();
-  }
-
-  public void tstEqualDisisNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
-    assertEquals(disi0.docID(), disi1.docID());
-    int d0 = disi0.nextDoc();
-    int d1 = disi1.nextDoc();
-    int i = 0;
-    while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
-      assertEquals("index " + i, d0, d1);
-      i++;
-      d0 = disi0.nextDoc();
-      d1 = disi1.nextDoc();
-    }
-    assertEquals("at end", d0, d1);
-  }
-
-  public void tstEqualDisisAdvanceAsNext(DocIdSetIterator disi0, DocIdSetIterator disi1) throws IOException {
-    assertEquals(disi0.docID(), disi1.docID());
-    int d0 = disi0.advance(0);
-    int d1 = disi1.advance(0);
-    int i = 0;
-    while ((d0 != DocIdSetIterator.NO_MORE_DOCS) && (d1 != DocIdSetIterator.NO_MORE_DOCS)) {
-      assertEquals("index " + i, d0, d1);
-      i++;
-      d0 = disi0.advance(d1+1);
-      d1 = disi1.advance(d1+1);
-    }
-    assertEquals("at end disi0 " + disi0 + ", disi1 " + disi1, d0, d1);
-  }
-
-  public void tstEF(int[] docIds) {
-    int maxDoc = -1;
-    for (int docId: docIds) {
-      assert docId >= maxDoc; // non decreasing
-      maxDoc = docId;
-    }
-    try {
-      EliasFanoDocIdSet efd = new EliasFanoDocIdSet(docIds.length, maxDoc);
-      efd.encodeFromDisi(makeDisi(docIds));
-      tstEqualDisisNext(         makeDisi(docIds), efd.iterator());
-      tstEqualDisisAdvanceAsNext(makeDisi(docIds), efd.iterator());
-    } catch (IOException ioe) {
-      throw new Error(ioe);
-    }
-  }
-
-  public void testEmpty() { tstEF(new int[] {}); }
-
-  public void testOneElementZero() { tstEF(new int[] {0}); }
-
-  public void testTwoElements() { tstEF(new int[] {0,1}); }
-
-  public void testOneElementOneBit() {
-    for (int i = 0; i < (Integer.SIZE-1); i++) {
-      tstEF(new int[] {1 << i});
-    }
-  }
-
-  public void testIncreasingSequences() {
-    final int TEST_NUMDOCS = 129;
-    int[] docIds = new int[TEST_NUMDOCS];
-    for (int f = 1; f <= 1025; f++) {
-      for (int i = 0; i < TEST_NUMDOCS; i++) {
-        docIds[i] = i*f;
+      
+      @Override
+      public int advance(int target) throws IOException {
+        return slowAdvance(target);
       }
-      tstEF(docIds);
-    }
+    });
+    return set;
   }
-}
 
+}
\ No newline at end of file