You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2008/11/25 22:39:48 UTC

svn commit: r720609 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/util/OpenBitSetIterator.java src/test/org/apache/lucene/util/TestOpenBitSet.java

Author: buschmi
Date: Tue Nov 25 13:39:47 2008
New Revision: 720609

URL: http://svn.apache.org/viewvc?rev=720609&view=rev
Log:
LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java
    lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Nov 25 13:39:47 2008
@@ -34,6 +34,10 @@
    and also to define custom Attributes. The new API has the same performance
    as the old next(Token) approach. (Michael Busch)
 
+5. LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
+   These methods can be used to avoid additional calls to doc(). 
+   (Michael Busch)
+
 Bug fixes
 
 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java Tue Nov 25 13:39:47 2008
@@ -133,6 +133,35 @@
     return true;
   }
 
+  /** Moves iterator to the next doc and returns its id;
+      returns -1 when this iterator is exhausted. */
+  public int nextDoc() {
+    if (indexArray==0) {
+      if (word!=0) {
+        word >>>= 8;
+        wordShift += 8;
+      }
+
+      while (word==0) {
+        if (++i >= words) {
+          return curDocId = -1;
+        }
+        word = arr[i];
+        wordShift =-1;  // loop invariant code motion should move this
+      }
+
+      // after the first time, should I go with a linear search, or
+      // stick with the binary search in shift?
+      shift();
+    }
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+    indexArray >>>= 4;
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    return curDocId = (i<<6) + bitIndex;
+  }
+  
   public boolean skipTo(int target) {
     indexArray=0;
     i = target >> 6;
@@ -166,6 +195,38 @@
     return true;
   }
   
+  /** Behaves like {@link #skipTo(int)} and returns the docId the iterator
+   *  skipped to; returns -1 if no valid document could be skipped to. */
+  public int next(int fromIndex) {
+    indexArray=0;
+    i = fromIndex >> 6;
+    if (i>=words) {
+      word =0; // setup so next() will also return -1
+      return curDocId = -1;
+    }
+    wordShift = fromIndex & 0x3f;
+    word = arr[i] >>> wordShift;
+    if (word !=0) {
+      wordShift--; // compensate for 1 based arrIndex
+    } else {
+      while (word ==0) {
+        if (++i >= words) {
+          return curDocId = -1;
+        }
+        word = arr[i];
+      }
+      wordShift =-1;
+    }
+
+    shift();
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+    indexArray >>>= 4;
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    return curDocId = (i<<6) + bitIndex;
+  }
+  
   public int doc() {
     return this.curDocId;
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java Tue Nov 25 13:39:47 2008
@@ -46,8 +46,13 @@
     } while (aa>=0);
   }
 
-  // test interleaving different BitSetIterator.next()
-  void doIterate(BitSet a, OpenBitSet b) {
+  // test interleaving different OpenBitSetIterator.next()/skipTo()
+  void doIterate(BitSet a, OpenBitSet b, int mode) {
+    if (mode==1) doIterate1(a, b);
+    if (mode==2) doIterate2(a, b);
+  }
+
+  void doIterate1(BitSet a, OpenBitSet b) {
     int aa=-1,bb=-1;
     OpenBitSetIterator iterator = new OpenBitSetIterator(b);
     do {
@@ -61,8 +66,20 @@
     } while (aa>=0);
   }
 
+  void doIterate2(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    OpenBitSetIterator iterator = new OpenBitSetIterator(b);
+    do {
+      aa = a.nextSetBit(aa+1);
+      if (rand.nextBoolean())
+        bb = iterator.nextDoc();
+      else
+        bb = iterator.next(bb+1);
+      assertEquals(aa,bb);
+    } while (aa>=0);
+  }
 
-  void doRandomSets(int maxSize, int iter) {
+  void doRandomSets(int maxSize, int iter, int mode) {
     BitSet a0=null;
     OpenBitSet b0=null;
 
@@ -110,7 +127,7 @@
       BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
       OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
 
-      doIterate(aa,bb);   // a problem here is from flip or doIterate
+      doIterate(aa,bb, mode);   // a problem here is from flip or doIterate
 
       fromIndex = rand.nextInt(sz+80);
       toIndex = fromIndex + rand.nextInt((sz>>1)+1);
@@ -142,10 +159,10 @@
         OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
         OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
 
-        doIterate(a_and,b_and);
-        doIterate(a_or,b_or);
-        doIterate(a_xor,b_xor);
-        doIterate(a_andn,b_andn);
+        doIterate(a_and,b_and, mode);
+        doIterate(a_or,b_or, mode);
+        doIterate(a_xor,b_xor, mode);
+        doIterate(a_andn,b_andn, mode);
 
         assertEquals(a_and.cardinality(), b_and.cardinality());
         assertEquals(a_or.cardinality(), b_or.cardinality());
@@ -167,12 +184,14 @@
   // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
   // larger testsuite.
   public void testSmall() {
-    doRandomSets(1200,1000);
+    doRandomSets(1200,1000, 1);
+    doRandomSets(1200,1000, 2);
   }
 
   public void testBig() {
     // uncomment to run a bigger test (~2 minutes).
-    // doRandomSets(2000,200000);
+    // doRandomSets(2000,200000, 1);
+    // doRandomSets(2000,200000, 2);
   }
 
   public void testEquals() {
@@ -196,7 +215,28 @@
     // try different type of object
     assertFalse(b1.equals(new Object()));
   }
+  
+  public void testBitUtils()
+  {
+    long num = 100000;
+    assertEquals( 5, BitUtil.ntz(num) );
+    assertEquals( 5, BitUtil.ntz2(num) );
+    assertEquals( 5, BitUtil.ntz3(num) );
+    
+    num = 10;
+    assertEquals( 1, BitUtil.ntz(num) );
+    assertEquals( 1, BitUtil.ntz2(num) );
+    assertEquals( 1, BitUtil.ntz3(num) );
+
+    for (int i=0; i<64; i++) {
+      num = 1L << i;
+      assertEquals( i, BitUtil.ntz(num) );
+      assertEquals( i, BitUtil.ntz2(num) );
+      assertEquals( i, BitUtil.ntz3(num) );
+    }
+  }
 
+  
 }