You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2008/11/25 22:39:48 UTC
svn commit: r720609 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/util/OpenBitSetIterator.java
src/test/org/apache/lucene/util/TestOpenBitSet.java
Author: buschmi
Date: Tue Nov 25 13:39:47 2008
New Revision: 720609
URL: http://svn.apache.org/viewvc?rev=720609&view=rev
Log:
LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java
lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Nov 25 13:39:47 2008
@@ -34,6 +34,10 @@
and also to define custom Attributes. The new API has the same performance
as the old next(Token) approach. (Michael Busch)
+5. LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
+ These methods can be used to avoid additional calls to doc().
+ (Michael Busch)
+
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/OpenBitSetIterator.java Tue Nov 25 13:39:47 2008
@@ -133,6 +133,35 @@
return true;
}
+ /** Moves iterator to the next doc and returns its id;
+ returns -1 when this iterator is exhausted. */
+ public int nextDoc() {
+ if (indexArray==0) {
+ if (word!=0) {
+ word >>>= 8;
+ wordShift += 8;
+ }
+
+ while (word==0) {
+ if (++i >= words) {
+ return curDocId = -1;
+ }
+ word = arr[i];
+ wordShift =-1; // loop invariant code motion should move this
+ }
+
+ // after the first time, should I go with a linear search, or
+ // stick with the binary search in shift?
+ shift();
+ }
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray >>>= 4;
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return curDocId = (i<<6) + bitIndex;
+ }
+
public boolean skipTo(int target) {
indexArray=0;
i = target >> 6;
@@ -166,6 +195,38 @@
return true;
}
+ /** Behaves like {@link #skipTo(int)} and returns the docId the iterator
+ * skipped to; returns -1 if no valid document could be skipped to. */
+ public int next(int fromIndex) {
+ indexArray=0;
+ i = fromIndex >> 6;
+ if (i>=words) {
+ word =0; // setup so next() will also return -1
+ return curDocId = -1;
+ }
+ wordShift = fromIndex & 0x3f;
+ word = arr[i] >>> wordShift;
+ if (word !=0) {
+ wordShift--; // compensate for 1 based arrIndex
+ } else {
+ while (word ==0) {
+ if (++i >= words) {
+ return curDocId = -1;
+ }
+ word = arr[i];
+ }
+ wordShift =-1;
+ }
+
+ shift();
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray >>>= 4;
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return curDocId = (i<<6) + bitIndex;
+ }
+
public int doc() {
return this.curDocId;
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java?rev=720609&r1=720608&r2=720609&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestOpenBitSet.java Tue Nov 25 13:39:47 2008
@@ -46,8 +46,13 @@
} while (aa>=0);
}
- // test interleaving different BitSetIterator.next()
- void doIterate(BitSet a, OpenBitSet b) {
+ // test interleaving different OpenBitSetIterator.next()/skipTo()
+ void doIterate(BitSet a, OpenBitSet b, int mode) {
+ if (mode==1) doIterate1(a, b);
+ if (mode==2) doIterate2(a, b);
+ }
+
+ void doIterate1(BitSet a, OpenBitSet b) {
int aa=-1,bb=-1;
OpenBitSetIterator iterator = new OpenBitSetIterator(b);
do {
@@ -61,8 +66,20 @@
} while (aa>=0);
}
+ void doIterate2(BitSet a, OpenBitSet b) {
+ int aa=-1,bb=-1;
+ OpenBitSetIterator iterator = new OpenBitSetIterator(b);
+ do {
+ aa = a.nextSetBit(aa+1);
+ if (rand.nextBoolean())
+ bb = iterator.nextDoc();
+ else
+ bb = iterator.next(bb+1);
+ assertEquals(aa,bb);
+ } while (aa>=0);
+ }
- void doRandomSets(int maxSize, int iter) {
+ void doRandomSets(int maxSize, int iter, int mode) {
BitSet a0=null;
OpenBitSet b0=null;
@@ -110,7 +127,7 @@
BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
- doIterate(aa,bb); // a problem here is from flip or doIterate
+ doIterate(aa,bb, mode); // a problem here is from flip or doIterate
fromIndex = rand.nextInt(sz+80);
toIndex = fromIndex + rand.nextInt((sz>>1)+1);
@@ -142,10 +159,10 @@
OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
- doIterate(a_and,b_and);
- doIterate(a_or,b_or);
- doIterate(a_xor,b_xor);
- doIterate(a_andn,b_andn);
+ doIterate(a_and,b_and, mode);
+ doIterate(a_or,b_or, mode);
+ doIterate(a_xor,b_xor, mode);
+ doIterate(a_andn,b_andn, mode);
assertEquals(a_and.cardinality(), b_and.cardinality());
assertEquals(a_or.cardinality(), b_or.cardinality());
@@ -167,12 +184,14 @@
// large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
// larger testsuite.
public void testSmall() {
- doRandomSets(1200,1000);
+ doRandomSets(1200,1000, 1);
+ doRandomSets(1200,1000, 2);
}
public void testBig() {
// uncomment to run a bigger test (~2 minutes).
- // doRandomSets(2000,200000);
+ // doRandomSets(2000,200000, 1);
+ // doRandomSets(2000,200000, 2);
}
public void testEquals() {
@@ -196,7 +215,28 @@
// try different type of object
assertFalse(b1.equals(new Object()));
}
+
+ public void testBitUtils()
+ {
+ long num = 100000;
+ assertEquals( 5, BitUtil.ntz(num) );
+ assertEquals( 5, BitUtil.ntz2(num) );
+ assertEquals( 5, BitUtil.ntz3(num) );
+
+ num = 10;
+ assertEquals( 1, BitUtil.ntz(num) );
+ assertEquals( 1, BitUtil.ntz2(num) );
+ assertEquals( 1, BitUtil.ntz3(num) );
+
+ for (int i=0; i<64; i++) {
+ num = 1L << i;
+ assertEquals( i, BitUtil.ntz(num) );
+ assertEquals( i, BitUtil.ntz2(num) );
+ assertEquals( i, BitUtil.ntz3(num) );
+ }
+ }
+
}