You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by mh...@apache.org on 2014/11/09 23:07:46 UTC
[2/7] lucenenet git commit: adding OpenBitSet, OpenBitSetIterator,
DocIdSet, and DocIdSetIterator.
adding OpenBitSet, OpenBitSetIterator, DocIdSet, and DocIdSetIterator.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d3242166
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d3242166
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d3242166
Branch: refs/heads/pcl
Commit: d3242166aa1e669a3ebf41ca0a4708ee91970836
Parents: 8e7e104
Author: Michael Herndon <mh...@michaelherndon.com>
Authored: Wed Aug 20 23:10:46 2014 -0400
Committer: Michael Herndon <mh...@michaelherndon.com>
Committed: Wed Aug 20 23:10:46 2014 -0400
----------------------------------------------------------------------
src/Lucene.Net.Core/Lucene.Net.Core.csproj | 4 +
src/Lucene.Net.Core/Search/DocIdSet.cs | 97 ++
src/Lucene.Net.Core/Search/DocIdSetIterator.cs | 285 +++++
.../Support/ArrayExtensionMethods.cs | 12 +
src/Lucene.Net.Core/Util/BytesRefBuilder.cs | 2 +-
src/Lucene.Net.Core/Util/OpenBitSet.cs | 1158 ++++++++++++++++++
src/Lucene.Net.Core/Util/OpenBitSetIterator.cs | 184 +++
7 files changed, 1741 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Lucene.Net.Core.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.Core.csproj b/src/Lucene.Net.Core/Lucene.Net.Core.csproj
index 5ad93e0..26f3b3b 100644
--- a/src/Lucene.Net.Core/Lucene.Net.Core.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.Core.csproj
@@ -56,6 +56,8 @@
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
+ <Compile Include="Search\DocIdSet.cs" />
+ <Compile Include="Search\DocIdSetIterator.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Check.cs" />
<Compile Include="Support\ArrayExtensionMethods.cs" />
@@ -89,6 +91,8 @@
<Compile Include="Util\InPlaceMergeSorter.cs" />
<Compile Include="Util\IntroSorter.cs" />
<Compile Include="Util\IPurgeStrategy.cs" />
+ <Compile Include="Util\OpenBitSet.cs" />
+ <Compile Include="Util\OpenBitSetIterator.cs" />
<Compile Include="Util\PclPurgeStrategy.cs" />
<Compile Include="Util\RamUsageEstimator.cs" />
<Compile Include="Util\SetOnce.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Search/DocIdSet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/DocIdSet.cs b/src/Lucene.Net.Core/Search/DocIdSet.cs
new file mode 100644
index 0000000..ecee793
--- /dev/null
+++ b/src/Lucene.Net.Core/Search/DocIdSet.cs
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Search
+{
+ using System.Collections;
+ using System.Collections.Generic;
+ using IBits = Lucene.Net.Util.IBits;
+
+
+ // ReSharper disable CSharpWarnings::CS1584
+ // ReSharper disable CSharpWarnings::CS1574
+ /// <summary>
+ /// A DocIdSet contains a set of doc ids. Implementing classes must
+ /// only implement <seealso cref="IEnumerator"/> to provide access to the set.
+ /// </summary>
+
+ public abstract class DocIdSet : IEnumerable<int>
+ {
+ /// <summary>
+ /// Provides a <seealso cref="DocIdSetIterator"/> to access the set.
+ /// this implementation can return <code>null</code> if there
+ /// are no docs that match.
+ /// </summary>
+ public abstract DocIdSetIterator GetIterator();
+
+ // TODO: somehow this class should express the cost of
+ // iteration vs the cost of random access Bits; for
+ // expensive Filters (e.g. distance < 1 km) we should use
+ // bits() after all other Query/Filters have matched, but
+ // this is the opposite of what bits() is for now
+ // (down-low filtering using e.g. FixedBitSet)
+
+ /// <summary>
+ /// Optionally provides a <seealso cref="GetBits"/> interface for random access
+ /// to matching documents. </summary>
+ /// <remarks>
+ ///
+ /// {@code null}, if this {@code DocIdSet} does not support random access.
+
+ /// In contrast to <seealso cref="IEnumerator()"/>, a return value of {@code null}
+ /// <b>does not</b> imply that no documents match the filter!
+ /// The default implementation does not provide random access, so you
+ /// only need to implement this method if your DocIdSet can
+ /// guarantee random access to every docid in O(1) time without
+ /// external disk access (as <seealso cref="GetBits"/> interface cannot throw
+ /// <seealso cref="IO.Exception"/>). this is generally true for bit sets
+ /// like <seealso cref="Lucene.Net.Util.FixedBitSet"/>, which return
+ /// itself if they are used as {@code DocIdSet}.
+ /// </remarks>
+ /// <returns> </returns>
+ public virtual IBits GetBits()
+ {
+ return null;
+ }
+
+ /// <summary>
+
+ /// this method is a hint for <seealso cref="CachingWrapperFilter"/>, if this <code>DocIdSet</code>
+ /// should be cached without copying it. The default is to return
+ /// <code>false</code>. If you have an own <code>DocIdSet</code> implementation
+ /// that does its iteration very effective and fast without doing disk I/O,
+ /// override this method and return <code>true</code>.
+ /// </summary>
+ public virtual bool Cacheable
+ {
+ get
+ {
+ return false;
+ }
+ }
+
+ IEnumerator<int> IEnumerable<int>.GetEnumerator()
+ {
+ return this.GetIterator();
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return this.GetIterator();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Search/DocIdSetIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/DocIdSetIterator.cs b/src/Lucene.Net.Core/Search/DocIdSetIterator.cs
new file mode 100644
index 0000000..85a7aa7
--- /dev/null
+++ b/src/Lucene.Net.Core/Search/DocIdSetIterator.cs
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace Lucene.Net.Search
+{
+ using System;
+ using System.Collections;
+ using System.Collections.Generic;
+ using System.Diagnostics;
+
+
+ // ReSharper disable CSharpWarnings::CS1574
+ /// <summary>
+ /// The abstract class defines methods to iterate over a set of non-decreasing
+ /// doc ids. Note that this class assumes it iterates on doc Ids, and therefore
+ /// <seealso cref="NO_MORE_DOCS"/> is set to <see cref="NO_MORE_DOCS"/> in order to be used as
+ /// a sentinel object. Implementations of this class are expected to consider
+
+ /// <seealso cref="System.Int32.MAX_VALUE"/> as an invalid value.
+ /// </summary>
+ public abstract class DocIdSetIterator : IEnumerator<int>
+ {
+ /// <summary>
+ /// An empty <see cref="DocIdSetIterator"/> instance.
+ /// </summary>
+ public static DocIdSetIterator Empty()
+ {
+ return new EmptyDocIdSetIterator();
+ }
+
+ /// <summary>
+ /// Class EmptyDocIdSetIterator.
+ /// </summary>
+ private class EmptyDocIdSetIterator : DocIdSetIterator
+ {
+ private bool exhausted;
+
+ /// <summary>
+ /// Advances to the first beyond the current whose document number is greater
+ /// than or equal to <i>target</i>, and returns the document number itself.
+ /// Exhausts the iterator and returns <seealso cref="DocIdSetIterator.NO_MORE_DOCS" /> if <i>target</i>
+ /// is greater than the highest document number in the set.
+ /// </summary>
+ /// <param name="target">The target.</param>
+ /// <returns>System.Int32.</returns>
+ /// <remarks><para>
+ /// The behavior of this method is <b>undefined</b> when called with
+ /// <c> target < current</c>, or after the iterator has exhausted.
+ /// Both cases may result in unpredicted behavior.
+ /// </para>
+ /// <para>
+ /// When <code> target > current</code> it behaves as if written:
+ /// </para>
+ /// <example>
+ /// <code language="c#">
+ /// int advance(int target) {
+ /// int doc;
+ /// while ((doc = nextDoc()) < target) {
+ /// }
+ /// return doc;
+ /// }
+ /// </code>
+ /// <para>
+ /// Some implementations are considerably more efficient than that.
+ /// </para>
+ /// </example>
+ /// <para>
+ /// <b>NOTE:</b> this method may be called with <seealso cref="DocIdSetIterator.NO_MORE_DOCS" /> for
+ /// efficiency by some Scorers. If your implementation cannot efficiently
+ /// determine that it should exhaust, it is recommended that you check for that
+ /// value in each call to this method.
+ /// </para></remarks>
+ public override int Advance(int target)
+ {
+ Debug.Assert(!exhausted);
+ Debug.Assert(target >= 0);
+ exhausted = true;
+ return NO_MORE_DOCS;
+ }
+
+ /// <summary>
+ /// Returns the following document id. It will return -1 if <see cref="NextDoc()" />
+ /// has not been called or <see cref="DocIdSetIterator.NO_MORE_DOCS" /> if the iterator has been
+ /// exhausted.
+ /// </summary>
+ /// <value>The document identifier.</value>
+ /// <remarks>@since 2.9</remarks>
+ public override int DocId
+ {
+ get { return exhausted ? NO_MORE_DOCS : -1; }
+
+ }
+
+ /// <summary>
+ /// Advances to the next document in the set and returns the doc it is
+ /// currently on, or <seealso cref="DocIdSetIterator.NO_MORE_DOCS" /> if there are no more docs in the
+ /// set.
+ /// </summary>
+ /// <returns>System.Int32.</returns>
+ /// <remarks><para>
+ /// <b>NOTE:</b> after the iterator has exhausted you should not call this
+ /// method, as it may result in unpredicted behavior.
+ /// </para>
+ /// @since 2.9</remarks>
+ public override int NextDoc()
+ {
+ Debug.Assert(!exhausted);
+ exhausted = true;
+ return NO_MORE_DOCS;
+ }
+
+ /// <summary>
+ /// Returns the estimated cost of this <seealso cref="DocIdSetIterator" />.
+ /// </summary>
+ /// <returns>System.Int64.</returns>
+ /// <remarks>this is generally an upper bound of the number of documents this iterator
+ /// might match, but may be a rough heuristic, hardcoded value, or otherwise
+ /// completely inaccurate.</remarks>
+ public override long Cost()
+ {
+ return 0;
+ }
+ }
+
+ /// <summary>
+ /// When returned by <seealso cref="NextDoc" />, <seealso cref="Advance(int)" /> and
+ /// <seealso cref="DocId" /> it means there are no more docs in the iterator.
+ /// </summary>
+ public const int NO_MORE_DOCS = int.MaxValue;
+
+ /// <summary>
+ /// Returns the following document id. It will return -1 if <see cref="NextDoc()" />
+ /// has not been called or <see cref="NO_MORE_DOCS" /> if the iterator has been
+ /// exhausted.
+ /// </summary>
+ /// <value>The document identifier.</value>
+ /// <remarks>@since 2.9</remarks>
+ public abstract int DocId { get; }
+
+ /// <summary>
+ /// Advances to the next document in the set and returns the doc it is
+ /// currently on, or <seealso cref="NO_MORE_DOCS" /> if there are no more docs in the
+ /// set.
+ /// </summary>
+ /// <returns>System.Int32.</returns>
+ /// <remarks><para>
+ /// <b>NOTE:</b> after the iterator has exhausted you should not call this
+ /// method, as it may result in unpredicted behavior.
+ /// </para>
+ /// @since 2.9</remarks>
+ public abstract int NextDoc();
+
+ /// <summary>
+ /// Advances to the first beyond the current whose document number is greater
+ /// than or equal to <i>target</i>, and returns the document number itself.
+ /// Exhausts the iterator and returns <seealso cref="NO_MORE_DOCS" /> if <i>target</i>
+ /// is greater than the highest document number in the set.
+ /// </summary>
+ /// <param name="target">The target.</param>
+ /// <returns>System.Int32.</returns>
+ /// <remarks><para>
+ /// The behavior of this method is <b>undefined</b> when called with
+ /// <c> target < current</c>, or after the iterator has exhausted.
+ /// Both cases may result in unpredicted behavior.
+ /// </para>
+ /// <para>
+ /// When <code> target > current</code> it behaves as if written:
+ /// </para>
+ /// <example>
+ /// <code language="c#">
+ /// int advance(int target) {
+ /// int doc;
+ /// while ((doc = nextDoc()) < target) {
+ /// }
+ /// return doc;
+ /// }
+ /// </code>
+ /// <para>
+ /// Some implementations are considerably more efficient than that.
+ /// </para>
+ /// </example>
+ /// <para>
+ /// <b>NOTE:</b> this method may be called with <seealso cref="NO_MORE_DOCS" /> for
+ /// efficiency by some Scorers. If your implementation cannot efficiently
+ /// determine that it should exhaust, it is recommended that you check for that
+ /// value in each call to this method.
+ /// </para></remarks>
+ public abstract int Advance(int target);
+
+ /// <summary>
+ /// Slow (linear) implementation of <seealso cref="Advance(int)" /> relying on
+ /// <seealso cref="NextDoc()" /> to advance beyond the target position.
+ /// </summary>
+ /// <param name="target">The target.</param>
+ /// <returns>System.Int32.</returns>
+ protected internal int SlowAdvance(int target)
+ {
+ Debug.Assert(DocId == NO_MORE_DOCS || DocId < target); // can happen when the enum is not positioned yet
+ int doc;
+ do
+ {
+ doc = NextDoc();
+ } while (doc < target);
+ return doc;
+ }
+
+ /// <summary>
+ /// Returns the estimated cost of this <seealso cref="DocIdSetIterator"/>.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// this is generally an upper bound of the number of documents this iterator
+ /// might match, but may be a rough heuristic, hardcoded value, or otherwise
+ /// completely inaccurate.
+ /// </para>
+ /// </remarks>
+ public abstract long Cost();
+
+ int IEnumerator<int>.Current
+ {
+ get { return this.DocId; }
+ }
+
+ object IEnumerator.Current
+ {
+ get { return this.DocId; }
+ }
+
+ bool IEnumerator.MoveNext()
+ {
+ return this.NextDoc() != NO_MORE_DOCS;
+ }
+
+ void IEnumerator.Reset()
+ {
+ this.Reset();
+ }
+
+ void IDisposable.Dispose()
+ {
+ GC.SuppressFinalize(this);
+ this.Dispose(true);
+ }
+
+ /// <summary>
+ /// Sets the enumerator to its initial position, which is before the first element in the collection.
+ /// </summary>
+ protected virtual void Reset()
+ {
+
+ }
+
+ /// <summary>
+ /// Releases unmanaged and - optionally - managed resources.
+ /// </summary>
+ /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
+ protected virtual void Dispose(bool disposing)
+ {
+
+ }
+
+ /// <summary>
+ /// Finalizes an instance of the <see cref="DocIdSetIterator"/> class.
+ /// </summary>
+ ~DocIdSetIterator()
+ {
+ this.Dispose(false);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs b/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
index d7cc737..a3db1d4 100644
--- a/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
+++ b/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
@@ -7,6 +7,18 @@ namespace Lucene.Net.Support
public static class ArrayExtensionMethods
{
+ public static T[] Fill<T>(this T[] array, int start, int count, T value)
+ {
+ Check.NotNull("array", array);
+ Check.InRangeOfLength(start, count, array.Length);
+
+ for (var i = start; i < count; i++)
+ {
+ array[i] = value;
+ }
+
+ return array;
+ }
public static T[] Copy<T>(this T[] array, int length = -1)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/BytesRefBuilder.cs b/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
index 23ca12c..5da428e 100644
--- a/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
+++ b/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
@@ -231,7 +231,7 @@ namespace Lucene.Net.Util
throw new NotSupportedException();
}
- /// <inherits />
+ /// <inherits />
/// <exception cref="NotSupportedException">Throws when called.</exception>
[SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations", Justification = "Java Port Consistency")]
public override int GetHashCode()
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Util/OpenBitSet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OpenBitSet.cs b/src/Lucene.Net.Core/Util/OpenBitSet.cs
new file mode 100644
index 0000000..7a176e3
--- /dev/null
+++ b/src/Lucene.Net.Core/Util/OpenBitSet.cs
@@ -0,0 +1,1158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Util
+{
+ using System;
+ using System.Diagnostics;
+ using Lucene.Net.Support;
+ using DocIdSet = Lucene.Net.Search.DocIdSet;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+ // ReSharper disable CSharpWarnings::CS1574
+ /// <summary>
+ /// An "open" BitSet implementation that allows direct access to the array of words
+ /// storing the bits.
+ /// </summary>
+ /// <remarks>
+ ///
+ /// <p/>
+ /// Unlike java.util.bitset, the fact that bits are packed into an array of longs
+ /// is part of the interface. this allows efficient implementation of other algorithms
+ /// by someone other than the author. It also allows one to efficiently implement
+ /// alternate serialization or interchange formats.
+ /// <p/>
+ /// <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
+ /// and *much* faster at calculating cardinality of sets and results of set operations.
+ /// It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ /// <p/>
+ /// The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
+ /// maximum code reuse. Extra safety and encapsulation
+ /// may always be built on top, but if that's built in, the cost can never be removed (and
+ /// hence people re-implement their own version in order to get better performance).
+ /// If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ /// class, use <code>java.util.BitSet</code>.
+ /// <p/>
+ /// <h3>Performance Results</h3>
+ ///
+ /// Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ /// <br/>BitSet size = 1,000,000
+ /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
+ /// <table border="1">
+ /// <tr>
+ /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ /// </tr>
+ /// <tr>
+ /// <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
+ /// </tr>
+ /// <tr>
+ /// <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&nbsp;</td> <td>1.04</td> <td>&nbsp;</td> <td>0.99</td>
+ /// </tr>
+ /// </table>
+ /// <br/>
+ /// Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+ /// <br/>BitSet size = 1,000,000
+ /// <br/>Results are java.util.BitSet time divided by OpenBitSet time.
+ /// <table border="1">
+ /// <tr>
+ /// <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ /// </tr>
+ /// <tr>
+ /// <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
+ /// </tr>
+ /// <tr>
+ /// <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&nbsp;</td> <td>1.00</td> <td>&nbsp;</td> <td>1.02</td>
+ /// </tr>
+ /// </table>
+ /// </remarks>
+
+ public class OpenBitSet : DocIdSet, IBits, ICloneable
+ {
+ private long[] bits;
+ private int wordLength; // number of words (elements) used in the array
+
+ // Used only for assert:
+ private long numBits;
+
+ /// <summary>
+ /// Constructs an OpenBitSet large enough to hold {@code numBits}. </summary>
+ public OpenBitSet(long numBits)
+ {
+ this.numBits = numBits;
+ bits = new long[Bits2Words(numBits)];
+ wordLength = bits.Length;
+ }
+
+ /// <summary>
+ /// Constructor: allocates enough space for 64 bits. </summary>
+ public OpenBitSet()
+ : this(64)
+ {
+ }
+
+ /// <summary>
+ /// Constructs an OpenBitSet from an existing long[].
+ /// </summary>
+ /// <remarks>
+ /// <p>
+ /// The first 64 bits are in long[0], with bit index 0 at the least significant
+ /// bit, and bit index 63 at the most significant. Given a bit index, the word
+ /// containing it is long[index/64], and it is at bit number index%64 within
+ /// that word.
+ /// </p>
+ /// <p>
+ /// numWords are the number of elements in the array that contain set bits
+ /// (non-zero longs). numWords should be <= bits.length, and any existing
+ /// words in the array at position >= numWords should be zero.
+ /// </p>
+ /// </remarks>
+ public OpenBitSet(long[] bits, int numWords)
+ {
+ if (numWords > bits.Length)
+ {
+ throw new ArgumentException("numWords cannot exceed bits.length");
+ }
+ this.bits = bits;
+ this.wordLength = numWords;
+ this.numBits = wordLength * 64;
+ }
+
+ public override DocIdSetIterator GetIterator()
+ {
+ return new OpenBitSetIterator(bits, this.wordLength);
+ }
+
+ public override IBits GetBits()
+ {
+ return this;
+ }
+
+ /// <summary>
+
+ /// Gets the <see cref="System.Boolean"/> at the specified index.
+ /// </summary>
+ /// <param name="index">The index.</param>
+ /// <returns><c>true</c> if XXXX, <c>false</c> otherwise.</returns>
+ public bool this[int index]
+ {
+ get { return this.Get(index); }
+ }
+
+ /// <summary>
+ /// this DocIdSet implementation is cacheable. </summary>
+ public override bool Cacheable
+ {
+ get
+ {
+ return true;
+ }
+ }
+
+ /// <summary>
+ /// Returns the current capacity in bits (1 greater than the index of the last bit) </summary>
+ public virtual long Capacity()
+ {
+ return bits.Length << 6;
+ }
+
+ /// <summary>
+ /// Returns the current capacity of this set. Included for
+ /// compatibility. this is *not* equal to <seealso cref="Cardinality"/>
+ /// </summary>
+ public virtual long Size()
+ {
+ return Capacity();
+ }
+
+ /// <summary>
+ /// Returns the number of bits in the set.
+ /// </summary>
+ /// <value>The length.</value>
+ public virtual int Length
+ {
+ get
+ {
+ return bits.Length << 6;
+ }
+ }
+
+ /// <summary>
+ /// Returns true if there are no set bits </summary>
+ public virtual bool Empty
+ {
+ get
+ {
+ return Cardinality() == 0;
+ }
+ }
+
+ /// <summary>
+ /// Expert: returns the long[] storing the bits </summary>
+ public virtual long[] Bits
+ {
+ get
+ {
+ return bits;
+ }
+ }
+
+ /// <summary>
+ /// Expert: gets the number of longs in the array that are in use </summary>
+ public virtual int NumWords
+ {
+ get
+ {
+ return wordLength;
+ }
+ }
+
+ /// <summary>
+ /// Returns true or false for the specified bit index. </summary>
+ public virtual bool Get(int index)
+ {
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ if (i >= bits.Length)
+ {
+ return false;
+ }
+
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /// <summary>
+ /// Returns true or false for the specified bit index.
+ /// The index should be less than the OpenBitSet size
+ /// </summary>
+ public virtual bool FastGet(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /// <summary>
+ /// Returns true or false for the specified bit index
+ /// </summary>
+ public virtual bool Get(long index)
+ {
+ var i = (int)(index >> 6); // div 64
+ if (i >= bits.Length)
+ {
+ return false;
+ }
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /// <summary>
+ /// Returns true or false for the specified bit index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FastGet(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int i = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /*
+ // alternate implementation of get()
+ public boolean get1(int index) {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((bits[i]>>>bit) & 0x01) != 0;
+ // this does a long shift and a bittest (on x86) vs
+ // a long shift, and a long AND, (the test for zero is prob a no-op)
+ // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+ }
+ */
+
+ /// <summary>
+ /// returns 1 if the bit is set, 0 if not.
+ /// The index should be less than the OpenBitSet size
+ /// </summary>
+ public virtual int GetBit(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((int)((long)((ulong)bits[i] >> bit))) & 0x01;
+ }
+
+ /*
+ public boolean get2(int index) {
+ int word = index >> 6; // div 64
+ int bit = index & 0x0000003f; // mod 64
+ return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
+ // we could right shift and check for parity bit, if it was available to us.
+ }
+ */
+
+ /// <summary>
+ /// sets a bit, expanding the set size if necessary </summary>
+ public virtual void Set(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int)index & 0x3f;
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /// <summary>
+ /// Sets the bit at the specified index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastSet(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /// <summary>
+ /// Sets the bit at the specified index.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastSet(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = (int)(index >> 6);
+ int bit = (int)index & 0x3f;
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /// <summary>
+ /// Sets a range of bits, expanding the set size if necessary
+ /// </summary>
+ /// <param name="startIndex"> lower index </param>
+ /// <param name="endIndex"> one-past the last bit to set </param>
+ public virtual void Set(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ {
+ return;
+ }
+
+ int startWord = (int)(startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+ long startmask = -1L << (int)startIndex;
+ long endmask = -(int)((uint)1L >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ bits[startWord] |= (startmask & endmask);
+ return;
+ }
+
+ bits[startWord] |= startmask;
+ bits.Fill(startWord + 1, endWord, -1L);
+ bits[endWord] |= endmask;
+ }
+
+ protected internal virtual int ExpandingWordNum(long index)
+ {
+ int wordNum = (int)(index >> 6);
+ if (wordNum >= wordLength)
+ {
+ EnsureCapacity(index + 1);
+ }
+ return wordNum;
+ }
+
+ /// <summary>
+ /// clears a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastClear(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = index >> 6;
+ int bit = index & 0x03f;
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ // hmmm, it takes one more instruction to clear than it does to set... any
+ // way to work around this? If there were only 63 bits per word, we could
+ // use a right shift of 10111111...111 in binary to position the 0 in the
+ // correct place (using sign extension).
+ // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
+ // by the JVM into a native instruction.
+ // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+ }
+
+ /// <summary>
+ /// clears a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastClear(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ }
+
+ /// <summary>
+ /// clears a bit, allowing access beyond the current set size without changing the size. </summary>
+ public virtual void Clear(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ if (wordNum >= wordLength)
+ {
+ return;
+ }
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ }
+
+ /// <summary>
+ /// Clears a range of bits. Clearing past the end does not change the size of the set.
+ /// </summary>
+ /// <param name="startIndex"> lower index </param>
+ /// <param name="endIndex"> one-past the last bit to clear </param>
+ public virtual void Clear(int startIndex, int endIndex)
+ {
+ if (endIndex <= startIndex)
+ {
+ return;
+ }
+
+ int startWord = (startIndex >> 6);
+ if (startWord >= wordLength)
+ {
+ return;
+ }
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ((endIndex - 1) >> 6);
+
+ //LUCENE TO-DO
+ long startmask = -1L << startIndex;
+ long endmask = -(int)((uint)1L >> -endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ // invert masks since we are clearing
+ startmask = ~startmask;
+ endmask = ~endmask;
+
+ if (startWord == endWord)
+ {
+ bits[startWord] &= (startmask | endmask);
+ return;
+ }
+
+ bits[startWord] &= startmask;
+
+ int middle = Math.Min(wordLength, endWord);
+ bits.Fill(startWord + 1, middle, 0L);
+ if (endWord < wordLength)
+ {
+ bits[endWord] &= endmask;
+ }
+ }
+
+ /// <summary>
+ /// Clears a range of bits. Clearing past the end does not change the size of the set.
+ /// </summary>
+ /// <param name="startIndex"> lower index </param>
+ /// <param name="endIndex"> one-past the last bit to clear </param>
+ public virtual void Clear(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ {
+ return;
+ }
+
+ int startWord = (int)(startIndex >> 6);
+ if (startWord >= wordLength)
+ {
+ return;
+ }
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = (int)((endIndex - 1) >> 6);
+
+ //LUCENE TO-DO
+ long startmask = -1L << (int)startIndex;
+ long endmask = -(int)((uint)1L >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ // invert masks since we are clearing
+ startmask = ~startmask;
+ endmask = ~endmask;
+
+ if (startWord == endWord)
+ {
+ bits[startWord] &= (startmask | endmask);
+ return;
+ }
+
+ bits[startWord] &= startmask;
+
+ int middle = Math.Min(wordLength, endWord);
+ bits.Fill(startWord + 1, middle, 0L);
+ if (endWord < wordLength)
+ {
+ bits[endWord] &= endmask;
+ }
+ }
+
+ /// <summary>
+ /// Sets a bit and returns the previous value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool GetAndSet(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (bits[wordNum] & bitmask) != 0;
+ bits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /// <summary>
+ /// Sets a bit and returns the previous value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool GetAndSet(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (bits[wordNum] & bitmask) != 0;
+ bits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /// <summary>
+ /// flips a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastFlip(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>
+ /// flips a bit.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual void FastFlip(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>
+ /// flips a bit, expanding the set size if necessary </summary>
+ public virtual void Flip(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /// <summary>
+ /// flips a bit and returns the resulting bit value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FlipAndGet(int index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ return (bits[wordNum] & bitmask) != 0;
+ }
+
+ /// <summary>
+ /// flips a bit and returns the resulting bit value.
+ /// The index should be less than the OpenBitSet size.
+ /// </summary>
+ public virtual bool FlipAndGet(long index)
+ {
+ Debug.Assert(index >= 0 && index < numBits);
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ return (bits[wordNum] & bitmask) != 0;
+ }
+
+ /// <summary>
+ /// Flips a range of bits, expanding the set size if necessary
+ /// </summary>
+ /// <param name="startIndex"> lower index </param>
+ /// <param name="endIndex"> one-past the last bit to flip </param>
+ public virtual void Flip(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex)
+ {
+ return;
+ }
+ int startWord = (int)(startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+
+ // Grrr, java shifting wraps around so -1L>>>64 == -1
+ // for that reason, make sure not to use endmask if the bits to flip will
+ // be zero in the last word (redefine endWord to be the last changed...)
+ // long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
+ // long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
+
+
+ //LUCENE TO-DO
+ long startmask = -1L << (int)startIndex;
+ long endmask = -(int)((uint)1L >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ bits[startWord] ^= (startmask & endmask);
+ return;
+ }
+
+ bits[startWord] ^= startmask;
+
+ for (int i = startWord + 1; i < endWord; i++)
+ {
+ bits[i] = ~bits[i];
+ }
+
+ bits[endWord] ^= endmask;
+ }
+
+ /*
+ public static int pop(long v0, long v1, long v2, long v3) {
+ // derived from pop_array by setting last four elems to 0.
+ // exchanges one pop() call for 10 elementary operations
+ // saving about 7 instructions... is there a better way?
+ long twosA=v0 & v1;
+ long ones=v0^v1;
+
+ long u2=ones^v2;
+ long twosB =(ones&v2)|(u2&v3);
+ ones=u2^v3;
+
+ long fours=(twosA&twosB);
+ long twos=twosA^twosB;
+
+ return (pop(fours)<<2)
+ + (pop(twos)<<1)
+ + pop(ones);
+ }
+ */
+
+ /// <returns> the number of set bits </returns>
+ public virtual long Cardinality()
+ {
+ return BitUtil.PopArray(bits, 0, wordLength);
+ }
+
+ /// <summary>
+ /// Returns the popcount or cardinality of the intersection of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
+ {
+ return BitUtil.PopIntersect(a.bits, b.bits, 0, Math.Min(a.wordLength, b.wordLength));
+ }
+
+ /// <summary>
+ /// Returns the popcount or cardinality of the union of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long UnionCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.PopUnion(a.bits, b.bits, 0, Math.Min(a.wordLength, b.wordLength));
+ if (a.wordLength < b.wordLength)
+ {
+ tot += BitUtil.PopArray(b.bits, a.wordLength, b.wordLength - a.wordLength);
+ }
+ else if (a.wordLength > b.wordLength)
+ {
+ tot += BitUtil.PopArray(a.bits, b.wordLength, a.wordLength - b.wordLength);
+ }
+ return tot;
+ }
+
+ /// <summary>
+ /// Returns the popcount or cardinality of "a and not b"
+ /// or "intersection(a, not(b))".
+ /// Neither set is modified.
+ /// </summary>
+ public static long AndNotCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.PopAndNot(a.bits, b.bits, 0, Math.Min(a.wordLength, b.wordLength));
+ if (a.wordLength > b.wordLength)
+ {
+ tot += BitUtil.PopArray(a.bits, b.wordLength, a.wordLength - b.wordLength);
+ }
+ return tot;
+ }
+
+ /// <summary>
+ /// Returns the popcount or cardinality of the exclusive-or of the two sets.
+ /// Neither set is modified.
+ /// </summary>
+ public static long XorCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.PopXor(a.bits, b.bits, 0, Math.Min(a.wordLength, b.wordLength));
+ if (a.wordLength < b.wordLength)
+ {
+ tot += BitUtil.PopArray(b.bits, a.wordLength, b.wordLength - a.wordLength);
+ }
+ else if (a.wordLength > b.wordLength)
+ {
+ tot += BitUtil.PopArray(a.bits, b.wordLength, a.wordLength - b.wordLength);
+ }
+ return tot;
+ }
+
+ /// <summary>
+ /// Returns the index of the first set bit starting at the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual int NextSetBit(int index)
+ {
+ int i = index >> 6;
+ if (i >= wordLength)
+ {
+ return -1;
+ }
+ int subIndex = index & 0x3f; // index within the word
+ long word = bits[i] >> subIndex; // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (i << 6) + subIndex + word.NumberOfTrailingZeros();
+ }
+
+ while (++i < wordLength)
+ {
+ word = bits[i];
+ if (word != 0)
+ {
+ return (i << 6) + word.NumberOfTrailingZeros();
+ }
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Returns the index of the first set bit starting at the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual long NextSetBit(long index)
+ {
+ int i = (int)((long)((ulong)index >> 6));
+ if (i >= wordLength)
+ {
+ return -1;
+ }
+ int subIndex = (int)index & 0x3f; // index within the word
+ long word = (long)((ulong)bits[i] >> subIndex); // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (((long)i) << 6) + (subIndex + word.NumberOfTrailingZeros());
+ }
+
+ while (++i < wordLength)
+ {
+ word = bits[i];
+ if (word != 0)
+ {
+ return (((long)i) << 6) + word.NumberOfTrailingZeros();
+ }
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Returns the index of the first set bit starting downwards at
+ /// the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual int PrevSetBit(int index)
+ {
+ int i = index >> 6;
+ int subIndex;
+ long word;
+ if (i >= wordLength)
+ {
+ i = wordLength - 1;
+ if (i < 0)
+ {
+ return -1;
+ }
+ subIndex = 63; // last possible bit
+ word = bits[i];
+ }
+ else
+ {
+ if (i < 0)
+ {
+ return -1;
+ }
+ subIndex = index & 0x3f; // index within the word
+ word = (bits[i] << (63 - subIndex)); // skip all the bits to the left of index
+ }
+
+ if (word != 0)
+ {
+ return (i << 6) + subIndex - word.NumberOfLeadingZeros(); // See LUCENE-3197
+ }
+
+ while (--i >= 0)
+ {
+ word = bits[i];
+ if (word != 0)
+ {
+ return (i << 6) + 63 - word.NumberOfLeadingZeros();
+ }
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Returns the index of the first set bit starting downwards at
+ /// the index specified.
+ /// -1 is returned if there are no more set bits.
+ /// </summary>
+ public virtual long PrevSetBit(long index)
+ {
+ int i = (int)(index >> 6);
+ int subIndex;
+ long word;
+ if (i >= wordLength)
+ {
+ i = wordLength - 1;
+ if (i < 0)
+ {
+ return -1;
+ }
+ subIndex = 63; // last possible bit
+ word = bits[i];
+ }
+ else
+ {
+ if (i < 0)
+ {
+ return -1;
+ }
+ subIndex = (int)index & 0x3f; // index within the word
+ word = (bits[i] << (63 - subIndex)); // skip all the bits to the left of index
+ }
+
+ if (word != 0)
+ {
+ return (((long)i) << 6) + subIndex - word.NumberOfLeadingZeros(); // See LUCENE-3197
+ }
+
+ while (--i >= 0)
+ {
+ word = bits[i];
+ if (word != 0)
+ {
+ return (((long)i) << 6) + 63 - word.NumberOfLeadingZeros();
+ }
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Clones <see cref="OpenBitSet"/>. Set <paramref name="deepCopy"/> to
+ /// to <c>true</c> for most copies.
+ /// </summary>
+ /// <param name="deepCopy">if set to <c>true</c> [deep copy].</param>
+ /// <returns>System.Object.</returns>
+ /// <exception cref="System.InvalidOperationException"></exception>
+ public object Clone(bool deepCopy = true)
+ {
+ if (!deepCopy)
+ return new OpenBitSet((long[])this.bits.Copy(), this.wordLength);
+
+ try
+ {
+ //OpenBitSet obs = (OpenBitSet)base.Clone();
+ //obs.bits = (long[])obs.bits.Clone(); // hopefully an array clone is as fast(er) than arraycopy
+ var obs = new OpenBitSet((long[])this.bits.Clone(), this.wordLength);
+ return obs;
+ }
+ catch (Exception e)
+ {
+ throw new InvalidOperationException(e.Message, e);
+ }
+ }
+
+ /// <summary>
+ /// this = this AND other </summary>
+ public virtual void Intersect(OpenBitSet other)
+ {
+ int newLen = Math.Min(this.wordLength, other.wordLength);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ // testing against zero can be more efficient
+ int pos = newLen;
+ while (--pos >= 0)
+ {
+ thisArr[pos] &= otherArr[pos];
+ }
+ if (this.wordLength > newLen)
+ {
+ // fill zeros from the new shorter length to the old length
+ bits.Fill(newLen, this.wordLength, 0L);
+ }
+ this.wordLength = newLen;
+ }
+
+ /// <summary>
+ /// this = this OR other </summary>
+ public virtual void Union(OpenBitSet other)
+ {
+ int newLen = Math.Max(wordLength, other.wordLength);
+ EnsureCapacityWords(newLen);
+ Debug.Assert((numBits = Math.Max(other.numBits, numBits)) >= 0);
+
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ int pos = Math.Min(wordLength, other.wordLength);
+ while (--pos >= 0)
+ {
+ thisArr[pos] |= otherArr[pos];
+ }
+ if (this.wordLength < newLen)
+ {
+ Array.Copy(otherArr, this.wordLength, thisArr, this.wordLength, newLen - this.wordLength);
+ }
+ this.wordLength = newLen;
+ }
+
+ /// <summary>
+ /// Remove all elements set in other. this = this AND_NOT other </summary>
+ public virtual void Remove(OpenBitSet other)
+ {
+ int idx = Math.Min(wordLength, other.wordLength);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ while (--idx >= 0)
+ {
+ thisArr[idx] &= ~otherArr[idx];
+ }
+ }
+
+ /// <summary>
+ /// this = this XOR other </summary>
+ public virtual void Xor(OpenBitSet other)
+ {
+ int newLen = Math.Max(wordLength, other.wordLength);
+ EnsureCapacityWords(newLen);
+ Debug.Assert((numBits = Math.Max(other.numBits, numBits)) >= 0);
+
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ int pos = Math.Min(wordLength, other.wordLength);
+ while (--pos >= 0)
+ {
+ thisArr[pos] ^= otherArr[pos];
+ }
+ if (this.wordLength < newLen)
+ {
+ Array.Copy(otherArr, this.wordLength, thisArr, this.wordLength, newLen - this.wordLength);
+ }
+ this.wordLength = newLen;
+ }
+
+ // some BitSet compatability methods
+
+ //** see {@link intersect} */
+ public virtual void And(OpenBitSet other)
+ {
+ Intersect(other);
+ }
+
+ //** see {@link union} */
+ public virtual void Or(OpenBitSet other)
+ {
+ Union(other);
+ }
+
+ //** see {@link andNot} */
+ public virtual void AndNot(OpenBitSet other)
+ {
+ Remove(other);
+ }
+
+ /// <summary>
+ /// returns true if the sets have any elements in common </summary>
+ public virtual bool Intersects(OpenBitSet other)
+ {
+ int pos = Math.Min(this.wordLength, other.wordLength);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ while (--pos >= 0)
+ {
+ if ((thisArr[pos] & otherArr[pos]) != 0)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// <summary>
+ /// Expand the long[] with the size given as a number of words (64 bit longs). </summary>
+ public virtual void EnsureCapacityWords(int numWords)
+ {
+ bits = ArrayUtil.Grow(bits, numWords);
+ wordLength = numWords;
+ Debug.Assert((this.numBits = Math.Max(this.numBits, numWords << 6)) >= 0);
+ }
+
+ /// <summary>
+ /// Ensure that the long[] is big enough to hold numBits, expanding it if
+ /// necessary.
+ /// </summary>
+ public virtual void EnsureCapacity(long numberOfBits)
+ {
+ EnsureCapacityWords(Bits2Words(numberOfBits));
+ // ensureCapacityWords sets numBits to a multiple of 64, but we want to set
+ // it to exactly what the app asked.
+ Debug.Assert((this.numBits = Math.Max(this.numBits, numberOfBits)) >= 0);
+ }
+
+ /// <summary>
+ /// Lowers numWords, the number of words in use,
+ /// by checking for trailing zero words.
+ /// </summary>
+ public virtual void TrimTrailingZeros()
+ {
+ int idx = wordLength - 1;
+ while (idx >= 0 && bits[idx] == 0)
+ {
+ idx--;
+ }
+ wordLength = idx + 1;
+ }
+
+ /// <summary>
+ /// returns the number of 64 bit words it would take to hold numBits </summary>
+ public static int Bits2Words(long numBits)
+ {
+ return (((int)((uint)(numBits - 1) >> 6)) + 1);
+ }
+
+ /// <summary>
+ /// returns true if both sets have the same bits set </summary>
+ public override bool Equals(object o)
+ {
+ if (this == o)
+ {
+ return true;
+ }
+ if (!(o is OpenBitSet))
+ {
+ return false;
+ }
+ OpenBitSet a;
+ OpenBitSet b = (OpenBitSet)o;
+ // make a the larger set.
+ if (b.wordLength > this.wordLength)
+ {
+ a = b;
+ b = this;
+ }
+ else
+ {
+ a = this;
+ }
+
+ // check for any set bits out of the range of b
+ for (int i = a.wordLength - 1; i >= b.wordLength; i--)
+ {
+ if (a.bits[i] != 0)
+ {
+ return false;
+ }
+ }
+
+ for (int i = b.wordLength - 1; i >= 0; i--)
+ {
+ if (a.bits[i] != b.bits[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ public override int GetHashCode()
+ {
+ // Start with a zero hash and use a mix that results in zero if the input is zero.
+ // this effectively truncates trailing zeros without an explicit check.
+ long h = 0;
+ // ReSharper disable NonReadonlyFieldInGetHashCode
+ for (int i = bits.Length; --i >= 0; )
+ {
+ h ^= bits[i];
+ h = (h << 1) | ((long)((ulong)h >> 63)); // rotate left
+ }
+ // fold leftmost bits into right and add a constant to prevent
+ // empty sets from returning 0, which is too common.
+ return (int)((h >> 32) ^ h) + unchecked((int)0x98761234);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3242166/src/Lucene.Net.Core/Util/OpenBitSetIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OpenBitSetIterator.cs b/src/Lucene.Net.Core/Util/OpenBitSetIterator.cs
new file mode 100644
index 0000000..9b2484c
--- /dev/null
+++ b/src/Lucene.Net.Core/Util/OpenBitSetIterator.cs
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Util
+{
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+ /// <summary>
+ /// An iterator to iterate over set bits in an OpenBitSet.
+ /// this is faster than nextSetBit() for iterating over the complete set of bits,
+ /// especially when the density of the bits set is high.
+ /// </summary>
+ public class OpenBitSetIterator : DocIdSetIterator
+ {
+ // hmmm, what about an iterator that finds zeros though,
+ // or a reverse iterator... should they be separate classes
+ // for efficiency, or have a common root interface? (or
+ // maybe both? could ask for a SetBitsIterator, etc...
+
+ private readonly long[] array;
+ private readonly int words;
+ private int position = -1;
+ private long word;
+ private int wordShift;
+ private int indexArray;
+ private int currentDocId = -1;
+
+ public OpenBitSetIterator(OpenBitSet obs)
+ : this(obs.Bits, obs.NumWords)
+ {
+ }
+
+ public OpenBitSetIterator(long[] bits, int numWords)
+ {
+ array = bits;
+ words = numWords;
+ }
+
+ // 64 bit shifts
+ private void Shift()
+ {
+ if ((int)word == 0)
+ {
+ wordShift += 32;
+ word = (long)((ulong)word >> 32);
+ }
+ if ((word & 0x0000FFFF) == 0)
+ {
+ wordShift += 16;
+ word = (long)((ulong)word >> 16);
+ }
+ if ((word & 0x000000FF) == 0)
+ {
+ wordShift += 8;
+ word = (long)((ulong)word >> 8);
+ }
+ indexArray = BitUtil.BitList((byte)word);
+ }
+
+ /// <summary>
+ ///*** alternate shift implementations
+ /// // 32 bit shifts, but a long shift needed at the end
+ /// private void shift2() {
+ /// int y = (int)word;
+ /// if (y==0) {wordShift +=32; y = (int)(word >>>32); }
+ /// if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
+ /// if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
+ /// indexArray = bitlist[y & 0xff];
+ /// word >>>= (wordShift +1);
+ /// }
+ ///
+ /// private void shift3() {
+ /// int lower = (int)word;
+ /// int lowByte = lower & 0xff;
+ /// if (lowByte != 0) {
+ /// indexArray=bitlist[lowByte];
+ /// return;
+ /// }
+ /// shift();
+ /// }
+ /// *****
+ /// </summary>
+
+ public override int NextDoc()
+ {
+ if (indexArray == 0)
+ {
+ if (word != 0)
+ {
+ word = (long)((ulong)word >> 8);
+ wordShift += 8;
+ }
+
+ while (word == 0)
+ {
+ if (++position >= words)
+ {
+ return currentDocId = NO_MORE_DOCS;
+ }
+ word = array[position];
+ wordShift = -1; // loop invariant code motion should move this
+ }
+
+ // after the first time, should I go with a linear search, or
+ // stick with the binary search in shift?
+ Shift();
+ }
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray = (int)((uint)indexArray >> 4);
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return currentDocId = (position << 6) + bitIndex;
+ }
+
+ public override int Advance(int target)
+ {
+ indexArray = 0;
+ position = target >> 6;
+ if (position >= words)
+ {
+ word = 0; // setup so next() will also return -1
+ return currentDocId = NO_MORE_DOCS;
+ }
+ wordShift = target & 0x3f;
+ word = (long)((ulong)array[position] >> wordShift);
+ if (word != 0)
+ {
+ wordShift--; // compensate for 1 based arrIndex
+ }
+ else
+ {
+ while (word == 0)
+ {
+ if (++position >= words)
+ {
+ return currentDocId = NO_MORE_DOCS;
+ }
+ word = array[position];
+ }
+ wordShift = -1;
+ }
+
+ Shift();
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ indexArray = (int)((uint)indexArray >> 4);
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ return currentDocId = (position << 6) + bitIndex;
+ }
+
+ public override int DocId
+ {
+ get { return this.currentDocId; }
+
+ }
+
+ public override long Cost()
+ {
+ return words / 64;
+ }
+
+ protected override void Reset()
+ {
+ this.position = -1;
+ this.currentDocId = -1;
+ }
+ }
+}
\ No newline at end of file