You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by do...@apache.org on 2009/07/29 20:04:24 UTC
svn commit: r798995 [22/35] - in /incubator/lucene.net/trunk/C#/src:
Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/
Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/
Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net...
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSet.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSet.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,841 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>
+ /// An "open" BitSet implementation that allows direct access to the array of words
+ /// storing the bits.
+ /// <p/>
+ /// Unlike java.util.bitset, the fact that bits are packed into an array of longs
+ /// is part of the interface. This allows efficient implementation of other algorithms
+ /// by someone other than the author. It also allows one to efficiently implement
+ /// alternate serialization or interchange formats.
+ /// <p/>
+ /// <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
+ /// and *much* faster at calculating cardinality of sets and results of set operations.
+ /// It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ /// <p/>
+ /// The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
+ /// maximum code reuse. Extra safety and encapsulation
+ /// may always be built on top, but if that's built in, the cost can never be removed (and
+ /// hence people re-implement their own version in order to get better performance).
+ /// If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ /// class, use <code>java.util.BitSet</code>.
+ /// <p/>
+ /// <h3>Performance Results</h3>
+ /// </summary>
+ [Serializable]
+ public class OpenBitSet : DocIdSet, System.ICloneable
+ {
+ protected long[] bits;
+ protected int wlen; // number of words (elements) used in the array
+
+ /** Constructs an OpenBitSet large enough to hold numBits.
+ *
+ * @param numBits
+ */
+ public OpenBitSet(long numBits)
+ {
+ bits = new long[bits2words(numBits)];
+ wlen = bits.Length;
+ }
+
+ public OpenBitSet()
+ : this(64)
+ {
+ }
+
+ /** Constructs an OpenBitSet from an existing long[].
+ * <br/>
+ * The first 64 bits are in long[0],
+ * with bit index 0 at the least significant bit, and bit index 63 at the most significant.
+ * Given a bit index,
+ * the word containing it is long[index/64], and it is at bit number index%64 within that word.
+ * <p>
+ * numWords are the number of elements in the array that contain
+ * set bits (non-zero longs).
+ * numWords should be <= bits.length, and
+ * any existing words in the array at position >= numWords should be zero.
+ *
+ */
+ public OpenBitSet(long[] bits, int numWords)
+ {
+ this.bits = bits;
+ this.wlen = numWords;
+ }
+
+ public override DocIdSetIterator Iterator()
+ {
+ return new OpenBitSetIterator(bits, wlen);
+ }
+
+ /** Returns the current capacity in bits (1 greater than the index of the last bit) */
+ public long Capacity() { return bits.Length << 6; }
+
+ /**
+ * Returns the current capacity of this set. Included for
+ * compatibility. This is *not* equal to {@link #cardinality}
+ */
+ public long Size()
+ {
+ return Capacity();
+ }
+
+ /** Returns true if there are no set bits */
+ public bool IsEmpty() { return Cardinality() == 0; }
+
+ /** Expert: returns the long[] storing the bits */
+ public long[] GetBits() { return bits; }
+
+ /** Expert: sets a new long[] to use as the bit storage */
+ public void SetBits(long[] bits) { this.bits = bits; }
+
+ /** Expert: gets the number of longs in the array that are in use */
+ public int GetNumWords() { return wlen; }
+
+ /** Expert: sets the number of longs in the array that are in use */
+ public void SetNumWords(int nWords) { this.wlen = nWords; }
+
+ /** Returns true or false for the specified bit index. */
+ public bool Get(int index)
+ {
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ if (i >= bits.Length) return false;
+
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+
+ /** Returns true or false for the specified bit index.
+ * The index should be less than the OpenBitSet size
+ */
+ public bool FastGet(int index)
+ {
+ int i = index >> 6; // div 64
+ // signed shift will keep a negative index and force an
+ // array-index-out-of-bounds-exception, removing the need for an explicit check.
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /** Returns true or false for the specified bit index
+ */
+ public bool Get(long index)
+ {
+ int i = (int)(index >> 6); // div 64
+ if (i >= bits.Length) return false;
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /** Returns true or false for the specified bit index.
+ * The index should be less than the OpenBitSet size.
+ */
+ public bool FastGet(long index)
+ {
+ int i = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /*
+ // alternate implementation of get()
+ public bool get1(int index) {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((bits[i]>>>bit) & 0x01) != 0;
+ // this does a long shift and a bittest (on x86) vs
+ // a long shift, and a long AND, (the test for zero is prob a no-op)
+ // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+ }
+ */
+
+ /** returns 1 if the bit is set, 0 if not.
+ * The index should be less than the OpenBitSet size
+ */
+ public int GetBit(int index)
+ {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //return ((int)(bits[i]>>>bit)) & 0x01;
+ return ((int)((ulong)(bits[i]) >> bit)) & 0x01;
+ }
+
+ /*
+ public bool get2(int index) {
+ int word = index >> 6; // div 64
+ int bit = index & 0x0000003f; // mod 64
+ return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
+ // we could right shift and check for parity bit, if it was available to us.
+ }
+ */
+
+ /** sets a bit, expanding the set size if necessary */
+ public void Set(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int)index & 0x3f;
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /** Sets the bit at the specified index.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastSet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /** Sets the bit at the specified index.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastSet(long index)
+ {
+ int wordNum = (int)(index >> 6);
+ int bit = (int)index & 0x3f;
+ long bitmask = 1L << bit;
+ bits[wordNum] |= bitmask;
+ }
+
+ /** Sets a range of bits, expanding the set size if necessary
+ *
+ * @param startIndex lower index
+ * @param endIndex one-past the last bit to set
+ */
+ public void Set(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex) return;
+
+ int startWord = (int)(startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+ long startmask = -1L << (int)startIndex;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+ long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ bits[startWord] |= (startmask & endmask);
+ return;
+ }
+
+ bits[startWord] |= startmask;
+ //Arrays.fill(bits, startWord+1, endWord, -1L);
+ for (int i = startWord + 1; i < endWord; i++)
+ bits[i] = -1L;
+ bits[endWord] |= endmask;
+ }
+
+ protected int ExpandingWordNum(long index)
+ {
+ int wordNum = (int)(index >> 6);
+ if (wordNum >= wlen)
+ {
+ EnsureCapacity(index + 1);
+ wlen = wordNum + 1;
+ }
+ return wordNum;
+ }
+
+ /** clears a bit.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastClear(int index)
+ {
+ int wordNum = index >> 6;
+ int bit = index & 0x03f;
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ // hmmm, it takes one more instruction to clear than it does to set... any
+ // way to work around this? If there were only 63 bits per word, we could
+ // use a right shift of 10111111...111 in binary to position the 0 in the
+ // correct place (using sign extension).
+ // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
+ // by the JVM into a native instruction.
+ // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+ }
+
+ /** clears a bit.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastClear(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ }
+
+ /** clears a bit, allowing access beyond the current set size without changing the size.*/
+ public void Clear(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ if (wordNum >= wlen) return;
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] &= ~bitmask;
+ }
+
+ /** Clears a range of bits. Clearing past the end does not change the size of the set.
+ *
+ * @param startIndex lower index
+ * @param endIndex one-past the last bit to clear
+ */
+ public void Clear(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex) return;
+
+ int startWord = (int)(startIndex >> 6);
+ if (startWord >= wlen) return;
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = (int)((endIndex - 1) >> 6);
+
+ long startmask = -1L << (int)startIndex;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+ long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ // invert masks since we are clearing
+ startmask = ~startmask;
+ endmask = ~endmask;
+
+ if (startWord == endWord)
+ {
+ bits[startWord] &= (startmask | endmask);
+ return;
+ }
+
+ bits[startWord] &= startmask;
+
+ int middle = Math.Min(wlen, endWord);
+ //Arrays.fill(bits, startWord+1, middle, 0L);
+ for (int i = startWord + 1; i < middle; i++)
+ bits[i] = 0L;
+ if (endWord < wlen)
+ {
+ bits[endWord] &= endmask;
+ }
+ }
+
+ /** Sets a bit and returns the previous value.
+ * The index should be less than the OpenBitSet size.
+ */
+ public bool GetAndSet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (bits[wordNum] & bitmask) != 0;
+ bits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /** Sets a bit and returns the previous value.
+ * The index should be less than the OpenBitSet size.
+ */
+ public bool GetAndSet(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bool val = (bits[wordNum] & bitmask) != 0;
+ bits[wordNum] |= bitmask;
+ return val;
+ }
+
+ /** flips a bit.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastFlip(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /** flips a bit.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void FastFlip(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /** flips a bit, expanding the set size if necessary */
+ public void Flip(long index)
+ {
+ int wordNum = ExpandingWordNum(index);
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ }
+
+ /** flips a bit and returns the resulting bit value.
+ * The index should be less than the OpenBitSet size.
+ */
+ public bool FlipAndGet(int index)
+ {
+ int wordNum = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ return (bits[wordNum] & bitmask) != 0;
+ }
+
+ /** flips a bit and returns the resulting bit value.
+ * The index should be less than the OpenBitSet size.
+ */
+ public bool FlipAndGet(long index)
+ {
+ int wordNum = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ bits[wordNum] ^= bitmask;
+ return (bits[wordNum] & bitmask) != 0;
+ }
+
+ /** Flips a range of bits, expanding the set size if necessary
+ *
+ * @param startIndex lower index
+ * @param endIndex one-past the last bit to flip
+ */
+ public void Flip(long startIndex, long endIndex)
+ {
+ if (endIndex <= startIndex) return;
+ int oldlen = wlen;
+ int startWord = (int)(startIndex >> 6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = ExpandingWordNum(endIndex - 1);
+
+ /*** Grrr, java shifting wraps around so -1L>>>64 == -1
+ * for that reason, make sure not to use endmask if the bits to flip will
+ * be zero in the last word (redefine endWord to be the last changed...)
+ long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
+ long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
+ ***/
+
+ long startmask = -1L << (int)startIndex;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+ long endmask = (long)(0xffffffffffffffffUL >> (int)-endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord)
+ {
+ bits[startWord] ^= (startmask & endmask);
+ return;
+ }
+
+ bits[startWord] ^= startmask;
+
+ for (int i = startWord + 1; i < endWord; i++)
+ {
+ bits[i] = ~bits[i];
+ }
+
+ bits[endWord] ^= endmask;
+ }
+
+ /*
+ public static int pop(long v0, long v1, long v2, long v3) {
+ // derived from pop_array by setting last four elems to 0.
+ // exchanges one pop() call for 10 elementary operations
+ // saving about 7 instructions... is there a better way?
+ long twosA=v0 & v1;
+ long ones=v0^v1;
+
+ long u2=ones^v2;
+ long twosB =(ones&v2)|(u2&v3);
+ ones=u2^v3;
+
+ long fours=(twosA&twosB);
+ long twos=twosA^twosB;
+
+ return (pop(fours)<<2)
+ + (pop(twos)<<1)
+ + pop(ones);
+
+ }
+ */
+
+ /** @return the number of set bits */
+ public long Cardinality()
+ {
+ return BitUtil.pop_array(bits, 0, wlen);
+ }
+
+ /** Returns the popcount or cardinality of the intersection of the two sets.
+ * Neither set is modified.
+ */
+ public static long IntersectionCount(OpenBitSet a, OpenBitSet b)
+ {
+ return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+ }
+
+ /** Returns the popcount or cardinality of the union of the two sets.
+ * Neither set is modified.
+ */
+ public static long UnionCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+ if (a.wlen < b.wlen)
+ {
+ tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen - a.wlen);
+ }
+ else if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+ /** Returns the popcount or cardinality of "a and not b"
+ * or "intersection(a, not(b))".
+ * Neither set is modified.
+ */
+ public static long AndNotCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+ if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+ /** Returns the popcount or cardinality of the exclusive-or of the two sets.
+ * Neither set is modified.
+ */
+ public static long XorCount(OpenBitSet a, OpenBitSet b)
+ {
+ long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.Min(a.wlen, b.wlen));
+ if (a.wlen < b.wlen)
+ {
+ tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen - a.wlen);
+ }
+ else if (a.wlen > b.wlen)
+ {
+ tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen - b.wlen);
+ }
+ return tot;
+ }
+
+
+ /** Returns the index of the first set bit starting at the index specified.
+ * -1 is returned if there are no more set bits.
+ */
+ public int NextSetBit(int index)
+ {
+ int i = index >> 6;
+ if (i >= wlen) return -1;
+ int subIndex = index & 0x3f; // index within the word
+ long word = bits[i] >> subIndex; // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (i << 6) + subIndex + BitUtil.ntz(word);
+ }
+
+ while (++i < wlen)
+ {
+ word = bits[i];
+ if (word != 0) return (i << 6) + BitUtil.ntz(word);
+ }
+
+ return -1;
+ }
+
+ /** Returns the index of the first set bit starting at the index specified.
+ * -1 is returned if there are no more set bits.
+ */
+ public long NextSetBit(long index)
+ {
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //int i = (int)(index>>>6);
+ //if (i>=wlen) return -1;
+ //int subIndex = (int)index & 0x3f; // index within the word
+ //long word = bits[i] >>> subIndex; // skip all the bits to the right of index
+ int i = (int)((ulong)index >> 6);
+ if (i >= wlen) return -1;
+ int subIndex = (int)index & 0x3f; // index within the word
+ long word = (long)((ulong)(bits[i]) >> subIndex); // skip all the bits to the right of index
+
+ if (word != 0)
+ {
+ return (((long)i) << 6) + (subIndex + BitUtil.ntz(word));
+ }
+
+ while (++i < wlen)
+ {
+ word = bits[i];
+ if (word != 0) return (((long)i) << 6) + BitUtil.ntz(word);
+ }
+
+ return -1;
+ }
+
+
+
+
+ public virtual object Clone()
+ {
+ //try
+ //{
+ // OpenBitSet obs = (OpenBitSet)super.clone();
+ // obs.bits = (long[])obs.bits.clone(); // hopefully an array clone is as fast(er) than arraycopy
+ // return obs;
+ //}
+ //catch (CloneNotSupportedException e)
+ //{
+ // throw new RuntimeException(e);
+ //}
+ return new OpenBitSet((long[])bits.Clone(), wlen);
+ }
+
+ /** this = this AND other */
+ public void Intersect(OpenBitSet other)
+ {
+ int newLen = Math.Min(this.wlen, other.wlen);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ // testing against zero can be more efficient
+ int pos = newLen;
+ while (--pos >= 0)
+ {
+ thisArr[pos] &= otherArr[pos];
+ }
+ if (this.wlen > newLen)
+ {
+ // fill zeros from the new shorter length to the old length
+ //Arrays.fill(bits, newLen, this.wlen, 0);
+ for (int i = newLen; i < this.wlen; i++)
+ bits[i] = 0L;
+ }
+ this.wlen = newLen;
+ }
+
+ /** this = this OR other */
+ public void Union(OpenBitSet other)
+ {
+ int newLen = Math.Max(wlen, other.wlen);
+ EnsureCapacityWords(newLen);
+
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ int pos = Math.Min(wlen, other.wlen);
+ while (--pos >= 0)
+ {
+ thisArr[pos] |= otherArr[pos];
+ }
+ if (this.wlen < newLen)
+ {
+ //System.Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ }
+ this.wlen = newLen;
+ }
+
+
+ /** Remove all elements set in other. this = this AND_NOT other */
+ public void Remove(OpenBitSet other)
+ {
+ int idx = Math.Min(wlen, other.wlen);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ while (--idx >= 0)
+ {
+ thisArr[idx] &= ~otherArr[idx];
+ }
+ }
+
+ /** this = this XOR other */
+ public void Xor(OpenBitSet other)
+ {
+ int newLen = Math.Max(wlen, other.wlen);
+ EnsureCapacityWords(newLen);
+
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ int pos = Math.Min(wlen, other.wlen);
+ while (--pos >= 0)
+ {
+ thisArr[pos] ^= otherArr[pos];
+ }
+ if (this.wlen < newLen)
+ {
+ //System.Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen);
+ }
+ this.wlen = newLen;
+ }
+
+
+ // some BitSet compatability methods
+
+ //** see {@link intersect} */
+ public void And(OpenBitSet other)
+ {
+ Intersect(other);
+ }
+
+ //** see {@link union} */
+ public void Or(OpenBitSet other)
+ {
+ Union(other);
+ }
+
+ //** see {@link andNot} */
+ public void AndNot(OpenBitSet other)
+ {
+ Remove(other);
+ }
+
+ /** returns true if the sets have any elements in common */
+ public bool Intersects(OpenBitSet other)
+ {
+ int pos = Math.Min(this.wlen, other.wlen);
+ long[] thisArr = this.bits;
+ long[] otherArr = other.bits;
+ while (--pos >= 0)
+ {
+ if ((thisArr[pos] & otherArr[pos]) != 0) return true;
+ }
+ return false;
+ }
+
+
+
+ /** Expand the long[] with the size given as a number of words (64 bit longs).
+ * getNumWords() is unchanged by this call.
+ */
+ public void EnsureCapacityWords(int numWords)
+ {
+ if (bits.Length < numWords)
+ {
+ long[] newBits = new long[numWords];
+ //System.Array.Copy(bits, 0, newBits, 0, wlen);
+ Array.Copy(bits, 0, newBits, 0, wlen);
+ bits = newBits;
+ }
+ }
+
+ /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
+ * getNumWords() is unchanged by this call.
+ */
+ public void EnsureCapacity(long numBits)
+ {
+ EnsureCapacityWords(bits2words(numBits));
+ }
+
+ /** Lowers numWords, the number of words in use,
+ * by checking for trailing zero words.
+ */
+ public void trimTrailingZeros()
+ {
+ int idx = wlen - 1;
+ while (idx >= 0 && bits[idx] == 0) idx--;
+ wlen = idx + 1;
+ }
+
+ /** returns the number of 64 bit words it would take to hold numBits */
+ public static int bits2words(long numBits)
+ {
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //return (int)(((numBits-1)>>>6)+1);
+ return (int)(((ulong)(numBits - 1) >> 6) + 1);
+ }
+
+
+ /** returns true if both sets have the same bits set */
+ public override bool Equals(object o)
+ {
+ if (this == o) return true;
+ if (!(o is OpenBitSet)) return false;
+ OpenBitSet a;
+ OpenBitSet b = (OpenBitSet)o;
+ // make a the larger set.
+ if (b.wlen > this.wlen)
+ {
+ a = b; b = this;
+ }
+ else
+ {
+ a = this;
+ }
+
+ // check for any set bits out of the range of b
+ for (int i = a.wlen - 1; i >= b.wlen; i--)
+ {
+ if (a.bits[i] != 0) return false;
+ }
+
+ for (int i = b.wlen - 1; i >= 0; i--)
+ {
+ if (a.bits[i] != b.bits[i]) return false;
+ }
+
+ return true;
+ }
+
+
+ public override int GetHashCode()
+ {
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ // long h = 0x98761234; // something non-zero for length==0
+ // for (int i = bits.length; --i>=0;) {
+ // h ^= bits[i];
+ // h = (h << 1) | (h >>> 63); // rotate left
+ //return (int)((h>>32) ^ h); // fold leftmost bits into right
+ long h = 0x98761234; // something non-zero for length==0
+ for (int i = bits.Length; --i >= 0; )
+ {
+ h ^= bits[i];
+ h = (h << 1) | (long)((ulong)h >> 63); // rotate left
+ }
+ return (int)((h >> 32) ^ h); // fold leftmost bits into right
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSetDISI.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetDISI.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+ public class OpenBitSetDISI : OpenBitSet
+ {
+ /** Construct an OpenBitSetDISI with its bits set
+ * from the doc ids of the given DocIdSetIterator.
+ * Also give a maximum size one larger than the largest doc id for which a
+ * bit may ever be set on this OpenBitSetDISI.
+ */
+ public OpenBitSetDISI(DocIdSetIterator disi, int maxSize)
+ : base(maxSize)
+ {
+ InPlaceOr(disi);
+ }
+
+ /** Construct an OpenBitSetDISI with no bits set, and a given maximum size
+ * one larger than the largest doc id for which a bit may ever be set
+ * on this OpenBitSetDISI.
+ */
+ public OpenBitSetDISI(int maxSize)
+ : base(maxSize)
+ {
+ }
+
+ /**
+ * Perform an inplace OR with the doc ids from a given DocIdSetIterator,
+ * setting the bit for each such doc id.
+ * These doc ids should be smaller than the maximum size passed to the
+ * constructor.
+ */
+ public void InPlaceOr(DocIdSetIterator disi)
+ {
+ while (disi.Next() && (disi.Doc() < Size()))
+ {
+ FastSet(disi.Doc());
+ }
+ }
+
+ /**
+ * Perform an inplace AND with the doc ids from a given DocIdSetIterator,
+ * leaving only the bits set for which the doc ids are in common.
+ * These doc ids should be smaller than the maximum size passed to the
+ * constructor.
+ */
+ public void InPlaceAnd(DocIdSetIterator disi)
+ {
+ int index = NextSetBit(0);
+ int lastNotCleared = -1;
+ while ((index != -1) && disi.SkipTo(index))
+ {
+ while ((index != -1) && (index < disi.Doc()))
+ {
+ FastClear(index);
+ index = NextSetBit(index + 1);
+ }
+ if (index == disi.Doc())
+ {
+ lastNotCleared = index;
+ index++;
+ }
+ System.Diagnostics.Debug.Assert((index == -1) || (index > disi.Doc()));
+ }
+ Clear(lastNotCleared + 1, Size());
+ }
+
+ /**
+ * Perform an inplace NOT with the doc ids from a given DocIdSetIterator,
+ * clearing all the bits for each such doc id.
+ * These doc ids should be smaller than the maximum size passed to the
+ * constructor.
+ */
+ public void InPlaceNot(DocIdSetIterator disi)
+ {
+ while (disi.Next() && (disi.Doc() < Size()))
+ {
+ FastClear(disi.Doc());
+ }
+ }
+
+ /**
+ * Perform an inplace XOR with the doc ids from a given DocIdSetIterator,
+ * flipping all the bits for each such doc id.
+ * These doc ids should be smaller than the maximum size passed to the
+ * constructor.
+ */
+ public void InPlaceXor(DocIdSetIterator disi)
+ {
+ while (disi.Next() && (disi.Doc() < Size()))
+ {
+ FastFlip(disi.Doc());
+ }
+ }
+ }
+}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/OpenBitSetIterator.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/OpenBitSetIterator.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+
+ /** An iterator to iterate over set bits in an OpenBitSet.
+ * This is faster than nextSetBit() for iterating over the complete set of bits,
+ * especially when the density of the bits set is high.
+ *
+ * @version $Id$
+ */
+ public class OpenBitSetIterator : DocIdSetIterator
+ {
+
+ // The General Idea: instead of having an array per byte that has
+ // the offsets of the next set bit, that array could be
+ // packed inside a 32 bit integer (8 4 bit numbers). That
+ // should be faster than accessing an array for each index, and
+ // the total array size is kept smaller (256*sizeof(int))=1K
+ //{DOUG-2.4.0: numbers too large for int trying to be assigned - could either make whole array uint or do unchecked casts
+ //protected static readonly int[] bitlist = {
+ //0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x
854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
+ protected static readonly uint[] bitlist = {
+ 0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x85
4,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
+ };
+ /***** the python code that generated bitlist
+ def bits2int(val):
+ arr=0
+ for shift in range(8,0,-1):
+ if val & 0x80:
+ arr = (arr << 4) | shift
+ val = val << 1
+ return arr
+
+ def int_table():
+ tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
+ return ','.join(tbl)
+ ******/
+
+ // hmmm, what about an iterator that finds zeros though,
+ // or a reverse iterator... should they be separate classes
+ // for efficiency, or have a common root interface? (or
+ // maybe both? could ask for a SetBitsIterator, etc...
+
+
+ private readonly long[] arr;
+ private readonly int words;
+ private int i = -1;
+ private long word;
+ private int wordShift;
+ private int indexArray;
+ private int curDocId;
+
+ public OpenBitSetIterator(OpenBitSet obs)
+ : this(obs.GetBits(), obs.GetNumWords())
+ {
+ }
+
+ public OpenBitSetIterator(long[] bits, int numWords)
+ {
+ arr = bits;
+ words = numWords;
+ }
+
+ // 64 bit shifts
+ private void shift()
+ {
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //if ((int)word ==0) {wordShift +=32; word = word >>>32; }
+ //if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
+ //if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
+ //indexArray = bitlist[(int)word & 0xff];
+ if ((int)word == 0) { wordShift += 32; word = (long)((ulong)word >> 32); }
+ if ((word & 0x0000FFFF) == 0) { wordShift += 16; word = (long)((ulong)word >> 16); }
+ if ((word & 0x000000FF) == 0) { wordShift += 8; word = (long)((ulong)word >> 8); }
+ indexArray = (int)bitlist[(int)word & 0xff];
+ }
+
+ /***** alternate shift implementations
+ // 32 bit shifts, but a long shift needed at the end
+ private void shift2() {
+ int y = (int)word;
+ if (y==0) {wordShift +=32; y = (int)(word >>>32); }
+ if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
+ if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
+ indexArray = bitlist[y & 0xff];
+ word >>>= (wordShift +1);
+ }
+
+ private void shift3() {
+ int lower = (int)word;
+ int lowByte = lower & 0xff;
+ if (lowByte != 0) {
+ indexArray=bitlist[lowByte];
+ return;
+ }
+ shift();
+ }
+ ******/
+
+ public override bool Next() {
+ if (indexArray==0) {
+ if (word!=0) {
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //word >>>= 8;
+ word = (long)((ulong)word >> 8);
+ wordShift += 8;
+ }
+
+ while (word==0) {
+ if (++i >= words) {
+ curDocId = -1;
+ return false;
+ }
+ word = arr[i];
+ wordShift =-1; // loop invariant code motion should move this
+ }
+
+ // after the first time, should I go with a linear search, or
+ // stick with the binary search in shift?
+ shift();
+ }
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //indexArray >>>= 4;
+ indexArray = (int)((uint)indexArray >> 4);
+
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ curDocId = (i<<6) + bitIndex;
+ return true;
+ }
+
+ public override bool SkipTo(int target) {
+ indexArray=0;
+ i = target >> 6;
+ if (i>=words) {
+ word =0; // setup so next() will also return -1
+ curDocId = -1;
+ return false;
+ }
+ wordShift = target & 0x3f;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //word = arr[i] >>> wordShift;
+ word = (long)((ulong)(arr[i]) >> wordShift);
+ if (word !=0) {
+ wordShift--; // compensate for 1 based arrIndex
+ } else {
+ while (word ==0) {
+ if (++i >= words) {
+ curDocId = -1;
+ return false;
+ }
+ word = arr[i];
+ }
+ wordShift =-1;
+ }
+
+ shift();
+
+ int bitIndex = (indexArray & 0x0f) + wordShift;
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //indexArray >>>= 4;
+ indexArray = (int)((uint)indexArray >> 4);
+ // should i<<6 be cached as a separate variable?
+ // it would only save one cycle in the best circumstances.
+ curDocId = (i<<6) + bitIndex;
+ return true;
+ }
+
+ public override int Doc()
+ {
+ return this.curDocId;
+ }
+ }
+}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/Parameter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/Parameter.cs Wed Jul 29 18:04:12 2009
@@ -22,7 +22,7 @@
/// <summary> A serializable Enum class.</summary>
[Serializable]
- public abstract class Parameter : System.Runtime.Serialization.IObjectReference
+ public abstract class Parameter
{
internal static System.Collections.IDictionary allParameters = new System.Collections.Hashtable();
@@ -61,22 +61,15 @@
/// </summary>
/// <returns> a reference to Parameter as resolved in the local VM
/// </returns>
- /// <throws> ObjectStreamException </throws>
- protected internal virtual System.Object ReadResolve()
+ /// <throws> objectStreamException </throws>
+ protected internal virtual object ReadResolve()
{
- System.Object par = allParameters[MakeKey(name)];
+ object par = allParameters[MakeKey(name)];
if (par == null)
throw new System.IO.IOException("Unknown parameter value: " + name);
return par;
}
-
- // "ReadResolve"s equivalent for .NET
- // https://issues.apache.org/jira/browse/LUCENENET-170
- public Object GetRealObject(System.Runtime.Serialization.StreamingContext context)
- {
- return ReadResolve();
- }
}
}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/PriorityQueue.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/PriorityQueue.cs Wed Jul 29 18:04:12 2009
@@ -28,15 +28,15 @@
{
private int size;
private int maxSize;
- protected internal System.Object[] heap;
+ protected internal object[] heap;
/// <summary>Determines the ordering of objects in this priority queue. Subclasses
/// must define this one method.
/// </summary>
- public abstract bool LessThan(System.Object a, System.Object b);
+ public abstract bool LessThan(object a, object b);
/// <summary>Subclass constructors must call this. </summary>
- protected internal void Initialize(int maxSize)
+ protected void Initialize(int maxSize)
{
size = 0;
int heapSize;
@@ -45,15 +45,15 @@
heapSize = 2;
else
heapSize = maxSize + 1;
- heap = new System.Object[heapSize];
+ heap = new object[heapSize];
this.maxSize = maxSize;
}
- /// <summary> Adds an Object to a PriorityQueue in log(size) time.
+ /// <summary> Adds an object to a PriorityQueue in log(size) time.
/// If one tries to add more objects than maxSize from initialize
/// a RuntimeException (ArrayIndexOutOfBound) is thrown.
/// </summary>
- public void Put(System.Object element)
+ public void Put(object element)
{
size++;
heap[size] = element;
@@ -67,7 +67,7 @@
/// </param>
/// <returns> true if element is added, false otherwise.
/// </returns>
- public virtual bool Insert(System.Object element)
+ public virtual bool Insert(object element)
{
return InsertWithOverflow(element) != element;
}
@@ -81,7 +81,7 @@
/// heap and now has been replaced by a larger one, or null
/// if the queue wasn't yet full with maxSize elements.
/// </summary>
- public virtual System.Object InsertWithOverflow(System.Object element)
+ public virtual object InsertWithOverflow(object element)
{
if (size < maxSize)
{
@@ -90,7 +90,7 @@
}
else if (size > 0 && !LessThan(element, heap[1]))
{
- System.Object ret = heap[1];
+ object ret = heap[1];
heap[1] = element;
AdjustTop();
return ret;
@@ -102,7 +102,7 @@
}
/// <summary>Returns the least element of the PriorityQueue in constant time. </summary>
- public System.Object Top()
+ public object Top()
{
// We don't need to check size here: if maxSize is 0,
// then heap is length 2 array with both entries null.
@@ -113,11 +113,11 @@
/// <summary>Removes and returns the least element of the PriorityQueue in log(size)
/// time.
/// </summary>
- public System.Object Pop()
+ public object Pop()
{
if (size > 0)
{
- System.Object result = heap[1]; // save first value
+ object result = heap[1]; // save first value
heap[1] = heap[size]; // move last to first
heap[size] = null; // permit GC of objects
size--;
@@ -128,7 +128,7 @@
return null;
}
- /// <summary>Should be called when the Object at top changes values. Still log(n)
+ /// <summary>Should be called when the object at top changes values. Still log(n)
/// worst case, but it's at least twice as fast to <pre>
/// { pq.top().change(); pq.adjustTop(); }
/// </pre> instead of <pre>
@@ -157,7 +157,7 @@
private void UpHeap()
{
int i = size;
- System.Object node = heap[i]; // save bottom node
+ object node = heap[i]; // save bottom node
int j = SupportClass.Number.URShift(i, 1);
while (j > 0 && LessThan(node, heap[j]))
{
@@ -171,7 +171,7 @@
private void DownHeap()
{
int i = 1;
- System.Object node = heap[i]; // save top node
+ object node = heap[i]; // save top node
int j = i << 1; // find smaller child
int k = j + 1;
if (k <= size && LessThan(heap[k], heap[j]))
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/SmallFloat.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SmallFloat.cs Wed Jul 29 18:04:12 2009
@@ -19,15 +19,9 @@
namespace Lucene.Net.Util
{
-
-
- /// <summary>Floating point numbers smaller than 32 bits.
- ///
+ /// <summary>
+ /// Floating point numbers smaller than 32 bits.
/// </summary>
- /// <author> yonik
- /// </author>
- /// <version> $Id$
- /// </version>
public class SmallFloat
{
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/SortedVIntList.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/SortedVIntList.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using BitArray = System.Collections.BitArray;
+using DocIdSet = Lucene.Net.Search.DocIdSet;
+using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+namespace Lucene.Net.Util
+{
+ /**
+ * Store and iterate sorted integers in compressed form in RAM.
+ * <br>The code for compressing the differences between ascending integers was
+ * borrowed from {@link org.apache.lucene.store.IndexInput} and
+ * {@link org.apache.lucene.store.IndexOutput}.
+ */
+ public class SortedVIntList : DocIdSet
+ {
+
+ /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
+ * a SortedVIntList representing the index numbers of the set bits
+ * will be smaller than that BitSet.
+ */
+ protected internal static readonly int BITS2VINTLIST_SIZE = 8;
+
+ private int size;
+ private byte[] bytes;
+ private int lastBytePos;
+
+ /**
+ * Create a SortedVIntList from all elements of an array of integers.
+ *
+ * @param sortedInts A sorted array of non negative integers.
+ */
+ public SortedVIntList(int[] sortedInts)
+ :
+ this(sortedInts, sortedInts.Length)
+ {
+ }
+
+ /**
+ * Create a SortedVIntList from an array of integers.
+ * @param sortedInts An array of sorted non negative integers.
+ * @param inputSize The number of integers to be used from the array.
+ */
+ public SortedVIntList(int[] sortedInts, int inputSize)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ for (int i = 0; i < inputSize; i++)
+ {
+ builder.AddInt(sortedInts[i]);
+ }
+ builder.Done();
+ }
+
+ /**
+ * Create a SortedVIntList from a BitSet.
+ * @param bits A bit set representing a set of integers.
+ */
+ public SortedVIntList(SupportClass.CollectionsSupport.BitSet bits)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ int nextInt = bits.NextSetBit(0);
+ while (nextInt != -1)
+ {
+ builder.AddInt(nextInt);
+ nextInt = bits.NextSetBit(nextInt + 1);
+ }
+ builder.Done();
+ }
+
+ /**
+ * Create a SortedVIntList from an OpenBitSet.
+ * @param bits A bit set representing a set of integers.
+ */
+ public SortedVIntList(OpenBitSet bits)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ int nextInt = bits.NextSetBit(0);
+ while (nextInt != -1)
+ {
+ builder.AddInt(nextInt);
+ nextInt = bits.NextSetBit(nextInt + 1);
+ }
+ builder.Done();
+ }
+
+ /**
+ * Create a SortedVIntList.
+ * @param docIdSetIterator An iterator providing document numbers as a set of integers.
+ * This DocIdSetIterator is iterated completely when this constructor
+ * is called and it must provide the integers in non
+ * decreasing order.
+ */
+ public SortedVIntList(DocIdSetIterator docIdSetIterator)
+ {
+ SortedVIntListBuilder builder = new SortedVIntListBuilder(this);
+ while (docIdSetIterator.Next())
+ {
+ builder.AddInt(docIdSetIterator.Doc());
+ }
+ builder.Done();
+ }
+
+
+
+ private void InitBytes()
+ {
+ size = 0;
+ bytes = new byte[128]; // initial byte size
+ lastBytePos = 0;
+ }
+
+ private void ResizeBytes(int newSize)
+ {
+ if (newSize != bytes.Length)
+ {
+ byte[] newBytes = new byte[newSize];
+ System.Array.Copy(bytes, 0, newBytes, 0, lastBytePos);
+ bytes = newBytes;
+ }
+ }
+
+ private static readonly int VB1 = 0x7F;
+ private static readonly int BIT_SHIFT = 7;
+ private readonly int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
+
+ /**
+ * @return The total number of sorted integers.
+ */
+ public int Size()
+ {
+ return size;
+ }
+
+ /**
+ * @return The size of the byte array storing the compressed sorted integers.
+ */
+ public int GetByteSize()
+ {
+ return bytes.Length;
+ }
+
+ /**
+ * @return An iterator over the sorted integers.
+ */
+ public override DocIdSetIterator Iterator()
+ {
+ return new AnonymousDocIdSetIterator(this);
+ }
+
+ private class AnonymousDocIdSetIterator : DocIdSetIterator
+ {
+ private SortedVIntList enclosingInstance;
+ internal AnonymousDocIdSetIterator(SortedVIntList enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+
+ int bytePos = 0;
+ int lastInt = 0;
+
+ private void Advance()
+ {
+ // See org.apache.lucene.store.IndexInput.readVInt()
+ byte b = enclosingInstance.bytes[bytePos++];
+ lastInt += b & VB1;
+ for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT)
+ {
+ b = enclosingInstance.bytes[bytePos++];
+ lastInt += (b & VB1) << s;
+ }
+ }
+
+ public override int Doc() { return lastInt; }
+
+ public override bool Next()
+ {
+ if (bytePos >= enclosingInstance.lastBytePos)
+ {
+ return false;
+ }
+ else
+ {
+ Advance();
+ return true;
+ }
+ }
+
+ public override bool SkipTo(int docNr)
+ {
+ while (bytePos < enclosingInstance.lastBytePos)
+ {
+ Advance();
+ if (lastInt >= docNr)
+ { // No skipping to docNr available.
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ //private class SortedVIntListBuilder {
+ internal class SortedVIntListBuilder
+ {
+ private int lastInt = 0;
+
+ private SortedVIntList enclosingInstance;
+ internal SortedVIntListBuilder(SortedVIntList enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ enclosingInstance.InitBytes();
+ lastInt = 0;
+ }
+
+ internal void AddInt(int nextInt)
+ {
+ int diff = nextInt - lastInt;
+ if (diff < 0)
+ {
+ throw new System.ArgumentException(
+ "Input not sorted or first element negative.");
+ }
+
+ if ((enclosingInstance.lastBytePos + enclosingInstance.MAX_BYTES_PER_INT) > enclosingInstance.bytes.Length)
+ {
+ // biggest possible int does not fit
+ enclosingInstance.ResizeBytes((enclosingInstance.bytes.Length * 2) + enclosingInstance.MAX_BYTES_PER_INT);
+ }
+
+ // See org.apache.lucene.store.IndexOutput.writeVInt()
+ while ((diff & ~VB1) != 0)
+ { // The high bit of the next byte needs to be set.
+ enclosingInstance.bytes[enclosingInstance.lastBytePos++] = (byte)((diff & VB1) | ~VB1);
+ //{DOUG-2.4.0: mod'd to do logical right shift (>>>); have to use unsigned types and >>
+ //diff >>>= BIT_SHIFT;
+ diff = (int)((uint)diff >> BIT_SHIFT);
+ }
+ enclosingInstance.bytes[enclosingInstance.lastBytePos++] = (byte)diff; // Last byte, high bit not set.
+ enclosingInstance.size++;
+ lastInt = nextInt;
+ }
+
+ internal void Done()
+ {
+ enclosingInstance.ResizeBytes(enclosingInstance.lastBytePos);
+ }
+ }
+
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/StringHelper.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/StringHelper.cs Wed Jul 29 18:04:12 2009
@@ -15,19 +15,31 @@
* limitations under the License.
*/
-using System;
-
namespace Lucene.Net.Util
{
-
-
- /// <summary> Methods for manipulating strings.
- ///
- /// $Id: StringHelper.java 472959 2006-11-09 16:21:50Z yonik $
+ /// <summary>
+ /// Methods for manipulating strings.
/// </summary>
public abstract class StringHelper
{
-
+ /// <summary>
+ /// Compares two byte arrays, starting at their 0th elements, and returns the
+ /// number of common elements before encountering a difference.
+ /// </summary>
+ /// <param name="bytes1"></param>
+ /// <param name="len1"></param>
+ /// <param name="bytes2"></param>
+ /// <param name="len2"></param>
+ /// <returns>the number of common elements</returns>
+ public static int bytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2)
+ {
+ int len = len1 < len2 ? len1 : len2;
+ for (int i = 0; i < len; i++)
+ if (bytes1[i] != bytes2[i])
+ return i;
+ return len;
+ }
+
/// <summary> Compares two strings, character by character, and returns the
/// first position where the two strings differ from one another.
///
@@ -38,7 +50,7 @@
/// </param>
/// <returns> The first position where the two strings differ.
/// </returns>
- public static int StringDifference(System.String s1, System.String s2)
+ public static int StringDifference(string s1, string s2)
{
int len1 = s1.Length;
int len2 = s2.Length;
@@ -53,7 +65,6 @@
return len;
}
-
private StringHelper()
{
}
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Util/UnicodeUtil.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Util/UnicodeUtil.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,498 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Some of this code came from the excellent Unicode
+ * conversion examples from:
+ *
+ * http://www.unicode.org/Public/PROGRAMS/CVTUTF
+ *
+ * Full Copyright for that code follows:
+ */
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+namespace Lucene.Net.Util
+{
+ /**
+ * Class to encode java's UTF16 char[] into UTF8 byte[]
+ * without always allocating a new byte[] as
+ * String.getBytes("UTF-8") does.
+ *
+ * <p><b>WARNING</b>: This API is a new and experimental and
+ * may suddenly change. </p>
+ */
+ sealed public class UnicodeUtil
+ {
+
+ public static readonly int UNI_SUR_HIGH_START = 0xD800;
+ public static readonly int UNI_SUR_HIGH_END = 0xDBFF;
+ public static readonly int UNI_SUR_LOW_START = 0xDC00;
+ public static readonly int UNI_SUR_LOW_END = 0xDFFF;
+ public static readonly int UNI_REPLACEMENT_CHAR = 0xFFFD;
+
+ private static readonly long UNI_MAX_BMP = 0x0000FFFF;
+
+ private static readonly int HALF_BASE = 0x0010000;
+ private static readonly long HALF_SHIFT = 10;
+ private static readonly long HALF_MASK = 0x3FFL;
+
+ sealed public class UTF8Result
+ {
+ public byte[] result = new byte[10];
+ public int length;
+
+ public void setLength(int newLength)
+ {
+ if (result.Length < newLength)
+ {
+ byte[] newArray = new byte[(int)(1.5 * newLength)];
+ System.Array.Copy(result, 0, newArray, 0, length);
+ result = newArray;
+ }
+ length = newLength;
+ }
+ }
+
+ sealed public class UTF16Result
+ {
+ public char[] result = new char[10];
+ public int[] offsets = new int[10];
+ public int length;
+
+ public void setLength(int newLength)
+ {
+ if (result.Length < newLength)
+ {
+ char[] newArray = new char[(int)(1.5 * newLength)];
+ System.Array.Copy(result, 0, newArray, 0, length);
+ result = newArray;
+ }
+ length = newLength;
+ }
+
+ public void copyText(UTF16Result other)
+ {
+ setLength(other.length);
+ System.Array.Copy(other.result, 0, result, 0, length);
+ }
+ }
+
+ /** Encode characters from a char[] source, starting at
+ * offset and stopping when the character 0xffff is seen.
+ * Returns the number of bytes written to bytesOut. */
+ public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, UTF8Result result)
+ {
+
+ int upto = 0;
+ int i = offset;
+ byte[] out_Renamed = result.result;
+
+ while (true)
+ {
+
+ int code = (int)source[i++];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte)code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ if (code == 0xffff)
+ // END
+ break;
+ out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && source[i] != 0xffff)
+ {
+ int utf32 = (int)source[i];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out_Renamed-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte)0xEF;
+ out_Renamed[upto++] = (byte)0xBF;
+ out_Renamed[upto++] = (byte)0xBD;
+ }
+ }
+ //System.Diagnostics.Debug.Assert(matches(source, offset, i-offset-1, out_Renamed, upto);
+ result.length = upto;
+ }
+
+ /** Encode characters from a char[] source, starting at
+ * offset for length chars. Returns the number of bytes
+ * written to bytesOut. */
+ public static void UTF16toUTF8(/* in */ char[] source, /* in */ int offset, /* in */ int length, UTF8Result result)
+ {
+
+ int upto = 0;
+ int i = offset;
+ int end = offset + length;
+ byte[] out_Renamed = result.result;
+
+ while (i < end)
+ {
+
+ int code = (int)source[i++];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte)code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && i < end && source[i] != 0xffff)
+ {
+ int utf32 = (int)source[i];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out_Renamed-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte)0xEF;
+ out_Renamed[upto++] = (byte)0xBF;
+ out_Renamed[upto++] = (byte)0xBD;
+ }
+ }
+ //System.Diagnostics.Debug.Assert(matches(source, offset, length, out_Renamed, upto);
+ result.length = upto;
+ }
+
+ /** Encode characters from this String, starting at offset
+ * for length characters. Returns the number of bytes
+ * written to bytesOut. */
+ public static void UTF16toUTF8(/* in */ string s, /* in */ int offset, /* in */ int length, UTF8Result result)
+ {
+ int end = offset + length;
+
+ byte[] out_Renamed = result.result;
+
+ int upto = 0;
+ for (int i = offset; i < end; i++)
+ {
+ int code = (int)s[i];
+
+ if (upto + 4 > out_Renamed.Length)
+ {
+ byte[] newOut = new byte[2 * out_Renamed.Length];
+ System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4);
+ System.Array.Copy(out_Renamed, 0, newOut, 0, upto);
+ result.result = out_Renamed = newOut;
+ }
+ if (code < 0x80)
+ out_Renamed[upto++] = (byte)code;
+ else if (code < 0x800)
+ {
+ out_Renamed[upto++] = (byte)(0xC0 | (code >> 6));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else if (code < 0xD800 || code > 0xDFFF)
+ {
+ out_Renamed[upto++] = (byte)(0xE0 | (code >> 12));
+ out_Renamed[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (code & 0x3F));
+ }
+ else
+ {
+ // surrogate pair
+ // confirm valid high surrogate
+ if (code < 0xDC00 && (i < end - 1))
+ {
+ int utf32 = (int)s[i + 1];
+ // confirm valid low surrogate and write pair
+ if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+ {
+ utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
+ i++;
+ out_Renamed[upto++] = (byte)(0xF0 | (utf32 >> 18));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+ out_Renamed[upto++] = (byte)(0x80 | (utf32 & 0x3F));
+ continue;
+ }
+ }
+ // replace unpaired surrogate or out_Renamed-of-order low surrogate
+ // with substitution character
+ out_Renamed[upto++] = (byte)0xEF;
+ out_Renamed[upto++] = (byte)0xBF;
+ out_Renamed[upto++] = (byte)0xBD;
+ }
+ }
+ //System.Diagnostics.Debug.Assert(matches(s, offset, length, out_Renamed, upto);
+ result.length = upto;
+ }
+
+ /** Convert UTF8 bytes into UTF16 characters. If offset
+ * is non-zero, conversion starts at that starting point
+ * in utf8, re-using the results from the previous call
+ * up until offset. */
+ public static void UTF8toUTF16(/* in */ byte[] utf8, /* in */ int offset, /* in */ int length, /* in */ UTF16Result result)
+ {
+
+ int end = offset + length;
+ char[] out_Renamed = result.result;
+ if (result.offsets.Length <= end)
+ {
+ int[] newOffsets = new int[2 * end];
+ System.Array.Copy(result.offsets, 0, newOffsets, 0, result.offsets.Length);
+ result.offsets = newOffsets;
+ }
+ int[] offsets = result.offsets;
+
+ // If incremental decoding fell in the middle of a
+ // single unicode character, rollback to its start:
+ int upto = offset;
+ while (offsets[upto] == -1)
+ upto--;
+
+ int outUpto = offsets[upto];
+
+ // Pre-allocate for worst case 1-for-1
+ if (outUpto + length >= out_Renamed.Length)
+ {
+ char[] newOut = new char[2 * (outUpto + length)];
+ System.Array.Copy(out_Renamed, 0, newOut, 0, outUpto);
+ result.result = out_Renamed = newOut;
+ }
+
+ while (upto < end)
+ {
+
+ int b = utf8[upto] & 0xff;
+ int ch;
+
+ offsets[upto++] = outUpto;
+
+ if (b < 0xc0)
+ {
+ System.Diagnostics.Debug.Assert(b < 0x80);
+ ch = b;
+ }
+ else if (b < 0xe0)
+ {
+ ch = ((b & 0x1f) << 6) + (utf8[upto] & 0x3f);
+ offsets[upto++] = -1;
+ }
+ else if (b < 0xf0)
+ {
+ ch = ((b & 0xf) << 12) + ((utf8[upto] & 0x3f) << 6) + (utf8[upto + 1] & 0x3f);
+ offsets[upto++] = -1;
+ offsets[upto++] = -1;
+ }
+ else
+ {
+ System.Diagnostics.Debug.Assert(b < 0xf8);
+ ch = ((b & 0x7) << 18) + ((utf8[upto] & 0x3f) << 12) + ((utf8[upto + 1] & 0x3f) << 6) + (utf8[upto + 2] & 0x3f);
+ offsets[upto++] = -1;
+ offsets[upto++] = -1;
+ offsets[upto++] = -1;
+ }
+
+ if (ch <= UNI_MAX_BMP)
+ {
+ // target is a character <= 0xFFFF
+ out_Renamed[outUpto++] = (char)ch;
+ }
+ else
+ {
+ // target is a character in range 0xFFFF - 0x10FFFF
+ int chHalf = ch - HALF_BASE;
+ out_Renamed[outUpto++] = (char)((chHalf >> (int)HALF_SHIFT) + UNI_SUR_HIGH_START);
+ out_Renamed[outUpto++] = (char)((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
+ }
+ }
+
+ offsets[upto] = outUpto;
+ result.length = outUpto;
+ }
+
+ // Only called from assert
+ /*
+ private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
+ try {
+ String s1 = new String(source, offset, length);
+ String s2 = new String(result, 0, upto, "UTF-8");
+ if (!s1.equals(s2)) {
+ //System.out_Renamed.println("DIFF: s1 len=" + s1.Length());
+ //for(int i=0;i<s1.Length();i++)
+ // System.out_Renamed.println(" " + i + ": " + (int) s1.charAt(i));
+ //System.out_Renamed.println("s2 len=" + s2.Length());
+ //for(int i=0;i<s2.Length();i++)
+ // System.out_Renamed.println(" " + i + ": " + (int) s2.charAt(i));
+
+ // If the input string was invalid, then the
+ // difference is OK
+ if (!validUTF16String(s1))
+ return true;
+
+ return false;
+ }
+ return s1.equals(s2);
+ } catch (UnsupportedEncodingException uee) {
+ return false;
+ }
+ }
+
+ // Only called from assert
+ private static boolean matches(String source, int offset, int length, byte[] result, int upto) {
+ try {
+ String s1 = source.substring(offset, offset+length);
+ String s2 = new String(result, 0, upto, "UTF-8");
+ if (!s1.equals(s2)) {
+ // Allow a difference if s1 is not valid UTF-16
+
+ //System.out_Renamed.println("DIFF: s1 len=" + s1.Length());
+ //for(int i=0;i<s1.Length();i++)
+ // System.out_Renamed.println(" " + i + ": " + (int) s1.charAt(i));
+ //System.out_Renamed.println(" s2 len=" + s2.Length());
+ //for(int i=0;i<s2.Length();i++)
+ // System.out_Renamed.println(" " + i + ": " + (int) s2.charAt(i));
+
+ // If the input string was invalid, then the
+ // difference is OK
+ if (!validUTF16String(s1))
+ return true;
+
+ return false;
+ }
+ return s1.equals(s2);
+ } catch (UnsupportedEncodingException uee) {
+ return false;
+ }
+ }
+
+ public static final boolean validUTF16String(String s) {
+ final int size = s.Length();
+ for(int i=0;i<size;i++) {
+ char ch = s.charAt(i);
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ if (i < size-1) {
+ i++;
+ char nextCH = s.charAt(i);
+ if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+ // Valid surrogate pair
+ } else
+ // Unmatched hight surrogate
+ return false;
+ } else
+ // Unmatched hight surrogate
+ return false;
+ } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+ // Unmatched low surrogate
+ return false;
+ }
+
+ return true;
+ }
+
+ public static final boolean validUTF16String(char[] s, int size) {
+ for(int i=0;i<size;i++) {
+ char ch = s[i];
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ if (i < size-1) {
+ i++;
+ char nextCH = s[i];
+ if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
+ // Valid surrogate pair
+ } else
+ return false;
+ } else
+ return false;
+ } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
+ // Unmatched low surrogate
+ return false;
+ }
+
+ return true;
+ }
+ */
+ }
+}