You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/08 00:12:07 UTC
[03/34] lucenenet git commit: Raw porting of
Lucene.Net.Analysis.Common
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
new file mode 100644
index 0000000..e876a6f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -0,0 +1,498 @@
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// <seealso cref="CharacterUtils"/> provides a unified interface to Character-related
+ /// operations to implement backwards compatible character operations based on a
+ /// <seealso cref="Version"/> instance.
+ ///
+ /// @lucene.internal
+ /// </summary>
+ public abstract class CharacterUtils
+ {
+ private static readonly Java4CharacterUtils JAVA_4 = new Java4CharacterUtils();
+ private static readonly Java5CharacterUtils JAVA_5 = new Java5CharacterUtils();
+
+ /// <summary>
+ /// Selects the <see cref="CharacterUtils"/> implementation that matches the
+ /// requested compatibility <see cref="Version"/>.
+ /// </summary>
+ /// <param name="matchVersion"> the version whose behavior should be mimicked </param>
+ /// <returns> <c>JAVA_5</c> for versions on or after <c>LUCENE_31</c>, otherwise <c>JAVA_4</c> </returns>
+ public static CharacterUtils getInstance(Version matchVersion)
+ {
+ if (matchVersion.onOrAfter(Version.LUCENE_31))
+ {
+ return JAVA_5;
+ }
+ return JAVA_4;
+ }
+
+ /// <summary>
+ /// Gets the shared <see cref="CharacterUtils"/> instance that is compatible with
+ /// Java 1.4.
+ /// </summary>
+ /// <value> the singleton stored in <c>JAVA_4</c> </value>
+ public static CharacterUtils Java4Instance
+ {
+ get { return JAVA_4; }
+ }
+
+ /// <summary>
+ /// Returns the code point at the given index of the <seealso cref="CharSequence"/>.
+ /// Depending on the <seealso cref="Version"/> passed to
+ /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+ /// of <seealso cref="Character#codePointAt(CharSequence, int)"/> as it would have been
+ /// available on a Java 1.4 JVM or on a later virtual machine version.
+ /// </summary>
+ /// <param name="seq">
+ /// a character sequence </param>
+ /// <param name="offset">
+ /// the offset to the char values in the character sequence to be converted
+ /// </param>
+ /// <returns> the Unicode code point at the given index </returns>
+ /// <exception cref="NullPointerException">
+ /// - if the sequence is null. </exception>
+ /// <exception cref="IndexOutOfBoundsException">
+ /// - if the value offset is negative or not less than the length of
+ /// the character sequence. </exception>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public abstract int codePointAt(final CharSequence seq, final int offset);
+ public abstract int codePointAt(CharSequence seq, int offset);
+
+ /// <summary>
+ /// Returns the code point at the given index of the char array where only elements
+ /// with index less than the limit are used.
+ /// Depending on the <seealso cref="Version"/> passed to
+ /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+ /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
+ /// available on a Java 1.4 JVM or on a later virtual machine version.
+ /// </summary>
+ /// <param name="chars">
+ /// a character array </param>
+ /// <param name="offset">
+ /// the offset to the char values in the chars array to be converted </param>
+ /// <param name="limit"> the index after the last element that should be used to calculate
+ /// the code point.
+ /// </param>
+ /// <returns> the Unicode code point at the given index </returns>
+ /// <exception cref="NullPointerException">
+ /// - if the array is null. </exception>
+ /// <exception cref="IndexOutOfBoundsException">
+ /// - if the value offset is negative or not less than the length of
+ /// the char array. </exception>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public abstract int codePointAt(final char[] chars, final int offset, final int limit);
+ public abstract int codePointAt(char[] chars, int offset, int limit);
+
+ /// <summary>
+ /// Returns the number of code points in <code>seq</code>, as counted by the concrete implementation. </summary>
+ public abstract int codePointCount(CharSequence seq);
+
+ /// <summary>
+ /// Allocates a fresh <see cref="CharacterBuffer"/> backed by a <c>char[]</c> of
+ /// <paramref name="bufferSize"/> elements, with offset and length both zero.
+ /// </summary>
+ /// <param name="bufferSize"> capacity of the backing array; at least <c>2</c> </param>
+ /// <returns> the newly created <see cref="CharacterBuffer"/> </returns>
+ /// <exception cref="System.ArgumentException"> if <paramref name="bufferSize"/> is below <c>2</c> </exception>
+ public static CharacterBuffer newCharacterBuffer(int bufferSize)
+ {
+ bool largeEnough = bufferSize >= 2;
+ if (!largeEnough)
+ {
+ throw new System.ArgumentException("buffersize must be >= 2");
+ }
+ char[] storage = new char[bufferSize];
+ return new CharacterBuffer(storage, 0, 0);
+ }
+
+
+ /// <summary>
+ /// Lowercases, in place, each code point of <paramref name="buffer"/> from
+ /// <paramref name="offset"/> (inclusive) up to <paramref name="limit"/> (exclusive). </summary>
+ /// <param name="buffer"> the char buffer to lowercase </param>
+ /// <param name="offset"> the index of the first char to convert; must not be negative </param>
+ /// <param name="limit"> the index after the last char to convert; at most <c>buffer.Length</c> </param>
+//ORIGINAL LINE: public final void toLowerCase(final char[] buffer, final int offset, final int limit)
+ public void ToLower(char[] buffer, int offset, int limit)
+ {
+ Debug.Assert(buffer.Length >= limit);
+ // The ported check read "offset <= 0", which fails for every positive start index.
+ Debug.Assert(offset >= 0 && offset <= buffer.Length);
+ for (int i = offset; i < limit;)
+ {
+ i += char.toChars(char.ToLower(codePointAt(buffer, i, limit)), buffer, i);
+ }
+ }
+
+ /// <summary>
+ /// Uppercases, in place, each code point of <paramref name="buffer"/> from
+ /// <paramref name="offset"/> (inclusive) up to <paramref name="limit"/> (exclusive). </summary>
+ /// <param name="buffer"> the char buffer to uppercase </param>
+ /// <param name="offset"> the index of the first char to convert; must not be negative </param>
+ /// <param name="limit"> the index after the last char to convert; at most <c>buffer.Length</c> </param>
+//ORIGINAL LINE: public final void toUpperCase(final char[] buffer, final int offset, final int limit)
+ public void ToUpper(char[] buffer, int offset, int limit)
+ {
+ Debug.Assert(buffer.Length >= limit);
+ // The ported check read "offset <= 0", which fails for every positive start index.
+ Debug.Assert(offset >= 0 && offset <= buffer.Length);
+ for (int i = offset; i < limit;)
+ {
+ i += char.toChars(char.ToUpper(codePointAt(buffer, i, limit)), buffer, i);
+ }
+ }
+
+ /// <summary>
+ /// Decodes <paramref name="srcLen"/> chars of <paramref name="src"/>, starting at
+ /// <paramref name="srcOff"/>, into code points stored consecutively in
+ /// <paramref name="dest"/> from index <paramref name="destOff"/> onwards. </summary>
+ /// <returns> the number of code points written to the destination buffer </returns>
+ /// <exception cref="System.ArgumentException"> if <paramref name="srcLen"/> is negative </exception>
+ public int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
+ {
+ if (srcLen < 0)
+ {
+ throw new System.ArgumentException("srcLen must be >= 0");
+ }
+ int srcEnd = srcOff + srcLen; // exclusive bound handed to codePointAt
+ int produced = 0;
+ int consumed = 0;
+ while (consumed < srcLen)
+ {
+ int codePoint = codePointAt(src, srcOff + consumed, srcEnd);
+ dest[destOff + produced++] = codePoint;
+ consumed += char.charCount(codePoint);
+ }
+ return produced;
+ }
+
+ /// <summary>
+ /// Encodes <paramref name="srcLen"/> code points of <paramref name="src"/> as chars written to <paramref name="dest"/>. </summary>
+ /// <returns> the number of chars written to the destination buffer </returns>
+ public int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
+ {
+ if (srcLen < 0)
+ {
+ throw new System.ArgumentException("srcLen must be >= 0");
+ }
+ int charsOut = 0;
+ for (int remaining = srcLen, next = srcOff; remaining > 0; remaining--, next++)
+ {
+ charsOut += char.toChars(src[next], dest, destOff + charsOut);
+ }
+ return charsOut;
+ }
+
+ /// <summary>
+ /// Fills the <seealso cref="CharacterBuffer"/> with characters read from the given
+ /// reader <seealso cref="Reader"/>. This method tries to read <code>numChars</code>
+ /// characters into the <seealso cref="CharacterBuffer"/>, each call to fill will start
+ /// filling the buffer from offset <code>0</code> up to <code>numChars</code>.
+ /// In case code points can span across 2 java characters, this method may
+ /// only fill <code>numChars - 1</code> characters in order not to split in
+ /// the middle of a surrogate pair, even if there are remaining characters in
+ /// the <seealso cref="Reader"/>.
+ /// <para>
+ /// Depending on the <seealso cref="Version"/> passed to
+ /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method implements
+ /// supplementary character awareness when filling the given buffer. For all
+ /// <seealso cref="Version"/> > 3.0 <seealso cref="#fill(CharacterBuffer, Reader, int)"/> guarantees
+ /// that the given <seealso cref="CharacterBuffer"/> will never contain a high surrogate
+ /// character as the last element in the buffer unless it is the last available
+ /// character in the reader. In other words, high and low surrogate pairs will
+ /// always be preserved across buffer borders.
+ /// </para>
+ /// <para>
+ /// A return value of <code>false</code> means that this method call exhausted
+ /// the reader, but there may be some chars which have been read, which can be
+ /// verified by checking whether <code>buffer.Length > 0</code>.
+ /// </para>
+ /// </summary>
+ /// <param name="buffer">
+ /// the buffer to fill. </param>
+ /// <param name="reader">
+ /// the reader to read characters from. </param>
+ /// <param name="numChars">
+ /// the number of chars to read </param>
+ /// <returns> <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer </returns>
+ /// <exception cref="IOException">
+ /// if the reader throws an <seealso cref="IOException"/>. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public abstract boolean fill(CharacterBuffer buffer, java.io.Reader reader, int numChars) throws java.io.IOException;
+ public abstract bool fill(CharacterBuffer buffer, Reader reader, int numChars);
+
+ /// <summary>
+ /// Requests a fill of the whole backing array by delegating to the three-argument
+ /// overload with <c>buffer.buffer.Length</c> as <c>numChars</c>. </summary>
+ public bool fill(CharacterBuffer buffer, Reader reader)
+ {
+ int capacity = buffer.buffer.Length;
+ return fill(buffer, reader, capacity);
+ }
+
+ /// <summary>
+ /// Returns the index within <code>buf[start:start+count]</code> that lies <code>offset</code>
+ /// code points away from <code>index</code>.
+ /// </summary>
+ public abstract int offsetByCodePoints(char[] buf, int start, int count, int index, int offset);
+
+ /// <summary> Reads into dest (starting at offset) until len chars are stored or the reader is exhausted. </summary>
+ /// <returns> the number of chars actually read, possibly fewer than <paramref name="len"/> </returns>
+ internal static int readFully(Reader reader, char[] dest, int offset, int len)
+ {
+ int read = 0;
+ while (read < len)
+ {
+ int r = reader.read(dest, offset + read, len - read);
+ // Java readers signal end of stream with -1, .NET text readers with 0; accepting
+ // both keeps a zero-length read from spinning this loop forever.
+ if (r <= 0)
+ {
+ break;
+ }
+ read += r;
+ }
+ return read;
+ }
+
+ /// <summary>
+ /// Variant that forwards to the <c>char.codePointAt</c>, <c>char.codePointCount</c> and
+ /// <c>char.offsetByCodePoints</c> helpers. NOTE(review): confirm these exist in the port. </summary>
+ private sealed class Java5CharacterUtils : CharacterUtils
+ {
+ internal Java5CharacterUtils()
+ {
+ }
+
+ // Code point starting at offset in seq, as computed by char.codePointAt.
+ public override int codePointAt(CharSequence seq, int offset)
+ {
+ return char.codePointAt(seq, offset);
+ }
+
+ // Code point starting at offset in chars; limit is forwarded unchanged.
+ public override int codePointAt(char[] chars, int offset, int limit)
+ {
+ return char.codePointAt(chars, offset, limit);
+ }
+
+ // Reads up to numChars chars into buffer, starting at buffer index 0. When the buffer
+ // is filled completely and its last char is a high surrogate, that char is removed from
+ // the reported length and parked in lastTrailingHighSurrogate; the next call puts it
+ // back at index 0 before reading. Returns true only if the buffer reached numChars chars.
+ public override bool fill(CharacterBuffer buffer, Reader reader, int numChars)
+ {
+ Debug.Assert(buffer.buffer.Length >= 2);
+ if (numChars < 2 || numChars > buffer.buffer.Length)
+ {
+ throw new System.ArgumentException("numChars must be >= 2 and <= the buffer size");
+ }
+ char[] charBuffer = buffer.buffer;
+ buffer.offset = 0;
+ // Index where newly read chars start: 1 if a parked surrogate is re-installed, else 0.
+ int offset;
+
+ // Install the previously saved ending high surrogate:
+ if (buffer.lastTrailingHighSurrogate != 0)
+ {
+ charBuffer[0] = buffer.lastTrailingHighSurrogate;
+ buffer.lastTrailingHighSurrogate = (char)0;
+ offset = 1;
+ }
+ else
+ {
+ offset = 0;
+ }
+
+ // Ask only for the room left after the re-installed surrogate, if any.
+ int read = readFully(reader, charBuffer, offset, numChars - offset);
+
+ buffer.length = offset + read;
+ // Captured before the length is possibly reduced by one below.
+ bool result = buffer.length == numChars;
+ if (buffer.length < numChars)
+ {
+ // We failed to fill the buffer. Even if the last char is a high
+ // surrogate, there is nothing we can do
+ return result;
+ }
+
+ // Park a trailing high surrogate so the next fill can place it at index 0.
+ if (char.IsHighSurrogate(charBuffer[buffer.length - 1]))
+ {
+ buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
+ }
+ return result;
+ }
+
+ // Counts code points over the whole sequence, from index 0 to seq.length().
+ public override int codePointCount(CharSequence seq)
+ {
+ return char.codePointCount(seq, 0, seq.length());
+ }
+
+ // Forwards all arguments unchanged to char.offsetByCodePoints.
+ public override int offsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+ {
+ return char.offsetByCodePoints(buf, start, count, index, offset);
+ }
+ }
+
+ /// <summary>
+ /// Variant in which every <c>char</c> counts as one code point; surrogate pairs are
+ /// never combined.
+ /// </summary>
+ private sealed class Java4CharacterUtils : CharacterUtils
+ {
+ internal Java4CharacterUtils()
+ {
+ }
+
+ // The char stored at offset, returned as an int.
+ public override int codePointAt(CharSequence seq, int offset)
+ {
+ return seq.charAt(offset);
+ }
+
+ // The char stored at offset, provided offset lies before limit.
+ public override int codePointAt(char[] chars, int offset, int limit)
+ {
+ if (offset < limit)
+ {
+ return chars[offset];
+ }
+ throw new System.IndexOutOfRangeException("offset must be less than limit");
+ }
+
+ // Plain bulk read into the start of the buffer; no surrogate is ever carried over.
+ public override bool fill(CharacterBuffer buffer, Reader reader, int numChars)
+ {
+ Debug.Assert(buffer.buffer.Length >= 1);
+ bool countInRange = numChars >= 1 && numChars <= buffer.buffer.Length;
+ if (!countInRange)
+ {
+ throw new System.ArgumentException("numChars must be >= 1 and <= the buffer size");
+ }
+ buffer.offset = 0;
+ int charsRead = readFully(reader, buffer.buffer, 0, numChars);
+ buffer.length = charsRead;
+ buffer.lastTrailingHighSurrogate = (char)0;
+ return charsRead == numChars;
+ }
+
+ // One code point per char, so the count is simply the sequence length.
+ public override int codePointCount(CharSequence seq)
+ {
+ return seq.length();
+ }
+
+ // Offsets are measured in chars; the target must fall within [0, count].
+ public override int offsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+ {
+ int target = index + offset;
+ if (target >= 0 && target <= count)
+ {
+ return target;
+ }
+ throw new System.IndexOutOfRangeException();
+ }
+
+ }
+
+ /// <summary>
+ /// Mutable window over a <c>char[]</c>, intended to be filled by
+ /// <see cref="CharacterUtils.fill(CharacterBuffer, Reader)"/>.
+ /// </summary>
+ public sealed class CharacterBuffer
+ {
+
+ // Backing storage, exposed through Buffer.
+ internal readonly char[] buffer;
+ // Start of the valid data inside buffer.
+ internal int offset;
+ // Number of valid chars counted from offset.
+ internal int length;
+ // Deliberately internal rather than private so the enclosing class can
+ // read and write it directly.
+ internal char lastTrailingHighSurrogate;
+
+ /// <summary>
+ /// Wraps <paramref name="buffer"/> without copying it and records the initial window.
+ /// </summary>
+ /// <param name="buffer"> the array to wrap </param>
+ /// <param name="offset"> the initial data offset </param>
+ /// <param name="length"> the initial data length </param>
+ internal CharacterBuffer(char[] buffer, int offset, int length)
+ {
+ this.length = length;
+ this.offset = offset;
+ this.buffer = buffer;
+ }
+
+ /// <summary>
+ /// Gets the backing array itself (not a copy).
+ /// </summary>
+ /// <value> the <c>char[]</c> this buffer was constructed with </value>
+ public char[] Buffer
+ {
+ get { return this.buffer; }
+ }
+
+ /// <summary>
+ /// Gets the index in <see cref="Buffer"/> at which the valid data begins.
+ /// </summary>
+ /// <value> the current data offset </value>
+ public int Offset
+ {
+ get { return this.offset; }
+ }
+
+ /// <summary>
+ /// Gets the number of valid chars in <see cref="Buffer"/>, counted from
+ /// <see cref="Offset"/>.
+ /// </summary>
+ /// <value> the current data length </value>
+ public int Length
+ {
+ get { return this.length; }
+ }
+
+ /// <summary>
+ /// Returns the buffer to its empty state: offset and length become zero and any
+ /// pending trailing high surrogate is discarded. The backing array is untouched.
+ /// </summary>
+ public void reset()
+ {
+ lastTrailingHighSurrogate = (char)0;
+ length = 0;
+ offset = 0;
+ }
+ }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
new file mode 100644
index 0000000..8b7c93b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
@@ -0,0 +1,105 @@
+using System;
+using System.Threading;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Simple <seealso cref="ResourceLoader"/> that uses <seealso cref="ClassLoader#getResourceAsStream(String)"/>
+ /// and <seealso cref="Class#forName(String,boolean,ClassLoader)"/> to open resources and
+ /// classes, respectively.
+ /// </summary>
+ public sealed class ClasspathResourceLoader : ResourceLoader
+ {
+ private readonly Type clazz;
+ private readonly ClassLoader loader;
+
+ /// <summary>
+ /// Creates an instance using the context classloader to load Resources and classes.
+ /// Resource paths must be absolute. </summary>
+ public ClasspathResourceLoader() : this(Thread.CurrentThread.ContextClassLoader)
+ {
+ }
+
+ /// <summary>
+ /// Creates an instance using the given classloader to load Resources and classes.
+ /// Resource paths must be absolute. </summary>
+ public ClasspathResourceLoader(ClassLoader loader) : this(null, loader)
+ {
+ }
+
+ /// <summary>
+ /// Creates an instance using the classloader of the given class to load Resources and classes.
+ /// Resources are resolved relative to the given class, if path is not absolute. </summary>
+ public ClasspathResourceLoader(Type clazz) : this(clazz, clazz.ClassLoader)
+ {
+ }
+
+ private ClasspathResourceLoader(Type clazz, ClassLoader loader)
+ {
+ this.clazz = clazz;
+ this.loader = loader;
+ }
+
+ /// <summary> Opens the named resource via the class when one was supplied, otherwise via the classloader. </summary>
+ /// <exception cref="IOException"> if the lookup yields no stream </exception>
+ public InputStream openResource(string resource)
+ {
+ // NOTE(review): getResourceAsStream is carried over verbatim from Java - confirm the .NET equivalent.
+ InputStream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
+ if (stream == null)
+ {
+ throw new IOException("Resource not found: " + resource);
+ }
+ return stream;
+ }
+
+ /// <summary>
+ /// Looks up the type named <paramref name="cname"/> and narrows it to <paramref name="expectedType"/>. </summary>
+ public Type findClass<T>(string cname, Type expectedType)
+ {
+ try
+ {
+ return Type.GetType(cname, true, loader).asSubclass(expectedType);
+ }
+ catch (Exception e)
+ {
+ throw new Exception("Cannot load class: " + cname, e);
+ }
+ }
+
+ /// <summary> Instantiates the type found by <c>findClass</c> through its parameterless constructor. </summary>
+ /// <remarks> <c>T</c> is passed to <c>findClass</c> explicitly; it cannot be inferred from the arguments. </remarks>
+ public T newInstance<T>(string cname, Type expectedType)
+ {
+ Type type = findClass<T>(cname, expectedType);
+ try
+ {
+ return (T)Activator.CreateInstance(type);
+ }
+ catch (Exception e)
+ {
+ throw new Exception("Cannot create instance: " + cname, e);
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
new file mode 100644
index 0000000..2571ccd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
@@ -0,0 +1,80 @@
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+ /// <summary>
+ /// Removes elisions from a <seealso cref="TokenStream"/>. For example, "l'avion" (the plane) will be
+ /// tokenized as "avion" (plane).
+ /// </summary>
+ /// <seealso href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</seealso>
+ public sealed class ElisionFilter : TokenFilter
+ {
+ private readonly CharArraySet articles;
+ private readonly CharTermAttribute termAtt;
+
+ /// <summary>
+ /// Constructs an elision filter with a Set of stop words </summary>
+ /// <param name="input"> the source <seealso cref="TokenStream"/> </param>
+ /// <param name="articles"> a set of stopword articles </param>
+ public ElisionFilter(TokenStream input, CharArraySet articles) : base(input)
+ {
+ this.articles = articles;
+ // Registered here rather than in a field initializer: C# does not allow a field
+ // initializer to call an instance method such as addAttribute.
+ termAtt = addAttribute(typeof(CharTermAttribute));
+ }
+
+ /// <summary> Advances the input; a leading article plus apostrophe is stripped from the term. </summary>
+ /// <returns> the value returned by the wrapped input's <c>incrementToken</c> </returns>
+ public override bool incrementToken()
+ {
+ if (input.incrementToken())
+ {
+ char[] termBuffer = termAtt.buffer();
+ int termLength = termAtt.length();
+
+ int index = -1;
+ for (int i = 0; i < termLength; i++)
+ {
+ char ch = termBuffer[i];
+ if (ch == '\'' || ch == '\u2019')
+ {
+ index = i;
+ break;
+ }
+ }
+
+ // An apostrophe has been found. If the prefix is an article strip it off.
+ if (index >= 0 && articles.contains(termBuffer, 0, index))
+ {
+ termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1));
+ }
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
new file mode 100644
index 0000000..7dc1bbe
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
@@ -0,0 +1,86 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using FrenchAnalyzer = org.apache.lucene.analysis.fr.FrenchAnalyzer;
+
+ /// <summary>
+ /// Factory for <seealso cref="ElisionFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.StandardTokenizerFactory"/>
+ /// <filter class="solr.LowerCaseFilterFactory"/>
+ /// <filter class="solr.ElisionFilterFactory"
+ /// articles="stopwordarticles.txt" ignoreCase="true"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class ElisionFilterFactory : TokenFilterFactory, ResourceLoaderAware, MultiTermAwareComponent
+ {
+ private readonly string articlesFile;
+ private readonly bool ignoreCase;
+ private CharArraySet articles;
+
+ /// <summary>
+ /// Creates a new ElisionFilterFactory </summary>
+ public ElisionFilterFactory(IDictionary<string, string> args) : base(args)
+ {
+ articlesFile = get(args, "articles");
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ if (args.Count > 0)
+ {
+ // List the leftover keys explicitly; concatenating the dictionary would only print its type name.
+ throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+ }
+ }
+
+ /// <summary> Loads the article set from articlesFile, or uses FrenchAnalyzer.DEFAULT_ARTICLES when no file was configured. </summary>
+ public virtual void inform(ResourceLoader loader)
+ {
+ if (articlesFile == null)
+ {
+ articles = FrenchAnalyzer.DEFAULT_ARTICLES;
+ }
+ else
+ {
+ articles = getWordSet(loader, articlesFile, ignoreCase);
+ }
+ }
+
+ public override ElisionFilter create(TokenStream input)
+ {
+ return new ElisionFilter(input, articles);
+ }
+
+ public virtual AbstractAnalysisFactory MultiTermComponent
+ {
+ get
+ {
+ return this;
+ }
+ }
+ }
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
new file mode 100644
index 0000000..598fef8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
@@ -0,0 +1,113 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Simple <seealso cref="ResourceLoader"/> that opens resource files
+ /// from the local file system, optionally resolving against
+ /// a base directory.
+ ///
+ /// <para>This loader wraps a delegate <seealso cref="ResourceLoader"/>
+ /// that is used to resolve all files the current base directory
+ /// does not contain. <seealso cref="newInstance"/> and <seealso cref="findClass"/>
+ /// are always resolved against the delegate.
+ ///
+ /// </para>
+ /// <para>You can chain several <c>FilesystemResourceLoader</c>s
+ /// to allow lookup of files in more than one base directory.
+ /// </para>
+ /// </summary>
+ public sealed class FilesystemResourceLoader : ResourceLoader
+ {
+ // Directory that relative resource names are resolved against; null means no base directory is applied.
+ private readonly File baseDirectory;
+ // Fallback loader for files that are not found and for all type lookups; never null.
+ private readonly ResourceLoader @delegate;
+
+ /// <summary>
+ /// Creates a resource loader that requires absolute filenames or relative to CWD
+ /// to resolve resources. Files not found in file system and class lookups
+ /// are delegated to a new <seealso cref="ClasspathResourceLoader"/>.
+ /// </summary>
+ public FilesystemResourceLoader() : this((File) null)
+ {
+ }
+
+ /// <summary>
+ /// Creates a resource loader that resolves resources against the given
+ /// base directory (may be <c>null</c> to refer to CWD).
+ /// Files not found in file system and class lookups are delegated to a new
+ /// <seealso cref="ClasspathResourceLoader"/>.
+ /// </summary>
+ /// <param name="baseDirectory"> base directory, or <c>null</c> </param>
+ public FilesystemResourceLoader(File baseDirectory) : this(baseDirectory, new ClasspathResourceLoader())
+ {
+ }
+
+ /// <summary>
+ /// Creates a resource loader that resolves resources against the given
+ /// base directory (may be <c>null</c> to refer to CWD).
+ /// Files not found in file system and class lookups are delegated
+ /// to the given delegate <seealso cref="ResourceLoader"/>.
+ /// </summary>
+ /// <param name="baseDirectory"> base directory, or <c>null</c> </param>
+ /// <param name="delegate"> fallback loader; must not be <c>null</c> </param>
+ /// <exception cref="System.ArgumentException"> if <paramref name="baseDirectory"/> is non-null but not a
+ /// directory, or if <paramref name="delegate"/> is <c>null</c> </exception>
+ public FilesystemResourceLoader(File baseDirectory, ResourceLoader @delegate)
+ {
+ if (baseDirectory != null && !baseDirectory.Directory)
+ {
+ throw new System.ArgumentException("baseDirectory is not a directory or null");
+ }
+ if (@delegate == null)
+ {
+ throw new System.ArgumentException("delegate ResourceLoader may not be null");
+ }
+ this.baseDirectory = baseDirectory;
+ this.@delegate = @delegate;
+ }
+
+ /// <summary>
+ /// Opens the named resource from the file system. A non-absolute name is resolved
+ /// against the base directory when one was configured. If the file cannot be found,
+ /// the request is forwarded to the delegate loader.
+ /// (The Java original declared IOException on this method.)
+ /// </summary>
+ /// <param name="resource"> file name of the resource </param>
+ /// <returns> a stream over the file, or whatever the delegate returns </returns>
+ public InputStream openResource(string resource)
+ {
+ try
+ {
+ File file = new File(resource);
+ if (baseDirectory != null && !file.Absolute)
+ {
+ file = new File(baseDirectory, resource);
+ }
+ return new FileInputStream(file);
+ }
+ catch (FileNotFoundException)
+ {
+ // Not on disk: let the delegate try.
+ return @delegate.openResource(resource);
+ }
+ }
+
+ /// <summary>
+ /// Creates an instance of the named type by forwarding to the delegate loader.
+ /// </summary>
+ public T newInstance<T>(string cname, Type expectedType)
+ {
+ // T appears in no parameter, so it cannot be inferred (CS0411) and must be passed explicitly.
+ return @delegate.newInstance<T>(cname, expectedType);
+ }
+
+ /// <summary>
+ /// Finds the named type by forwarding to the delegate loader.
+ /// </summary>
+ public Type findClass<T>(string cname, Type expectedType)
+ {
+ // T appears in no parameter, so it cannot be inferred (CS0411) and must be passed explicitly.
+ return @delegate.findClass<T>(cname, expectedType);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
new file mode 100644
index 0000000..4d55a25
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -0,0 +1,150 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Abstract base class for TokenFilters that may remove tokens.
+ /// You have to implement <seealso cref="Accept"/> and return a boolean if the current
+ /// token should be preserved. <seealso cref="IncrementToken"/> uses this method
+ /// to decide if a token should be passed to the caller.
+ /// <para><a name="lucene_match_version" />As of Lucene 4.4, an
+ /// <seealso cref="System.ArgumentException"/> is thrown when trying to disable position
+ /// increments when filtering terms.
+ /// </para>
+ /// </summary>
+ public abstract class FilteringTokenFilter : TokenFilter
+ {
+
+ // Rejects enablePositionIncrements=false when the match version is 4.4 or later.
+ private static void CheckPositionIncrement(Version version, bool enablePositionIncrements)
+ {
+ if (!enablePositionIncrements && version.OnOrAfter(Version.LUCENE_44))
+ {
+ throw new System.ArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4 as it can create broken token streams");
+ }
+ }
+
+ protected internal readonly Version version;
+ // Assigned in the (Version, TokenStream) constructor, which every construction path chains through.
+ private readonly PositionIncrementAttribute posIncrAtt;
+ private bool enablePositionIncrements; // no init needed, as ctor enforces setting value!
+ // True until the first accepted token has been emitted; only consulted when position increments are disabled.
+ private bool first = true;
+ // Sum of the position increments of the tokens dropped since the last accepted token.
+ private int skippedPositions;
+
+ /// <summary>
+ /// Create a new <seealso cref="FilteringTokenFilter"/>. </summary>
+ /// <param name="version"> the <a href="#lucene_match_version">Lucene match version</a> </param>
+ /// <param name="enablePositionIncrements"> whether to increment position increments when filtering out terms </param>
+ /// <param name="input"> the input to consume </param>
+ /// <exception cref="System.ArgumentException"> if <paramref name="enablePositionIncrements"/> is false and
+ /// <paramref name="version"/> is 4.4 or later </exception>
+ /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4
+ [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4")]
+ public FilteringTokenFilter(Version version, bool enablePositionIncrements, TokenStream input)
+ : this(version, input)
+ {
+ CheckPositionIncrement(version, enablePositionIncrements);
+ this.enablePositionIncrements = enablePositionIncrements;
+ }
+
+ /// <summary>
+ /// Create a new <seealso cref="FilteringTokenFilter"/>. </summary>
+ /// <param name="version"> the Lucene match version </param>
+ /// <param name="in"> the <seealso cref="TokenStream"/> to consume </param>
+ public FilteringTokenFilter(Version version, TokenStream @in)
+ : base(@in)
+ {
+ this.version = version;
+ // Done here rather than in a field initializer: C# field initializers may not call
+ // instance members (CS0236).
+ this.posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+ this.enablePositionIncrements = true;
+ }
+
+ /// <summary>
+ /// Override this method and return if the current input token should be returned by <seealso cref="IncrementToken"/>. </summary>
+ protected internal abstract bool Accept();
+
+ /// <summary>
+ /// Advances the input until <seealso cref="Accept"/> returns true or the input is exhausted.
+ /// With position increments enabled, the increments of the dropped tokens are added to the
+ /// accepted token's increment. With them disabled, only the first accepted token is
+ /// adjusted, so that it never carries an increment of 0.
+ /// </summary>
+ /// <returns> true if a token was accepted, false once the input is exhausted </returns>
+ public override bool IncrementToken()
+ {
+ if (enablePositionIncrements)
+ {
+ skippedPositions = 0;
+ while (Input.IncrementToken())
+ {
+ if (Accept())
+ {
+ if (skippedPositions != 0)
+ {
+ posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+ }
+ return true;
+ }
+ skippedPositions += posIncrAtt.PositionIncrement;
+ }
+ }
+ else
+ {
+ while (Input.IncrementToken())
+ {
+ if (Accept())
+ {
+ if (first)
+ {
+ // first token having posinc=0 is illegal.
+ if (posIncrAtt.PositionIncrement == 0)
+ {
+ posIncrAtt.PositionIncrement = 1;
+ }
+ first = false;
+ }
+ return true;
+ }
+ }
+ }
+ // reached EOS -- return false
+ return false;
+ }
+
+ /// <summary>
+ /// Resets the base filter and clears the first-token flag and the skipped-position count.
+ /// </summary>
+ public override void Reset()
+ {
+ base.Reset();
+ first = true;
+ skippedPositions = 0;
+ }
+
+ /// <summary>
+ /// Whether the position increments of removed tokens are carried over to the next accepted token.
+ /// Setting this to false throws <seealso cref="System.ArgumentException"/> when the match
+ /// version is 4.4 or later.
+ /// </summary>
+ public virtual bool EnablePositionIncrements
+ {
+ get
+ {
+ return enablePositionIncrements;
+ }
+ set
+ {
+ CheckPositionIncrement(version, value);
+ this.enablePositionIncrements = value;
+ }
+ }
+
+ /// <summary>
+ /// Ends the base filter. With position increments enabled, the increments of tokens dropped
+ /// after the last accepted token are added to the end-of-stream position increment.
+ /// </summary>
+ public override void End()
+ {
+ base.End();
+ if (enablePositionIncrements)
+ {
+ posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
new file mode 100644
index 0000000..64cdb36
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
@@ -0,0 +1,39 @@
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Add to any analysis factory component to allow returning an
+ /// analysis component factory for use with partial terms in prefix queries,
+ /// wildcard queries, range query endpoints, regex queries, etc.
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public interface MultiTermAwareComponent
+ {
+ /// <summary>
+ /// Returns an analysis component to handle analysis of multi-term queries.
+ /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
+ /// </summary>
+ AbstractAnalysisFactory MultiTermComponent {get;}
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
new file mode 100644
index 0000000..ead67a2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -0,0 +1,205 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A StringBuilder that allows one to access the array.
+ /// The content is the first <c>len</c> chars of <c>buf</c>; methods named
+ /// <c>unsafeWrite</c> append without checking capacity, the <c>write</c>
+ /// methods grow the array first when needed.
+ /// </summary>
+ public class OpenStringBuilder : Appendable, CharSequence
+ {
+ // Backing array; only indices [0, len) hold content.
+ protected internal char[] buf;
+ // Number of chars of buf currently in use.
+ protected internal int len;
+
+ /// <summary> Creates an empty builder with an initial capacity of 32 chars. </summary>
+ public OpenStringBuilder() : this(32)
+ {
+ }
+
+ /// <summary> Creates an empty builder with the given initial capacity. </summary>
+ /// <param name="size"> initial length of the backing array </param>
+ public OpenStringBuilder(int size)
+ {
+ buf = new char[size];
+ }
+
+ /// <summary> Creates a builder that uses <paramref name="arr"/> directly (no copy) with
+ /// <paramref name="len"/> chars of content. </summary>
+ public OpenStringBuilder(char[] arr, int len)
+ {
+ set(arr, len);
+ }
+
+ /// <summary> Sets the content length without touching the backing array. </summary>
+ public virtual int Length
+ {
+ set
+ {
+ this.len = value;
+ }
+ }
+
+ /// <summary> Replaces the backing array (not copied) and the content length. </summary>
+ public virtual void set(char[] arr, int end)
+ {
+ this.buf = arr;
+ this.len = end;
+ }
+
+ /// <summary> The backing array itself, not a copy. </summary>
+ public virtual char[] Array
+ {
+ get
+ {
+ return buf;
+ }
+ }
+ /// <summary> Number of chars of content. </summary>
+ public virtual int size()
+ {
+ return len;
+ }
+ /// <summary> Number of chars of content. </summary>
+ public override int length()
+ {
+ return len;
+ }
+ /// <summary> Length of the backing array. </summary>
+ public virtual int capacity()
+ {
+ return buf.Length;
+ }
+
+ /// <summary> Appends the whole sequence. </summary>
+ public override Appendable append(CharSequence csq)
+ {
+ return append(csq, 0, csq.length());
+ }
+
+ /// <summary> Appends the chars of <paramref name="csq"/> in [start, end). </summary>
+ public override Appendable append(CharSequence csq, int start, int end)
+ {
+ // Capacity is reserved once up front, so the per-char writes can skip the check.
+ reserve(end - start);
+ for (int i = start; i < end; i++)
+ {
+ unsafeWrite(csq.charAt(i));
+ }
+ return this;
+ }
+
+ /// <summary> Appends a single char, growing the array if needed. </summary>
+ public override Appendable append(char c)
+ {
+ write(c);
+ return this;
+ }
+
+ /// <summary> Reads the backing array at <paramref name="index"/>; not checked against the content length. </summary>
+ public override char charAt(int index)
+ {
+ return buf[index];
+ }
+
+ /// <summary> Overwrites the backing array at <paramref name="index"/>; not checked against the content length. </summary>
+ public virtual void setCharAt(int index, char ch)
+ {
+ buf[index] = ch;
+ }
+
+ /// <summary> Not implemented. </summary>
+ /// <exception cref="System.NotSupportedException"> always </exception>
+ public override CharSequence subSequence(int start, int end)
+ {
+ throw new System.NotSupportedException(); // todo
+ }
+
+ /// <summary> Appends one char without checking capacity. </summary>
+ public virtual void unsafeWrite(char b)
+ {
+ buf[len++] = b;
+ }
+
+ /// <summary> Appends <paramref name="b"/> cast to char without checking capacity. </summary>
+ public virtual void unsafeWrite(int b)
+ {
+ unsafeWrite((char)b);
+ }
+
+ /// <summary> Appends <paramref name="len"/> chars of <paramref name="b"/> starting at
+ /// <paramref name="off"/>, without growing the backing array. </summary>
+ public virtual void unsafeWrite(char[] b, int off, int len)
+ {
+ // Fully qualified: inside this class the simple name "Array" binds to the Array property (a char[]).
+ System.Array.Copy(b, off, buf, this.len, len);
+ this.len += len;
+ }
+
+ /// <summary> Replaces the backing array with one of at least <paramref name="len"/> chars
+ /// (and at least double the current capacity), keeping the content. </summary>
+ protected internal virtual void resize(int len)
+ {
+ char[] newbuf = new char[Math.Max(buf.Length << 1, len)];
+ System.Array.Copy(buf, 0, newbuf, 0, size());
+ buf = newbuf;
+ }
+
+ /// <summary> Makes sure <paramref name="num"/> more chars fit after the current content. </summary>
+ public virtual void reserve(int num)
+ {
+ if (len + num > buf.Length)
+ {
+ resize(len + num);
+ }
+ }
+
+ /// <summary> Appends one char, growing the array if it is full. </summary>
+ public virtual void write(char b)
+ {
+ if (len >= buf.Length)
+ {
+ resize(len + 1);
+ }
+ unsafeWrite(b);
+ }
+
+ /// <summary> Appends <paramref name="b"/> cast to char, growing the array if it is full. </summary>
+ public virtual void write(int b)
+ {
+ write((char)b);
+ }
+
+ /// <summary> Appends the whole array. </summary>
+ public void write(char[] b)
+ {
+ write(b,0,b.Length);
+ }
+
+ /// <summary> Appends <paramref name="len"/> chars of <paramref name="b"/> starting at
+ /// <paramref name="off"/>, growing the array if needed. </summary>
+ public virtual void write(char[] b, int off, int len)
+ {
+ reserve(len);
+ unsafeWrite(b, off, len);
+ }
+
+ /// <summary> Appends the content of another builder. </summary>
+ public void write(OpenStringBuilder arr)
+ {
+ // The amount to copy is the source's content length, not this builder's.
+ write(arr.buf, 0, arr.len);
+ }
+
+ /// <summary> Appends all chars of <paramref name="s"/>, growing the array if needed. </summary>
+ public virtual void write(string s)
+ {
+ reserve(s.Length);
+ s.CopyTo(0, buf, len, s.Length);
+ len += s.Length;
+ }
+
+ /// <summary> Does nothing. </summary>
+ public virtual void flush()
+ {
+ }
+
+ /// <summary> Empties the builder; the backing array is kept. </summary>
+ public void reset()
+ {
+ len = 0;
+ }
+
+ /// <summary> Returns a copy of the content in a new array of exactly size() chars. </summary>
+ public virtual char[] ToCharArray()
+ {
+ char[] newbuf = new char[size()];
+ System.Array.Copy(buf, 0, newbuf, 0, size());
+ return newbuf;
+ }
+
+ /// <summary> Returns the content as a string. </summary>
+ public override string ToString()
+ {
+ return new string(buf, 0, size());
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
new file mode 100644
index 0000000..3e4bc1f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
@@ -0,0 +1,49 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Abstraction for loading resources (streams, files, and classes).
+ /// </summary>
+ public interface ResourceLoader
+ {
+
+ /// <summary>
+ /// Opens a named resource
+ /// </summary>
+ /// <param name="resource"> name of the resource to open </param>
+ /// <returns> a stream over the resource </returns>
+ /// <remarks> The Java original declared IOException on this method. </remarks>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public java.io.InputStream openResource(String resource) throws java.io.IOException;
+ InputStream openResource(string resource);
+
+
+ /// <summary>
+ /// Finds class of the name and expected type
+ /// </summary>
+ /// <param name="cname"> name of the class to look up </param>
+ /// <param name="expectedType"> type the result is expected to have </param>
+ Type findClass<T>(string cname, Type expectedType);
+
+ /// <summary>
+ /// Creates an instance of the name and expected type
+ /// </summary>
+ /// <param name="cname"> name of the class to instantiate </param>
+ /// <param name="expectedType"> type the instance is expected to have </param>
+ // TODO: fix exception handling
+ T newInstance<T>(string cname, Type expectedType);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
new file mode 100644
index 0000000..97fe682
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /// <summary>
+ /// Interface for a component that needs to be initialized by
+ /// an implementation of <seealso cref="ResourceLoader"/>.
+ /// </summary>
+ /// <seealso cref="ResourceLoader"/>
+ public interface ResourceLoaderAware
+ {
+
+ /// <summary>
+ /// Initializes this component with the provided ResourceLoader
+ /// (used for loading classes, files, etc).
+ /// </summary>
+ /// <param name="loader"> the loader this component should use </param>
+ /// <remarks> The Java original declared IOException on this method. </remarks>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: void inform(ResourceLoader loader) throws java.io.IOException;
+ void inform(ResourceLoader loader);
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
new file mode 100644
index 0000000..1aae904
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
@@ -0,0 +1,200 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+ using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+ /// <summary>
+ /// Acts like a forever growing char[] as you read
+ /// characters into it from the provided reader, but
+ /// internally it uses a circular buffer to only hold the
+ /// characters that haven't been freed yet. This is like a
+ /// PushbackReader, except you don't have to specify
+ /// up-front the max size of the buffer, but you do have to
+ /// periodically call <seealso cref="freeBefore"/>.
+ /// </summary>
+
+ public sealed class RollingCharBuffer
+ {
+
+ // Source of characters; replaced by reset(Reader).
+ private Reader reader;
+
+ // Circular storage; starts at 512 chars and is grown in get(int) when full.
+ private char[] buffer = new char[512];
+
+ // Next array index to write to in buffer:
+ private int nextWrite;
+
+ // Next absolute position to read from reader:
+ private int nextPos;
+
+ // How many valid chars (wrapped) are in the buffer:
+ private int count;
+
+ // True if we hit EOF
+ private bool end;
+
+ /// <summary>
+ /// Clear array and switch to new reader. </summary>
+ /// <param name="reader"> the reader to pull characters from after the reset </param>
+ public void reset(Reader reader)
+ {
+ // The array itself is kept; only the bookkeeping is zeroed.
+ this.reader = reader;
+ nextPos = 0;
+ nextWrite = 0;
+ count = 0;
+ end = false;
+ }
+
+ /* Absolute position read. NOTE: pos must not jump
+ * ahead by more than 1! Ie, it's OK to read arbitrarily
+ * far back (just not prior to the last {@link
+ * #freeBefore}), but NOT ok to read arbitrarily far
+ * ahead. Returns -1 if you hit EOF. */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public int get(int pos) throws java.io.IOException
+ public int get(int pos)
+ {
+ //System.out.println(" get pos=" + pos + " nextPos=" + nextPos + " count=" + count);
+ if (pos == nextPos)
+ {
+ // pos is the first position not read yet: more chars must be pulled from the reader.
+ if (end)
+ {
+ return -1;
+ }
+ if (count == buffer.Length)
+ {
+ // Grow
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] newBuffer = new char[org.apache.lucene.util.ArrayUtil.oversize(1+count, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR)];
+ char[] newBuffer = new char[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_CHAR)];
+ //System.out.println(Thread.currentThread().getName() + ": cb grow " + newBuffer.length);
+ // The buffer is full, so nextWrite is also the index of the oldest char. Copy
+ // [nextWrite, end) and then [0, nextWrite) so the content starts at index 0 of the new array.
+ Array.Copy(buffer, nextWrite, newBuffer, 0, buffer.Length - nextWrite);
+ Array.Copy(buffer, 0, newBuffer, buffer.Length - nextWrite, nextWrite);
+ // Writing continues right after the copied content (the old array length).
+ nextWrite = buffer.Length;
+ buffer = newBuffer;
+ }
+ if (nextWrite == buffer.Length)
+ {
+ // Wrap the write index around to the start of the array.
+ nextWrite = 0;
+ }
+
+ // Limit the read to the smaller of: room left before the end of the array
+ // (buffer.Length - nextWrite) and the number of free slots (buffer.Length - count).
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int toRead = buffer.length - Math.max(count, nextWrite);
+ int toRead = buffer.Length - Math.Max(count, nextWrite);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int readCount = reader.read(buffer, nextWrite, toRead);
+ int readCount = reader.read(buffer, nextWrite, toRead);
+ // NOTE(review): -1 is the java.io.Reader end-of-stream value. If Reader ends up mapped to a
+ // .NET TextReader, Read returns 0 at end of stream instead - confirm this check still fires.
+ if (readCount == -1)
+ {
+ end = true;
+ return -1;
+ }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ch = buffer[nextWrite];
+ // The char for pos is the first one just read; the rest of the chunk is buffered for later calls.
+ int ch = buffer[nextWrite];
+ nextWrite += readCount;
+ count += readCount;
+ nextPos += readCount;
+ return ch;
+ }
+ else
+ {
+ // Cannot read from future (except by 1):
+ Debug.Assert(pos < nextPos);
+
+ // Cannot read from already freed past:
+ Debug.Assert(nextPos - pos <= count, "nextPos=" + nextPos + " pos=" + pos + " count=" + count);
+
+ return buffer[getIndex(pos)];
+ }
+ }
+
+ // For assert:
+ // True when pos is non-negative, already read, and within the last count chars.
+ private bool inBounds(int pos)
+ {
+ return pos >= 0 && pos < nextPos && pos >= nextPos - count;
+ }
+
+ // Maps an absolute position to an index into buffer by stepping back (nextPos - pos) slots from nextWrite.
+ private int getIndex(int pos)
+ {
+ int index = nextWrite - (nextPos - pos);
+ if (index < 0)
+ {
+ // Wrap:
+ index += buffer.Length;
+ Debug.Assert(index >= 0);
+ }
+ return index;
+ }
+
+ /// <summary>
+ /// Returns a new array holding <paramref name="length"/> chars starting at absolute
+ /// position <paramref name="posStart"/>. Only the start position is asserted to be in bounds.
+ /// </summary>
+ /// <param name="posStart"> absolute position of the first char to copy </param>
+ /// <param name="length"> number of chars to copy; asserted to be greater than 0 </param>
+ public char[] get(int posStart, int length)
+ {
+ Debug.Assert(length > 0);
+ Debug.Assert(inBounds(posStart), "posStart=" + posStart + " length=" + length);
+ //System.out.println(" buffer.get posStart=" + posStart + " len=" + length);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int startIndex = getIndex(posStart);
+ int startIndex = getIndex(posStart);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int endIndex = getIndex(posStart + length);
+ int endIndex = getIndex(posStart + length);
+ //System.out.println(" startIndex=" + startIndex + " endIndex=" + endIndex);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] result = new char[length];
+ char[] result = new char[length];
+ // When length equals buffer.Length the two indices coincide; that case is routed to
+ // the wrapped branch by the second condition.
+ if (endIndex >= startIndex && length < buffer.Length)
+ {
+ Array.Copy(buffer, startIndex, result, 0, endIndex - startIndex);
+ }
+ else
+ {
+ // Wrapped:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int part1 = buffer.length-startIndex;
+ // part1 chars run from startIndex to the end of the array; the remainder starts at index 0.
+ int part1 = buffer.Length - startIndex;
+ Array.Copy(buffer, startIndex, result, 0, part1);
+ Array.Copy(buffer, 0, result, buffer.Length - startIndex, length - part1);
+ }
+ return result;
+ }
+
+ /// <summary>
+ /// Call this to notify us that no chars before this
+ /// absolute position are needed anymore.
+ /// </summary>
+ /// <param name="pos"> absolute position; asserted to be between 0 and the next position to read </param>
+ public void freeBefore(int pos)
+ {
+ Debug.Assert(pos >= 0);
+ Debug.Assert(pos <= nextPos);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newCount = nextPos - pos;
+ int newCount = nextPos - pos;
+ Debug.Assert(newCount <= count, "newCount=" + newCount + " count=" + count);
+ Debug.Assert(newCount <= buffer.Length, "newCount=" + newCount + " buf.length=" + buffer.Length);
+ // Only the valid-char count shrinks; no data is moved or cleared.
+ count = newCount;
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
new file mode 100644
index 0000000..873936e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -0,0 +1,258 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+ /// <summary>
+ /// Breaks text into sentences with a <seealso cref="BreakIterator"/> and
+ /// allows subclasses to decompose these sentences into words.
+ /// <para>
+ /// This can be used by subclasses that need sentence context
+ /// for tokenization purposes, such as CJK segmenters.
+ /// </para>
+ /// <para>
+ /// Additionally it can be used by subclasses that want to mark
+ /// sentence boundaries (with a custom attribute, extra token, position
+ /// increment, etc) for downstream processing.
+ /// </para>
+ /// <para>
+ /// Input is read into a fixed buffer of <see cref="BUFFERMAX"/> chars. Only the
+ /// part of the buffer up to the last "safe" break (see <see cref="isSafeEnd(char)"/>)
+ /// is handed to the BreakIterator; the remainder is carried over to the next refill.
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public abstract class SegmentingTokenizerBase : Tokenizer
+ {
+     /// <summary>
+     /// size of the internal text buffer, in chars </summary>
+     protected internal const int BUFFERMAX = 1024;
+     /// <summary>
+     /// text currently being segmented </summary>
+     protected internal readonly char[] buffer = new char[BUFFERMAX];
+     /// <summary>
+     /// true length of text in the buffer </summary>
+     private int length = 0;
+     /// <summary>
+     /// length in buffer that can be evaluated safely, up to a safe end point </summary>
+     private int usableLength = 0;
+     /// <summary>
+     /// accumulated offset of previous buffers for this reader, for offsetAtt </summary>
+     protected internal int offset = 0;
+
+     private readonly BreakIterator iterator;
+     private readonly CharArrayIterator wrapper = CharArrayIterator.newSentenceInstance();
+
+     // Assigned in the constructor: a C# field initializer may not call an
+     // instance method such as addAttribute.
+     private readonly OffsetAttribute offsetAtt;
+
+     /// <summary>
+     /// Construct a new SegmenterBase, using
+     /// the provided BreakIterator for sentence segmentation.
+     /// <para>
+     /// Note that you should never share BreakIterators across different
+     /// TokenStreams, instead a newly created or cloned one should always
+     /// be provided to this constructor.
+     /// </para>
+     /// </summary>
+     /// <param name="reader"> source of the text to tokenize </param>
+     /// <param name="iterator"> sentence BreakIterator owned by this tokenizer </param>
+     public SegmentingTokenizerBase(Reader reader, BreakIterator iterator) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
+     {
+     }
+
+     /// <summary>
+     /// Construct a new SegmenterBase, also supplying the AttributeFactory
+     /// </summary>
+     /// <param name="factory"> attribute factory passed to the base tokenizer </param>
+     /// <param name="reader"> source of the text to tokenize </param>
+     /// <param name="iterator"> sentence BreakIterator owned by this tokenizer </param>
+     public SegmentingTokenizerBase(AttributeFactory factory, Reader reader, BreakIterator iterator) : base(factory, reader)
+     {
+         this.offsetAtt = addAttribute(typeof(OffsetAttribute));
+         this.iterator = iterator;
+     }
+
+     /// <summary>
+     /// Advances to the next word: first within the current sentence, then in
+     /// the following sentences, refilling the buffer whenever the buffered
+     /// sentences are used up.
+     /// </summary>
+     /// <returns> true if a word is available, false once no text is left </returns>
+     public override bool incrementToken()
+     {
+         if (length == 0 || !incrementWord())
+         {
+             while (!incrementSentence())
+             {
+                 refill();
+                 if (length <= 0) // no more chars to read
+                 {
+                     return false;
+                 }
+             }
+         }
+
+         return true;
+     }
+
+     /// <summary>
+     /// Resets the base tokenizer, empties the text seen by the BreakIterator
+     /// and zeroes the buffer bookkeeping.
+     /// </summary>
+     public override void reset()
+     {
+         base.reset();
+         wrapper.setText(buffer, 0, 0);
+         iterator.Text = wrapper;
+         length = usableLength = offset = 0;
+     }
+
+     /// <summary>
+     /// Sets the final offset to the (corrected) position just past all text
+     /// accounted for so far.
+     /// </summary>
+     public override void end()
+     {
+         base.end();
+         int finalOffset = correctOffset(length < 0 ? offset : offset + length);
+         offsetAtt.setOffset(finalOffset, finalOffset);
+     }
+
+     /// <summary>
+     /// Returns the last unambiguous break position in the text. </summary>
+     /// <returns> index just past the last safe-end char in the buffer, or -1 if there is none </returns>
+     private int findSafeEnd()
+     {
+         for (int i = length - 1; i >= 0; i--)
+         {
+             if (isSafeEnd(buffer[i]))
+             {
+                 return i + 1;
+             }
+         }
+         return -1;
+     }
+
+     /// <summary>
+     /// For sentence tokenization, these are the unambiguous break positions. </summary>
+     /// <param name="ch"> char to test </param>
+     /// <returns> true for U+000D, U+000A, U+0085, U+2028 and U+2029 </returns>
+     protected internal virtual bool isSafeEnd(char ch)
+     {
+         // Switch on the int value: C# has no implicit conversion from an
+         // int constant to char, so the case labels need an int governing type.
+         switch ((int)ch)
+         {
+             case 0x000D: // carriage return
+             case 0x000A: // line feed
+             case 0x0085: // next line
+             case 0x2028: // line separator
+             case 0x2029: // paragraph separator
+                 return true;
+             default:
+                 return false;
+         }
+     }
+
+     /// <summary>
+     /// Refill the buffer, accumulating the offset and setting usableLength to the
+     /// last unambiguous break position
+     /// </summary>
+     private void refill()
+     {
+         offset += usableLength;
+
+         // Move the not-yet-usable tail to the front and top the buffer up.
+         int leftover = length - usableLength;
+         Array.Copy(buffer, usableLength, buffer, 0, leftover);
+         int requested = buffer.Length - leftover;
+         int returned = read(input, buffer, leftover, requested);
+         length = returned < 0 ? leftover : returned + leftover;
+
+         if (returned < requested) // reader has been emptied, process the rest
+         {
+             usableLength = length;
+         }
+         else // still more data to be read, find a safe-stopping place
+         {
+             usableLength = findSafeEnd();
+             if (usableLength < 0)
+             {
+                 // more than BUFFERMAX chars of text without breaks,
+                 // gonna possibly truncate tokens
+                 usableLength = length;
+             }
+         }
+
+         wrapper.setText(buffer, 0, Math.Max(0, usableLength));
+         iterator.Text = wrapper;
+     }
+
+     // TODO: refactor to a shared readFully somewhere
+     // (NGramTokenizer does this too):
+     /// <summary>
+     /// commons-io's readFully, but without bugs if offset != 0 </summary>
+     /// <param name="input"> reader to pull chars from </param>
+     /// <param name="buffer"> destination array </param>
+     /// <param name="offset"> index in <paramref name="buffer"/> of the first char written </param>
+     /// <param name="length"> number of chars requested; asserted to be non-negative </param>
+     /// <returns> number of chars actually stored; less than <paramref name="length"/> only if the reader reported EOF (-1) </returns>
+     private static int read(Reader input, char[] buffer, int offset, int length)
+     {
+         Debug.Assert(length >= 0, "length must not be negative: " + length);
+
+         int remaining = length;
+         while (remaining > 0)
+         {
+             int location = length - remaining;
+             int count = input.read(buffer, offset + location, remaining);
+             if (-1 == count) // EOF
+             {
+                 break;
+             }
+             remaining -= count;
+         }
+         return length - remaining;
+     }
+
+     /// <summary>
+     /// return true if there is a token from the buffer, or false if it is
+     /// exhausted.
+     /// </summary>
+     private bool incrementSentence()
+     {
+         if (length == 0) // we must refill the buffer
+         {
+             return false;
+         }
+
+         while (true)
+         {
+             int start = iterator.current();
+
+             if (start == BreakIterator.DONE)
+             {
+                 return false; // BreakIterator exhausted
+             }
+
+             // find the next set of boundaries
+             int end = iterator.next();
+
+             if (end == BreakIterator.DONE)
+             {
+                 return false; // BreakIterator exhausted
+             }
+
+             setNextSentence(start, end);
+             if (incrementWord())
+             {
+                 return true;
+             }
+         }
+     }
+
+     /// <summary>
+     /// Provides the next input sentence for analysis </summary>
+     /// <param name="sentenceStart"> start boundary reported by the BreakIterator </param>
+     /// <param name="sentenceEnd"> end boundary reported by the BreakIterator </param>
+     protected internal abstract void setNextSentence(int sentenceStart, int sentenceEnd);
+
+     /// <summary>
+     /// Returns true if another word is available </summary>
+     protected internal abstract bool incrementWord();
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
new file mode 100644
index 0000000..e8a1ddc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
@@ -0,0 +1,153 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Some commonly-used stemming functions
+ ///
+ /// @lucene.internal
+ /// </summary>
+ public static class StemmerUtil
+ {
+     /// <summary>
+     /// Returns true if the character array starts with the prefix.
+     /// </summary>
+     /// <param name="s"> Input Buffer </param>
+     /// <param name="len"> length of input buffer </param>
+     /// <param name="prefix"> Prefix string to test </param>
+     /// <returns> true if <code>s</code> starts with <code>prefix</code>;
+     /// false when <code>prefix</code> is longer than <code>len</code> </returns>
+     public static bool StartsWith(char[] s, int len, string prefix)
+     {
+         int prefixLen = prefix.Length;
+         if (prefixLen > len)
+         {
+             return false;
+         }
+         for (int i = 0; i < prefixLen; i++)
+         {
+             if (s[i] != prefix[i])
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /// <summary>
+     /// Returns true if the character array ends with the suffix.
+     /// </summary>
+     /// <param name="s"> Input Buffer </param>
+     /// <param name="len"> length of input buffer </param>
+     /// <param name="suffix"> Suffix string to test </param>
+     /// <returns> true if <code>s</code> ends with <code>suffix</code>;
+     /// false when <code>suffix</code> is longer than <code>len</code> </returns>
+     public static bool EndsWith(char[] s, int len, string suffix)
+     {
+         int suffixLen = suffix.Length;
+         if (suffixLen > len)
+         {
+             return false;
+         }
+         // Compare from the last char backwards; s[len - (suffixLen - i)]
+         // is the buffer char aligned with suffix[i].
+         for (int i = suffixLen - 1; i >= 0; i--)
+         {
+             if (s[len - (suffixLen - i)] != suffix[i])
+             {
+                 return false;
+             }
+         }
+
+         return true;
+     }
+
+     /// <summary>
+     /// Returns true if the character array ends with the suffix.
+     /// </summary>
+     /// <param name="s"> Input Buffer </param>
+     /// <param name="len"> length of input buffer </param>
+     /// <param name="suffix"> Suffix chars to test </param>
+     /// <returns> true if <code>s</code> ends with <code>suffix</code>;
+     /// false when <code>suffix</code> is longer than <code>len</code> </returns>
+     public static bool EndsWith(char[] s, int len, char[] suffix)
+     {
+         int suffixLen = suffix.Length;
+         if (suffixLen > len)
+         {
+             return false;
+         }
+         // Same backwards alignment as the string overload.
+         for (int i = suffixLen - 1; i >= 0; i--)
+         {
+             if (s[len - (suffixLen - i)] != suffix[i])
+             {
+                 return false;
+             }
+         }
+
+         return true;
+     }
+
+     /// <summary>
+     /// Delete a character in-place
+     /// </summary>
+     /// <param name="s"> Input Buffer </param>
+     /// <param name="pos"> Position of character to delete </param>
+     /// <param name="len"> length of input buffer </param>
+     /// <returns> length of input buffer after deletion </returns>
+     public static int delete(char[] s, int pos, int len)
+     {
+         Debug.Assert(pos < len);
+         if (pos < len - 1) // don't arraycopy if asked to delete last character
+         {
+             // Shift the tail one slot to the left over the deleted char.
+             Array.Copy(s, pos + 1, s, pos, len - pos - 1);
+         }
+         return len - 1;
+     }
+
+     /// <summary>
+     /// Delete n characters in-place
+     /// </summary>
+     /// <param name="s"> Input Buffer </param>
+     /// <param name="pos"> Position of the first character to delete </param>
+     /// <param name="len"> Length of input buffer </param>
+     /// <param name="nChars"> number of characters to delete </param>
+     /// <returns> length of input buffer after deletion </returns>
+     public static int deleteN(char[] s, int pos, int len, int nChars)
+     {
+         Debug.Assert(pos + nChars <= len);
+         if (pos + nChars < len) // don't arraycopy if asked to delete the last characters
+         {
+             // Shift the tail nChars slots to the left over the deleted run.
+             Array.Copy(s, pos + nChars, s, pos, len - pos - nChars);
+         }
+         return len - nChars;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
new file mode 100644
index 0000000..2433a83
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
@@ -0,0 +1,172 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.util
+{
+
+
+ using IOUtils = org.apache.lucene.util.IOUtils;
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// Common base for analyzers that are configured with a set of stopwords.
+ /// </summary>
+ public abstract class StopwordAnalyzerBase : Analyzer
+ {
+     /// <summary>
+     /// The stopword set used by this analyzer. It is either
+     /// <c>CharArraySet.EMPTY_SET</c> or an unmodifiable copy of the set
+     /// handed to the constructor.
+     /// </summary>
+     protected internal readonly CharArraySet stopwords;
+
+     /// <summary>
+     /// The Lucene version this analyzer was created with.
+     /// </summary>
+     protected internal readonly Version matchVersion;
+
+     /// <summary>
+     /// Gets the analyzer's stopword set; this is an empty set when the
+     /// analyzer was created without stopwords.
+     /// </summary>
+     /// <returns> the set stored in <see cref="stopwords"/> </returns>
+     public virtual CharArraySet StopwordSet
+     {
+         get
+         {
+             return this.stopwords;
+         }
+     }
+
+     /// <summary>
+     /// Creates a new instance backed by the given stopword set.
+     /// </summary>
+     /// <param name="version">
+     ///          the Lucene version for cross version compatibility </param>
+     /// <param name="stopwords">
+     ///          the analyzer's stopword set; <c>null</c> selects the empty set </param>
+     protected internal StopwordAnalyzerBase(Version version, CharArraySet stopwords)
+     {
+         this.matchVersion = version;
+
+         // analyzers should use char array set for stopwords!
+         if (stopwords == null)
+         {
+             this.stopwords = CharArraySet.EMPTY_SET;
+         }
+         else
+         {
+             // Copy first, then wrap, so the stored set is detached from the caller's.
+             this.stopwords = CharArraySet.unmodifiableSet(CharArraySet.copy(version, stopwords));
+         }
+     }
+
+     /// <summary>
+     /// Creates a new Analyzer with an empty stopword set
+     /// </summary>
+     /// <param name="version">
+     ///          the Lucene version for cross version compatibility </param>
+     protected internal StopwordAnalyzerBase(Version version) : this(version, null)
+     {
+     }
+
+     /// <summary>
+     /// Builds a CharArraySet from a resource that is looked up through the
+     /// given class and decoded as UTF-8.
+     /// </summary>
+     /// <param name="ignoreCase">
+     ///          <code>true</code> if the set should ignore the case of the
+     ///          stopwords, otherwise <code>false</code> </param>
+     /// <param name="aClass">
+     ///          a class that is associated with the given stopwordResource </param>
+     /// <param name="resource">
+     ///          name of the resource file associated with the given class </param>
+     /// <param name="comment">
+     ///          comment string to ignore in the stopword file </param>
+     /// <returns> a CharArraySet containing the distinct stopwords from the given
+     ///         file </returns>
+     /// <exception cref="IOException">
+     ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+     protected internal static CharArraySet loadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
+     {
+         Reader source = null;
+         try
+         {
+             source = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
+             CharArraySet target = new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase);
+             return WordlistLoader.getWordSet(source, comment, target);
+         }
+         finally
+         {
+             // Runs on success and on failure; source is still null if opening failed.
+             IOUtils.close(source);
+         }
+     }
+
+     /// <summary>
+     /// Builds a CharArraySet from a file that is decoded as UTF-8.
+     /// </summary>
+     /// <param name="stopwords">
+     ///          the stopwords file to load
+     /// </param>
+     /// <param name="matchVersion">
+     ///          the Lucene version for cross version compatibility </param>
+     /// <returns> a CharArraySet containing the distinct stopwords from the given
+     ///         file </returns>
+     /// <exception cref="IOException">
+     ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+     protected internal static CharArraySet loadStopwordSet(File stopwords, Version matchVersion)
+     {
+         Reader source = null;
+         try
+         {
+             source = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8);
+             return WordlistLoader.getWordSet(source, matchVersion);
+         }
+         finally
+         {
+             // Runs on success and on failure; source is still null if opening failed.
+             IOUtils.close(source);
+         }
+     }
+
+     /// <summary>
+     /// Builds a CharArraySet from a reader; the reader is passed to
+     /// <c>IOUtils.close</c> before this method returns.
+     /// </summary>
+     /// <param name="stopwords">
+     ///          the stopwords reader to load
+     /// </param>
+     /// <param name="matchVersion">
+     ///          the Lucene version for cross version compatibility </param>
+     /// <returns> a CharArraySet containing the distinct stopwords from the given
+     ///         reader </returns>
+     /// <exception cref="IOException">
+     ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+     protected internal static CharArraySet loadStopwordSet(Reader stopwords, Version matchVersion)
+     {
+         try
+         {
+             return WordlistLoader.getWordSet(stopwords, matchVersion);
+         }
+         finally
+         {
+             IOUtils.close(stopwords);
+         }
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
new file mode 100644
index 0000000..c7769ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Base class of the analysis factories that produce <seealso cref="TokenFilter"/>
+ /// instances. The static members delegate to a shared
+ /// <c>AnalysisSPILoader</c> for this factory type.
+ /// </summary>
+ public abstract class TokenFilterFactory : AbstractAnalysisFactory
+ {
+     // Shared loader, created with the two name suffixes listed below.
+     private static readonly AnalysisSPILoader<TokenFilterFactory> loader =
+         new AnalysisSPILoader<TokenFilterFactory>(
+             typeof(TokenFilterFactory),
+             new string[] { "TokenFilterFactory", "FilterFactory" });
+
+     /// <summary>
+     /// Creates the tokenfilter factory registered under the given name. </summary>
+     /// <param name="name"> name the factory is looked up by </param>
+     /// <param name="args"> key-value pairs forwarded to the loader </param>
+     /// <returns> the factory instance produced by the loader </returns>
+     public static TokenFilterFactory forName(string name, IDictionary<string, string> args)
+     {
+         TokenFilterFactory created = loader.newInstance(name, args);
+         return created;
+     }
+
+     /// <summary>
+     /// Resolves the tokenfilter factory class registered under the given name. </summary>
+     /// <param name="name"> name the factory class is looked up by </param>
+     /// <returns> the type reported by the loader </returns>
+     public static Type lookupClass(string name)
+     {
+         Type resolved = loader.lookupClass(name);
+         return resolved;
+     }
+
+     /// <summary>
+     /// Lists the names of all tokenfilter factories known to the loader. </summary>
+     /// <returns> the set of service names reported by the loader </returns>
+     public static HashSet<string> availableTokenFilters()
+     {
+         HashSet<string> names = loader.availableServices();
+         return names;
+     }
+
+     /// <summary>
+     /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+     /// Changes to the factories are visible after the method ends, all
+     /// iterators (<see cref="availableTokenFilters()"/>,...) stay consistent.
+     ///
+     /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+     /// never removed or replaced.
+     ///
+     /// </para>
+     /// <para><em>This method is expensive and should only be called for discovery
+     /// of new factories on the given classpath/classloader!</em>
+     /// </para>
+     /// </summary>
+     /// <param name="classloader"> class loader handed to the shared loader's reload </param>
+     public static void ReloadTokenFilters(ClassLoader classloader)
+     {
+         loader.reload(classloader);
+     }
+
+     /// <summary>
+     /// Initializes the factory from a set of key-value pairs, which are
+     /// passed on to the base class.
+     /// </summary>
+     /// <param name="args"> configuration key-value pairs </param>
+     protected internal TokenFilterFactory(IDictionary<string, string> args)
+         : base(args)
+     {
+     }
+
+     /// <summary>
+     /// Transform the specified input TokenStream </summary>
+     /// <param name="input"> the stream to transform </param>
+     /// <returns> the transformed stream </returns>
+     public abstract TokenStream Create(TokenStream input);
+ }
+}
\ No newline at end of file