You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/04/15 01:32:27 UTC

[1/3] lucenenet git commit: TextReader.Read actually returns 0

Repository: lucenenet
Updated Branches:
  refs/heads/master e670c1e76 -> b4eaf2fc4


TextReader.Read actually returns 0


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8d7a54fc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8d7a54fc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8d7a54fc

Branch: refs/heads/master
Commit: 8d7a54fc66ecc1ffe2e4d7af59d6b73c81854db7
Parents: e670c1e
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Tue Apr 14 02:22:15 2015 +0300
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Tue Apr 14 02:22:15 2015 +0300

----------------------------------------------------------------------
 .../Analysis/Util/CharacterUtils.cs             | 874 +++++++++----------
 1 file changed, 437 insertions(+), 437 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8d7a54fc/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index e0e9a78..4d2e076 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -8,443 +8,443 @@ using Version = Lucene.Net.Util.LuceneVersion;
 namespace Lucene.Net.Analysis.Util
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// <seealso cref="CharacterUtils"/> provides a unified interface to Character-related
-	/// operations to implement backwards compatible character operations based on a
-	/// <seealso cref="LuceneVersion"/> instance.
-	/// 
-	/// @lucene.internal
-	/// </summary>
-	public abstract class CharacterUtils
-	{
-	  private static readonly CharacterUtils JAVA_4 = new Java4CharacterUtils();
-	  private static readonly CharacterUtils JAVA_5 = new Java5CharacterUtils();
-
-	  /// <summary>
-	  /// Returns a <seealso cref="CharacterUtils"/> implementation according to the given
-	  /// <seealso cref="LuceneVersion"/> instance.
-	  /// </summary>
-	  /// <param name="matchVersion">
-	  ///          a version instance </param>
-	  /// <returns> a <seealso cref="CharacterUtils"/> implementation according to the given
-	  ///         <seealso cref="LuceneVersion"/> instance. </returns>
-	  public static CharacterUtils GetInstance(LuceneVersion matchVersion)
-	  {
-		return matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) ? JAVA_5 : JAVA_4;
-	  }
-
-	  /// <summary>
-	  /// Return a <seealso cref="CharacterUtils"/> instance compatible with Java 1.4. </summary>
-	  public static CharacterUtils Java4Instance
-	  {
-		  get
-		  {
-			return JAVA_4;
-		  }
-	  }
-
-	  /// <summary>
-	  /// Returns the code point at the given index of the <seealso cref="CharSequence"/>.
-	  /// Depending on the <seealso cref="LuceneVersion"/> passed to
-	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
-	  /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
-	  /// available on a Java 1.4 JVM or on a later virtual machine version.
-	  /// </summary>
-	  /// <param name="seq">
-	  ///          a character sequence </param>
-	  /// <param name="offset">
-	  ///          the offset to the char values in the chars array to be converted
-	  /// </param>
-	  /// <returns> the Unicode code point at the given index </returns>
-	  /// <exception cref="NullPointerException">
-	  ///           - if the sequence is null. </exception>
-	  /// <exception cref="IndexOutOfBoundsException">
-	  ///           - if the value offset is negative or not less than the length of
-	  ///           the character sequence. </exception>
-	  public abstract int CodePointAt(string seq, int offset);
-
-	  /// <summary>
-	  /// Returns the code point at the given index of the char array where only elements
-	  /// with index less than the limit are used.
-	  /// Depending on the <seealso cref="LuceneVersion"/> passed to
-	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
-	  /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
-	  /// available on a Java 1.4 JVM or on a later virtual machine version.
-	  /// </summary>
-	  /// <param name="chars">
-	  ///          a character array </param>
-	  /// <param name="offset">
-	  ///          the offset to the char values in the chars array to be converted </param>
-	  /// <param name="limit"> the index afer the last element that should be used to calculate
-	  ///        codepoint.  
-	  /// </param>
-	  /// <returns> the Unicode code point at the given index </returns>
-	  /// <exception cref="NullPointerException">
-	  ///           - if the array is null. </exception>
-	  /// <exception cref="IndexOutOfBoundsException">
-	  ///           - if the value offset is negative or not less than the length of
-	  ///           the char array. </exception>
-	  public abstract int CodePointAt(char[] chars, int offset, int limit);
-
-	  /// <summary>
-	  /// Return the number of characters in <code>seq</code>. </summary>
-	  public abstract int CodePointCount(string seq);
-
-	  /// <summary>
-	  /// Creates a new <seealso cref="CharacterBuffer"/> and allocates a <code>char[]</code>
-	  /// of the given bufferSize.
-	  /// </summary>
-	  /// <param name="bufferSize">
-	  ///          the internal char buffer size, must be <code>&gt;= 2</code> </param>
-	  /// <returns> a new <seealso cref="CharacterBuffer"/> instance. </returns>
-	  public static CharacterBuffer NewCharacterBuffer(int bufferSize)
-	  {
-		if (bufferSize < 2)
-		{
-		  throw new System.ArgumentException("buffersize must be >= 2");
-		}
-		return new CharacterBuffer(new char[bufferSize], 0, 0);
-	  }
-
-
-	  /// <summary>
-	  /// Converts each unicode codepoint to lowerCase via <seealso cref="Character#toLowerCase(int)"/> starting 
-	  /// at the given offset. </summary>
-	  /// <param name="buffer"> the char buffer to lowercase </param>
-	  /// <param name="offset"> the offset to start at </param>
-	  /// <param name="limit"> the max char in the buffer to lower case </param>
-	  public void ToLower(char[] buffer, int offset, int limit)
-	  {
-		Debug.Assert(buffer.Length >= limit);
-		Debug.Assert(offset <= 0 && offset <= buffer.Length);
-		for (int i = offset; i < limit;)
-		{
-		  i += Character.ToChars(char.ToLower((char)CodePointAt(buffer, i, limit)), buffer, i);
-		}
-	  }
-
-	  /// <summary>
-	  /// Converts each unicode codepoint to UpperCase via <seealso cref="Character#toUpperCase(int)"/> starting 
-	  /// at the given offset. </summary>
-	  /// <param name="buffer"> the char buffer to UPPERCASE </param>
-	  /// <param name="offset"> the offset to start at </param>
-	  /// <param name="limit"> the max char in the buffer to lower case </param>
-	  public void ToUpper(char[] buffer, int offset, int limit)
-	  {
-		Debug.Assert(buffer.Length >= limit);
-		Debug.Assert(offset <= 0 && offset <= buffer.Length);
-		for (int i = offset; i < limit;)
-		{
-		  i += Character.ToChars(char.ToUpper((char)CodePointAt(buffer, i, limit)), buffer, i);
-		}
-	  }
-
-	  /// <summary>
-	  /// Converts a sequence of Java characters to a sequence of unicode code points. </summary>
-	  ///  <returns> the number of code points written to the destination buffer  </returns>
-	  public int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
-	  {
-		if (srcLen < 0)
-		{
-		  throw new System.ArgumentException("srcLen must be >= 0");
-		}
-		int codePointCount_Renamed = 0;
-		for (int i = 0; i < srcLen;)
-		{
-		  int cp = CodePointAt(src, srcOff + i, srcOff + srcLen);
-		  int charCount = Character.CharCount(cp);
-		  dest[destOff + codePointCount_Renamed++] = cp;
-		  i += charCount;
-		}
-		return codePointCount_Renamed;
-	  }
-
-	  /// <summary>
-	  /// Converts a sequence of unicode code points to a sequence of Java characters. </summary>
-	  ///  <returns> the number of chars written to the destination buffer  </returns>
-	  public int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
-	  {
-		if (srcLen < 0)
-		{
-		  throw new System.ArgumentException("srcLen must be >= 0");
-		}
-		int written = 0;
-		for (int i = 0; i < srcLen; ++i)
-		{
-		  written += Character.ToChars(src[srcOff + i], dest, destOff + written);
-		}
-		return written;
-	  }
-
-	  /// <summary>
-	  /// Fills the <seealso cref="CharacterBuffer"/> with characters read from the given
-	  /// reader <seealso cref="Reader"/>. This method tries to read <code>numChars</code>
-	  /// characters into the <seealso cref="CharacterBuffer"/>, each call to fill will start
-	  /// filling the buffer from offset <code>0</code> up to <code>numChars</code>.
-	  /// In case code points can span across 2 java characters, this method may
-	  /// only fill <code>numChars - 1</code> characters in order not to split in
-	  /// the middle of a surrogate pair, even if there are remaining characters in
-	  /// the <seealso cref="Reader"/>.
-	  /// <para>
-	  /// Depending on the <seealso cref="LuceneVersion"/> passed to
-	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method implements
-	  /// supplementary character awareness when filling the given buffer. For all
-	  /// <seealso cref="LuceneVersion"/> &gt; 3.0 <seealso cref="#fill(CharacterBuffer, Reader, int)"/> guarantees
-	  /// that the given <seealso cref="CharacterBuffer"/> will never contain a high surrogate
-	  /// character as the last element in the buffer unless it is the last available
-	  /// character in the reader. In other words, high and low surrogate pairs will
-	  /// always be preserved across buffer boarders.
-	  /// </para>
-	  /// <para>
-	  /// A return value of <code>false</code> means that this method call exhausted
-	  /// the reader, but there may be some bytes which have been read, which can be
-	  /// verified by checking whether <code>buffer.getLength() &gt; 0</code>.
-	  /// </para>
-	  /// </summary>
-	  /// <param name="buffer">
-	  ///          the buffer to fill. </param>
-	  /// <param name="reader">
-	  ///          the reader to read characters from. </param>
-	  /// <param name="numChars">
-	  ///          the number of chars to read </param>
-	  /// <returns> <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer </returns>
-	  /// <exception cref="IOException">
-	  ///           if the reader throws an <seealso cref="IOException"/>. </exception>
-	  public abstract bool Fill(CharacterBuffer buffer, Reader reader, int numChars);
-
-	  /// <summary>
-	  /// Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. </summary>
-	  public virtual bool Fill(CharacterBuffer buffer, Reader reader)
-	  {
-		return Fill(buffer, reader, buffer.buffer.Length);
-	  }
-
-	  /// <summary>
-	  /// Return the index within <code>buf[start:start+count]</code> which is by <code>offset</code>
-	  ///  code points from <code>index</code>. 
-	  /// </summary>
-	  public abstract int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset);
-
-	  internal static int ReadFully(Reader reader, char[] dest, int offset, int len)
-	  {
-		int read = 0;
-		while (read < len)
-		{
-		  int r = reader.Read(dest, offset + read, len - read);
-		  if (r == -1)
-		  {
-			break;
-		  }
-		  read += r;
-		}
-		return read;
-	  }
-
-	  private sealed class Java5CharacterUtils : CharacterUtils
-	  {
-		internal Java5CharacterUtils()
-		{
-		}
-
-		public override int CodePointAt(string seq, int offset)
-		{
-		  return Character.CodePointAt(seq, offset);
-		}
-
-		public override int CodePointAt(char[] chars, int offset, int limit)
-		{
-		 return Character.CodePointAt(chars, offset, limit);
-		}
-
-		public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
-		{
-		  Debug.Assert(buffer.buffer.Length >= 2);
-		  if (numChars < 2 || numChars > buffer.buffer.Length)
-		  {
-			throw new System.ArgumentException("numChars must be >= 2 and <= the buffer size");
-		  }
-		  char[] charBuffer = buffer.buffer;
-		  buffer.offset = 0;
-		  int offset;
-
-		  // Install the previously saved ending high surrogate:
-		  if (buffer.lastTrailingHighSurrogate != 0)
-		  {
-			charBuffer[0] = buffer.lastTrailingHighSurrogate;
-			buffer.lastTrailingHighSurrogate = (char)0;
-			offset = 1;
-		  }
-		  else
-		  {
-			offset = 0;
-		  }
-
-		  int read = ReadFully(reader, charBuffer, offset, numChars - offset);
-
-		  buffer.length = offset + read;
-		  bool result = buffer.length == numChars;
-		  if (buffer.length < numChars)
-		  {
-			// We failed to fill the buffer. Even if the last char is a high
-			// surrogate, there is nothing we can do
-			return result;
-		  }
-
-		  if (char.IsHighSurrogate(charBuffer[buffer.length - 1]))
-		  {
-			buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
-		  }
-		  return result;
-		}
-
-		public override int CodePointCount(string seq)
-		{
-		  return Character.CodePointCount(seq, 0, seq.Length);
-		}
-
-		public override int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset)
-		{
-		  return Character.OffsetByCodePoints(buf, start, count, index, offset);
-		}
-	  }
-
-	  private sealed class Java4CharacterUtils : CharacterUtils
-	  {
-		internal Java4CharacterUtils()
-		{
-		}
-
-		public override int CodePointAt(string seq, int offset)
-		{
-		  return seq[offset];
-		}
-
-		public override int CodePointAt(char[] chars, int offset, int limit)
-		{
-		  if (offset >= limit)
-		  {
-			throw new System.IndexOutOfRangeException("offset must be less than limit");
-		  }
-		  return chars[offset];
-		}
-
-		public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
-		{
-		  Debug.Assert(buffer.buffer.Length >= 1);
-		  if (numChars < 1 || numChars > buffer.buffer.Length)
-		  {
-			throw new System.ArgumentException("numChars must be >= 1 and <= the buffer size");
-		  }
-		  buffer.offset = 0;
-		  int read = ReadFully(reader, buffer.buffer, 0, numChars);
-		  buffer.length = read;
-		  buffer.lastTrailingHighSurrogate = (char)0;
-		  return read == numChars;
-		}
-
-		public override int CodePointCount(string seq)
-		{
-		  return seq.Length;
-		}
-
-		public override int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset)
-		{
-		  int result = index + offset;
-		  if (result < 0 || result > count)
-		  {
-			throw new System.IndexOutOfRangeException();
-		  }
-		  return result;
-		}
-
-	  }
-
-	  /// <summary>
-	  /// A simple IO buffer to use with
-	  /// <seealso cref="CharacterUtils#fill(CharacterBuffer, Reader)"/>.
-	  /// </summary>
-	  public sealed class CharacterBuffer
-	  {
-
-		internal readonly char[] buffer;
-		internal int offset;
-		internal int length;
-		// NOTE: not private so outer class can access without
-		// $access methods:
-		internal char lastTrailingHighSurrogate;
-
-		internal CharacterBuffer(char[] buffer, int offset, int length)
-		{
-		  this.buffer = buffer;
-		  this.offset = offset;
-		  this.length = length;
-		}
-
-		/// <summary>
-		/// Returns the internal buffer
-		/// </summary>
-		/// <returns> the buffer </returns>
-		public char[] Buffer
-		{
-			get
-			{
-			  return buffer;
-			}
-		}
-
-		/// <summary>
-		/// Returns the data offset in the internal buffer.
-		/// </summary>
-		/// <returns> the offset </returns>
-		public int Offset
-		{
-			get
-			{
-			  return offset;
-			}
-		}
-
-		/// <summary>
-		/// Return the length of the data in the internal buffer starting at
-		/// <seealso cref="#getOffset()"/>
-		/// </summary>
-		/// <returns> the length </returns>
-		public int Length
-		{
-			get
-			{
-			  return length;
-			}
-		}
-
-		/// <summary>
-		/// Resets the CharacterBuffer. All internals are reset to its default
-		/// values.
-		/// </summary>
-		public void reset()
-		{
-		  offset = 0;
-		  length = 0;
-		  lastTrailingHighSurrogate = (char)0;
-		}
-	  }
-
-	}
+    /// <seealso cref="CharacterUtils"/> provides a unified interface to Character-related
+    /// operations to implement backwards compatible character operations based on a
+    /// <seealso cref="LuceneVersion"/> instance.
+    /// 
+    /// @lucene.internal
+    /// </summary>
+    public abstract class CharacterUtils
+    {
+        private static readonly CharacterUtils JAVA_4 = new Java4CharacterUtils();
+        private static readonly CharacterUtils JAVA_5 = new Java5CharacterUtils();
+
+        /// <summary>
+        /// Returns a <seealso cref="CharacterUtils"/> implementation according to the given
+        /// <seealso cref="LuceneVersion"/> instance.
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          a version instance </param>
+        /// <returns> a <seealso cref="CharacterUtils"/> implementation according to the given
+        ///         <seealso cref="LuceneVersion"/> instance. </returns>
+        public static CharacterUtils GetInstance(LuceneVersion matchVersion)
+        {
+            return matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) ? JAVA_5 : JAVA_4;
+        }
+
+        /// <summary>
+        /// Return a <seealso cref="CharacterUtils"/> instance compatible with Java 1.4. </summary>
+        public static CharacterUtils Java4Instance
+        {
+            get
+            {
+                return JAVA_4;
+            }
+        }
+
+        /// <summary>
+        /// Returns the code point at the given index of the <seealso cref="CharSequence"/>.
+        /// Depending on the <seealso cref="LuceneVersion"/> passed to
+        /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+        /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
+        /// available on a Java 1.4 JVM or on a later virtual machine version.
+        /// </summary>
+        /// <param name="seq">
+        ///          a character sequence </param>
+        /// <param name="offset">
+        ///          the offset to the char values in the chars array to be converted
+        /// </param>
+        /// <returns> the Unicode code point at the given index </returns>
+        /// <exception cref="NullPointerException">
+        ///           - if the sequence is null. </exception>
+        /// <exception cref="IndexOutOfBoundsException">
+        ///           - if the value offset is negative or not less than the length of
+        ///           the character sequence. </exception>
+        public abstract int CodePointAt(string seq, int offset);
+
+        /// <summary>
+        /// Returns the code point at the given index of the char array where only elements
+        /// with index less than the limit are used.
+        /// Depending on the <seealso cref="LuceneVersion"/> passed to
+        /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+        /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
+        /// available on a Java 1.4 JVM or on a later virtual machine version.
+        /// </summary>
+        /// <param name="chars">
+        ///          a character array </param>
+        /// <param name="offset">
+        ///          the offset to the char values in the chars array to be converted </param>
+        /// <param name="limit"> the index after the last element that should be used to calculate
+        ///        codepoint.  
+        /// </param>
+        /// <returns> the Unicode code point at the given index </returns>
+        /// <exception cref="NullPointerException">
+        ///           - if the array is null. </exception>
+        /// <exception cref="IndexOutOfBoundsException">
+        ///           - if the value offset is negative or not less than the length of
+        ///           the char array. </exception>
+        public abstract int CodePointAt(char[] chars, int offset, int limit);
+
+        /// <summary>
+        /// Return the number of characters in <code>seq</code>. </summary>
+        public abstract int CodePointCount(string seq);
+
+        /// <summary>
+        /// Creates a new <seealso cref="CharacterBuffer"/> and allocates a <code>char[]</code>
+        /// of the given bufferSize.
+        /// </summary>
+        /// <param name="bufferSize">
+        ///          the internal char buffer size, must be <code>&gt;= 2</code> </param>
+        /// <returns> a new <seealso cref="CharacterBuffer"/> instance. </returns>
+        public static CharacterBuffer NewCharacterBuffer(int bufferSize)
+        {
+            if (bufferSize < 2)
+            {
+                throw new System.ArgumentException("buffersize must be >= 2");
+            }
+            return new CharacterBuffer(new char[bufferSize], 0, 0);
+        }
+
+
+        /// <summary>
+        /// Converts each unicode codepoint to lowerCase via <seealso cref="Character#toLowerCase(int)"/> starting 
+        /// at the given offset. </summary>
+        /// <param name="buffer"> the char buffer to lowercase </param>
+        /// <param name="offset"> the offset to start at </param>
+        /// <param name="limit"> the max char in the buffer to lower case </param>
+        public void ToLower(char[] buffer, int offset, int limit)
+        {
+            Debug.Assert(buffer.Length >= limit);
+            Debug.Assert(offset <= 0 && offset <= buffer.Length);
+            for (int i = offset; i < limit; )
+            {
+                i += Character.ToChars(char.ToLower((char)CodePointAt(buffer, i, limit)), buffer, i);
+            }
+        }
+
+        /// <summary>
+        /// Converts each unicode codepoint to UpperCase via <seealso cref="Character#toUpperCase(int)"/> starting 
+        /// at the given offset. </summary>
+        /// <param name="buffer"> the char buffer to UPPERCASE </param>
+        /// <param name="offset"> the offset to start at </param>
+        /// <param name="limit"> the max char in the buffer to upper case </param>
+        public void ToUpper(char[] buffer, int offset, int limit)
+        {
+            Debug.Assert(buffer.Length >= limit);
+            Debug.Assert(offset <= 0 && offset <= buffer.Length);
+            for (int i = offset; i < limit; )
+            {
+                i += Character.ToChars(char.ToUpper((char)CodePointAt(buffer, i, limit)), buffer, i);
+            }
+        }
+
+        /// <summary>
+        /// Converts a sequence of Java characters to a sequence of unicode code points. </summary>
+        ///  <returns> the number of code points written to the destination buffer  </returns>
+        public int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
+        {
+            if (srcLen < 0)
+            {
+                throw new System.ArgumentException("srcLen must be >= 0");
+            }
+            int codePointCount_Renamed = 0;
+            for (int i = 0; i < srcLen; )
+            {
+                int cp = CodePointAt(src, srcOff + i, srcOff + srcLen);
+                int charCount = Character.CharCount(cp);
+                dest[destOff + codePointCount_Renamed++] = cp;
+                i += charCount;
+            }
+            return codePointCount_Renamed;
+        }
+
+        /// <summary>
+        /// Converts a sequence of unicode code points to a sequence of Java characters. </summary>
+        ///  <returns> the number of chars written to the destination buffer  </returns>
+        public int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
+        {
+            if (srcLen < 0)
+            {
+                throw new System.ArgumentException("srcLen must be >= 0");
+            }
+            int written = 0;
+            for (int i = 0; i < srcLen; ++i)
+            {
+                written += Character.ToChars(src[srcOff + i], dest, destOff + written);
+            }
+            return written;
+        }
+
+        /// <summary>
+        /// Fills the <seealso cref="CharacterBuffer"/> with characters read from the given
+        /// reader <seealso cref="Reader"/>. This method tries to read <code>numChars</code>
+        /// characters into the <seealso cref="CharacterBuffer"/>, each call to fill will start
+        /// filling the buffer from offset <code>0</code> up to <code>numChars</code>.
+        /// In case code points can span across 2 java characters, this method may
+        /// only fill <code>numChars - 1</code> characters in order not to split in
+        /// the middle of a surrogate pair, even if there are remaining characters in
+        /// the <seealso cref="Reader"/>.
+        /// <para>
+        /// Depending on the <seealso cref="LuceneVersion"/> passed to
+        /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method implements
+        /// supplementary character awareness when filling the given buffer. For all
+        /// <seealso cref="LuceneVersion"/> &gt; 3.0 <seealso cref="#fill(CharacterBuffer, Reader, int)"/> guarantees
+        /// that the given <seealso cref="CharacterBuffer"/> will never contain a high surrogate
+        /// character as the last element in the buffer unless it is the last available
+        /// character in the reader. In other words, high and low surrogate pairs will
+        /// always be preserved across buffer borders.
+        /// </para>
+        /// <para>
+        /// A return value of <code>false</code> means that this method call exhausted
+        /// the reader, but there may be some chars which have been read, which can be
+        /// verified by checking whether <code>buffer.getLength() &gt; 0</code>.
+        /// </para>
+        /// </summary>
+        /// <param name="buffer">
+        ///          the buffer to fill. </param>
+        /// <param name="reader">
+        ///          the reader to read characters from. </param>
+        /// <param name="numChars">
+        ///          the number of chars to read </param>
+        /// <returns> <code>false</code> if and only if reader.Read returned 0 (end of input) while trying to fill the buffer </returns>
+        /// <exception cref="IOException">
+        ///           if the reader throws an <seealso cref="IOException"/>. </exception>
+        public abstract bool Fill(CharacterBuffer buffer, Reader reader, int numChars);
+
+        /// <summary>
+        /// Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. </summary>
+        public virtual bool Fill(CharacterBuffer buffer, Reader reader)
+        {
+            return Fill(buffer, reader, buffer.buffer.Length);
+        }
+
+        /// <summary>
+        /// Return the index within <code>buf[start:start+count]</code> which is by <code>offset</code>
+        ///  code points from <code>index</code>. 
+        /// </summary>
+        public abstract int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset);
+
+        internal static int ReadFully(Reader reader, char[] dest, int offset, int len)
+        {
+            int read = 0;
+            while (read < len)
+            {
+                int r = reader.Read(dest, offset + read, len - read);
+                if (r == 0)
+                {
+                    break;
+                }
+                read += r;
+            }
+            return read;
+        }
+
+        private sealed class Java5CharacterUtils : CharacterUtils
+        {
+            internal Java5CharacterUtils()
+            {
+            }
+
+            public override int CodePointAt(string seq, int offset)
+            {
+                return Character.CodePointAt(seq, offset);
+            }
+
+            public override int CodePointAt(char[] chars, int offset, int limit)
+            {
+                return Character.CodePointAt(chars, offset, limit);
+            }
+
+            public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
+            {
+                Debug.Assert(buffer.buffer.Length >= 2);
+                if (numChars < 2 || numChars > buffer.buffer.Length)
+                {
+                    throw new System.ArgumentException("numChars must be >= 2 and <= the buffer size");
+                }
+                char[] charBuffer = buffer.buffer;
+                buffer.offset = 0;
+                int offset;
+
+                // Install the previously saved ending high surrogate:
+                if (buffer.lastTrailingHighSurrogate != 0)
+                {
+                    charBuffer[0] = buffer.lastTrailingHighSurrogate;
+                    buffer.lastTrailingHighSurrogate = (char)0;
+                    offset = 1;
+                }
+                else
+                {
+                    offset = 0;
+                }
+
+                int read = ReadFully(reader, charBuffer, offset, numChars - offset);
+
+                buffer.length = offset + read;
+                bool result = buffer.length == numChars;
+                if (buffer.length < numChars)
+                {
+                    // We failed to fill the buffer. Even if the last char is a high
+                    // surrogate, there is nothing we can do
+                    return result;
+                }
+
+                if (char.IsHighSurrogate(charBuffer[buffer.length - 1]))
+                {
+                    buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
+                }
+                return result;
+            }
+
+            public override int CodePointCount(string seq)
+            {
+                return Character.CodePointCount(seq, 0, seq.Length);
+            }
+
+            public override int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+            {
+                return Character.OffsetByCodePoints(buf, start, count, index, offset);
+            }
+        }
+
+        private sealed class Java4CharacterUtils : CharacterUtils
+        {
+            internal Java4CharacterUtils()
+            {
+            }
+
+            public override int CodePointAt(string seq, int offset)
+            {
+                return seq[offset];
+            }
+
+            public override int CodePointAt(char[] chars, int offset, int limit)
+            {
+                if (offset >= limit)
+                {
+                    throw new System.IndexOutOfRangeException("offset must be less than limit");
+                }
+                return chars[offset];
+            }
+
+            public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
+            {
+                Debug.Assert(buffer.buffer.Length >= 1);
+                if (numChars < 1 || numChars > buffer.buffer.Length)
+                {
+                    throw new System.ArgumentException("numChars must be >= 1 and <= the buffer size");
+                }
+                buffer.offset = 0;
+                int read = ReadFully(reader, buffer.buffer, 0, numChars);
+                buffer.length = read;
+                buffer.lastTrailingHighSurrogate = (char)0;
+                return read == numChars;
+            }
+
+            public override int CodePointCount(string seq)
+            {
+                return seq.Length;
+            }
+
+            public override int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+            {
+                int result = index + offset;
+                if (result < 0 || result > count)
+                {
+                    throw new System.IndexOutOfRangeException();
+                }
+                return result;
+            }
+
+        }
+
+        /// <summary>
+        /// A simple IO buffer to use with
+        /// <seealso cref="CharacterUtils.Fill(CharacterBuffer, Reader, int)"/>.
+        /// </summary>
+        public sealed class CharacterBuffer
+        {
+
+            internal readonly char[] buffer;
+            internal int offset;
+            internal int length;
+            // NOTE: internal rather than private so that the CharacterUtils
+            // implementations can read and write it directly:
+            internal char lastTrailingHighSurrogate;
+
+            internal CharacterBuffer(char[] buffer, int offset, int length)
+            {
+                this.buffer = buffer;
+                this.offset = offset;
+                this.length = length;
+            }
+
+            /// <summary>
+            /// Returns the internal buffer
+            /// </summary>
+            /// <returns> the buffer </returns>
+            public char[] Buffer
+            {
+                get
+                {
+                    return buffer;
+                }
+            }
+
+            /// <summary>
+            /// Returns the data offset in the internal buffer.
+            /// </summary>
+            /// <returns> the offset </returns>
+            public int Offset
+            {
+                get
+                {
+                    return offset;
+                }
+            }
+
+            /// <summary>
+            /// Returns the length of the data in the internal buffer starting at
+            /// <seealso cref="Offset"/>.
+            /// </summary>
+            /// <returns> the length </returns>
+            public int Length
+            {
+                get
+                {
+                    return length;
+                }
+            }
+
+            /// <summary>
+            /// Resets the CharacterBuffer. The offset, length and saved trailing high
+            /// surrogate are reset to their default values.
+            /// </summary>
+            public void reset()
+            {
+                offset = 0;
+                length = 0;
+                lastTrailingHighSurrogate = (char)0;
+            }
+        }
+
+    }
 
 }
\ No newline at end of file

[2/3] lucenenet git commit: More porting work

Posted by sy...@apache.org.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index ecb534f..d0502aa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -2,145 +2,148 @@
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.standard;
 
 namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>, {@link
-	/// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
-	/// English stop words.
-	/// 
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="LuceneVersion"/>
-	/// compatibility when creating StandardAnalyzer:
-	/// <ul>
-	///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
-	///        from their combining characters. If you use a previous version number,
-	///        you get the exact broken behavior for backwards compatibility.
-	///   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
-	///        and StopFilter correctly handles Unicode 4.0 supplementary characters
-	///        in stopwords.  <seealso cref="ClassicTokenizer"/> and <seealso cref="ClassicAnalyzer"/> 
-	///        are the pre-3.1 implementations of StandardTokenizer and
-	///        StandardAnalyzer.
-	///   <li> As of 2.9, StopFilter preserves position increments
-	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
-	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class StandardAnalyzer : StopwordAnalyzerBase
-	{
-
-	  /// <summary>
-	  /// Default maximum allowed token length </summary>
-	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-	  /// <summary>
-	  /// An unmodifiable set containing some common English words that are usually not
-	  /// useful for searching. 
-	  /// </summary>
-	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
-	  /// <summary>
-	  /// Builds an analyzer with the given stop words. </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopWords"> stop words  </param>
-	  public StandardAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the default stop words ({@link
-	  /// #STOP_WORDS_SET}). </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  public StandardAnalyzer(LuceneVersion matchVersion) : this(matchVersion, STOP_WORDS_SET)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the stop words from the given reader. </summary>
-	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopwords"> Reader to read stop words from  </param>
-	  public StandardAnalyzer(LuceneVersion matchVersion, TextReader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
-	  {
-	  }
-
-	  /// <summary>
-	  /// Set maximum allowed token length.  If a token is seen
-	  /// that exceeds this length then it is discarded.  This
-	  /// setting only takes effect the next time tokenStream or
-	  /// tokenStream is called.
-	  /// </summary>
-	  public int MaxTokenLength
-	  {
-		  set
-		  {
-			maxTokenLength = value;
-		  }
-		  get
-		  {
-			return maxTokenLength;
-		  }
-	  }
-
-
-	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-	  {
-		var src = new StandardTokenizer(matchVersion, reader);
-		src.MaxTokenLength = maxTokenLength;
-		TokenStream tok = new StandardFilter(matchVersion, src);
-		tok = new LowerCaseFilter(matchVersion, tok);
-		tok = new StopFilter(matchVersion, tok, stopwords);
-		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
-	  }
-
-	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
-	  {
-		  private readonly StandardAnalyzer outerInstance;
-
-		  private TextReader reader;
-		  private readonly StandardTokenizer src;
-
-		  public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.reader = reader;
-			  this.src = src;
-		  }
-
-		  protected internal override Reader Reader
-		  {
-			  set
-			  {
-				src.MaxTokenLength = outerInstance.maxTokenLength;
-				base.Reader = value;
-			  }
-		  }
-	  }
-	}
+    /// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>,
+    /// <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>, using a list of
+    /// English stop words.
+    /// 
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+    /// compatibility when creating StandardAnalyzer:
+    /// <ul>
+    ///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+    ///        from their combining characters. If you use a previous version number,
+    ///        you get the exact broken behavior for backwards compatibility.</li>
+    ///   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
+    ///        and StopFilter correctly handles Unicode 4.0 supplementary characters
+    ///        in stopwords.  <seealso cref="ClassicTokenizer"/> and <seealso cref="ClassicAnalyzer"/>
+    ///        are the pre-3.1 implementations of StandardTokenizer and
+    ///        StandardAnalyzer.</li>
+    ///   <li> As of 2.9, StopFilter preserves position increments</li>
+    ///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+    ///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class StandardAnalyzer : StopwordAnalyzerBase
+    {
+
+        /// <summary>
+        /// Default maximum allowed token length </summary>
+        public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+        private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+        /// <summary>
+        /// An unmodifiable set containing some common English words that are usually not
+        /// useful for searching. 
+        /// </summary>
+        public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. </summary>
+        /// <param name="matchVersion"> Lucene version to match; see the
+        /// <a href="#version">version notes above</a>. </param>
+        /// <param name="stopWords"> stop words  </param>
+        public StandardAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
+            : base(matchVersion, stopWords)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words
+        /// (<seealso cref="STOP_WORDS_SET"/>). </summary>
+        /// <param name="matchVersion"> Lucene version to match; see the
+        /// <a href="#version">version notes above</a>. </param>
+        public StandardAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, STOP_WORDS_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the stop words from the given reader. </summary>
+        /// <remarks> See <c>WordlistLoader#getWordSet(Reader, Version)</c>. </remarks>
+        /// <param name="matchVersion"> Lucene version to match; see the
+        /// <a href="#version">version notes above</a>. </param>
+        /// <param name="stopwords"> Reader to read stop words from  </param>
+        public StandardAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
+            : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+        {
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum allowed token length.  If a token
+        /// is seen that exceeds this length then it is discarded.  This
+        /// setting only takes effect the next time tokenStream
+        /// is called.
+        /// </summary>
+        public int MaxTokenLength
+        {
+            set
+            {
+                maxTokenLength = value;
+            }
+            get
+            {
+                return maxTokenLength;
+            }
+        }
+
+
+        public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            var src = new StandardTokenizer(matchVersion, reader);
+            src.MaxTokenLength = maxTokenLength;
+            TokenStream tok = new StandardFilter(matchVersion, src);
+            tok = new LowerCaseFilter(matchVersion, tok);
+            tok = new StopFilter(matchVersion, tok, stopwords);
+            return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+        }
+
+        private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+        {
+            private readonly StandardAnalyzer outerInstance;
+
+            private TextReader reader;
+            private readonly StandardTokenizer src;
+
+            public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, TextReader reader)
+                : base(src, tok)
+            {
+                this.outerInstance = outerInstance;
+                this.reader = reader;
+                this.src = src;
+            }
+
+            protected override TextReader Reader
+            {
+                set
+                {
+                    src.MaxTokenLength = outerInstance.maxTokenLength;
+                    base.Reader = value;
+                }
+            }
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
index a2641ce..6093cd6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
@@ -1,102 +1,91 @@
-using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Normalizes tokens extracted with <seealso cref="StandardTokenizer"/>.
+    /// </summary>
+    public class StandardFilter : TokenFilter
+    {
+        private readonly LuceneVersion matchVersion;
 
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using Version = org.apache.lucene.util.Version;
+        public StandardFilter(LuceneVersion matchVersion, TokenStream @in)
+            : base(@in)
+        {
+            this.matchVersion = matchVersion;
+            typeAtt = AddAttribute<ITypeAttribute>();
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
 
-	/// <summary>
-	/// Normalizes tokens extracted with <seealso cref="StandardTokenizer"/>.
-	/// </summary>
-	public class StandardFilter : TokenFilter
-	{
-	  private readonly Version matchVersion;
+        private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+        private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
 
-	  public StandardFilter(Version matchVersion, TokenStream @in) : base(@in)
-	  {
-		this.matchVersion = matchVersion;
-	  }
+        // this filter uses the type and term attributes
+        private readonly ITypeAttribute typeAtt;
+        private readonly ICharTermAttribute termAtt;
 
-	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
-	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+        public override bool IncrementToken()
+        {
+            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
+            {
+                return input.IncrementToken(); // TODO: add some niceties for the new grammar
+            }
+            else
+            {
+                return IncrementTokenClassic();
+            }
+        }
 
-	  // this filters uses attribute type
-	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        public bool IncrementTokenClassic()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (matchVersion.onOrAfter(Version.LUCENE_31))
-		{
-		  return input.incrementToken(); // TODO: add some niceties for the new grammar
-		}
-		else
-		{
-		  return incrementTokenClassic();
-		}
-	  }
+            char[] buffer = termAtt.Buffer();
+            int bufferLength = termAtt.Length;
+            string type = typeAtt.Type;
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public final boolean incrementTokenClassic() throws java.io.IOException
-	  public bool incrementTokenClassic()
-	  {
-		if (!input.incrementToken())
-		{
-		  return false;
-		}
+            if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+            {
+                // Strip last 2 characters off
+                termAtt.Length = bufferLength - 2;
+            } // remove dots
+            else if (type == ACRONYM_TYPE)
+            {
+                int upto = 0;
+                for (int i = 0; i < bufferLength; i++)
+                {
+                    char c = buffer[i];
+                    if (c != '.')
+                    {
+                        buffer[upto++] = c;
+                    }
+                }
+                termAtt.Length = upto;
+            }
 
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
-		char[] buffer = termAtt.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bufferLength = termAtt.length();
-		int bufferLength = termAtt.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String type = typeAtt.type();
-		string type = typeAtt.type();
-
-		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
-		{
-		  // Strip last 2 characters off
-		  termAtt.Length = bufferLength - 2;
-		} // remove dots
-		else if (type == ACRONYM_TYPE)
-		{
-		  int upto = 0;
-		  for (int i = 0;i < bufferLength;i++)
-		  {
-			char c = buffer[i];
-			if (c != '.')
-			{
-			  buffer[upto++] = c;
-			}
-		  }
-		  termAtt.Length = upto;
-		}
-
-		return true;
-	  }
-	}
+            return true;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
index eab0156..b634397 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
@@ -1,4 +1,5 @@
 using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
 using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
 
 namespace org.apache.lucene.analysis.standard

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
index abf55e8..e47b481 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -16,7 +16,10 @@
  */
 using System;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 using org.apache.lucene.analysis.standard;
+using Version = Lucene.Net.Util.LuceneVersion;
+using Reader = System.IO.TextReader;
 
 namespace Lucene.Net.Analysis.Standard
 {
@@ -144,15 +147,15 @@ namespace Lucene.Net.Analysis.Standard
             {
                 this.scanner = new StandardTokenizerImpl(input);
             }
-            else if (matchVersion.onOrAfter(Version.LUCENE_40))
+            else if (matchVersion.OnOrAfter(Version.LUCENE_40))
             {
                 this.scanner = new StandardTokenizerImpl40(input);
             }
-            else if (matchVersion.onOrAfter(Version.LUCENE_34))
+            else if (matchVersion.OnOrAfter(Version.LUCENE_34))
             {
                 this.scanner = new StandardTokenizerImpl34(input);
             }
-            else if (matchVersion.onOrAfter(Version.LUCENE_31))
+            else if (matchVersion.OnOrAfter(Version.LUCENE_31))
             {
                 this.scanner = new StandardTokenizerImpl31(input);
             }
@@ -229,9 +232,9 @@ namespace Lucene.Net.Analysis.Standard
             posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
         }
 
-        public override void Close()
+        public override void Dispose()
         {
-            base.Close();
+            base.Dispose();
             scanner.yyreset(input);
         }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
index 0b6bbe6..2c4560f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
@@ -1,8 +1,9 @@
 using System.Collections.Generic;
-using Lucene.Net.Analysis.Standard;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
 	/*
@@ -21,12 +22,7 @@ namespace org.apache.lucene.analysis.standard
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
-
-	using TokenizerFactory = TokenizerFactory;
-	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
-
-
-	/// <summary>
+    /// <summary>
 	/// Factory for <seealso cref="StandardTokenizer"/>. 
 	/// <pre class="prettyprint">
 	/// &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
@@ -51,9 +47,9 @@ namespace org.apache.lucene.analysis.standard
 		}
 	  }
 
-	  public override StandardTokenizer create(AttributeFactory factory, Reader input)
+	  public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
 	  {
-		StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
+		var tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
 		tokenizer.MaxTokenLength = maxTokenLength;
 		return tokenizer;
 	  }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
index 86ba884..44a9bbe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -22,8 +22,6 @@ namespace org.apache.lucene.analysis.standard
 	 * limitations under the License.
 	 */
 
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
 	/// <summary>
 	/// This class implements Word Break rules from the Unicode Text Segmentation 
 	/// algorithm, as specified in 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index 628ca23..273896b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -1,9 +1,10 @@
 using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis.Util;
-using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
 	/*
@@ -22,18 +23,9 @@ namespace org.apache.lucene.analysis.standard
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
-
-	using LowerCaseFilter = LowerCaseFilter;
-	using StopAnalyzer = StopAnalyzer;
-	using StopFilter = StopFilter;
-	using CharArraySet = CharArraySet;
-	using StopwordAnalyzerBase = StopwordAnalyzerBase;
-	using Version = org.apache.lucene.util.Version;
-
-
-	/// <summary>
+    /// <summary>
 	/// Filters <seealso cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
-	/// with <seealso cref="org.apache.lucene.analysis.standard.StandardFilter"/>,
+	/// with <seealso cref="StandardFilter"/>,
 	/// <seealso cref="LowerCaseFilter"/> and
 	/// <seealso cref="StopFilter"/>, using a list of
 	/// English stop words.
@@ -64,7 +56,7 @@ namespace org.apache.lucene.analysis.standard
 	  /// <param name="matchVersion"> Lucene version to match See {@link
 	  /// <a href="#version">above</a>} </param>
 	  /// <param name="stopWords"> stop words  </param>
-	  public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
 	  {
 	  }
 
@@ -73,7 +65,7 @@ namespace org.apache.lucene.analysis.standard
 	  /// #STOP_WORDS_SET}). </summary>
 	  /// <param name="matchVersion"> Lucene version to match See {@link
 	  /// <a href="#version">above</a>} </param>
-	  public UAX29URLEmailAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  public UAX29URLEmailAnalyzer(LuceneVersion matchVersion) : this(matchVersion, STOP_WORDS_SET)
 	  {
 	  }
 
@@ -83,9 +75,7 @@ namespace org.apache.lucene.analysis.standard
 	  /// <param name="matchVersion"> Lucene version to match See {@link
 	  /// <a href="#version">above</a>} </param>
 	  /// <param name="stopwords"> Reader to read stop words from  </param>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public UAX29URLEmailAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
-	  public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
 	  {
 	  }
 
@@ -108,12 +98,8 @@ namespace org.apache.lucene.analysis.standard
 	  }
 
 
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
-	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+        public override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
 	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
 		UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
 		src.MaxTokenLength = maxTokenLength;
 		TokenStream tok = new StandardFilter(matchVersion, src);
@@ -127,19 +113,16 @@ namespace org.apache.lucene.analysis.standard
 		  private readonly UAX29URLEmailAnalyzer outerInstance;
 
 		  private Reader reader;
-		  private org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src;
+		  private UAX29URLEmailTokenizer src;
 
-		  public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+		  public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
 		  {
 			  this.outerInstance = outerInstance;
 			  this.reader = reader;
 			  this.src = src;
 		  }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-		  protected internal override Reader Reader
+		  protected override Reader Reader
 		  {
 			  set
 			  {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index f319675..4faa921 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -1,7 +1,6 @@
 using System.Collections.Generic;
 using System.Text;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.Util
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
index 1fd76f8..f4fa262 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
@@ -1,7 +1,4 @@
-using System.Collections.Generic;
-using System.Text;
-
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -17,20 +14,14 @@ using System.Text;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+using System.Collections.Generic;
+using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 
-namespace org.apache.lucene.analysis.wikipedia
+namespace Lucene.Net.Analysis.Wikipedia
 {
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using AttributeSource = org.apache.lucene.util.AttributeSource;
-
-
-
-	/// <summary>
+    /// <summary>
 	/// Extension of StandardTokenizer that is aware of Wikipedia syntax.  It is based off of the
 	/// Wikipedia tutorial available at http://en.wikipedia.org/wiki/Wikipedia:Tutorial, but it may not be complete.
 	/// <p/>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
index ad7027f..e320469 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
@@ -1,4 +1,5 @@
 using System.Collections.Generic;
+using Lucene.Net.Analysis.Wikipedia;
 using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
 
 namespace org.apache.lucene.analysis.wikipedia

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Core/Util/StringHelper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/StringHelper.cs b/src/Lucene.Net.Core/Util/StringHelper.cs
index 9a8dc3c..74d6992 100644
--- a/src/Lucene.Net.Core/Util/StringHelper.cs
+++ b/src/Lucene.Net.Core/Util/StringHelper.cs
@@ -66,18 +66,18 @@ namespace Lucene.Net.Util
             }
         }
 
-        private static IComparer<string> versionComparator = new ComparatorAnonymousInnerClassHelper();
+        private static readonly IComparer<string> versionComparator = new ComparatorAnonymousInnerClassHelper();
 
-        private class ComparatorAnonymousInnerClassHelper : IComparer<string>
+        private sealed class ComparatorAnonymousInnerClassHelper : IComparer<string>
         {
             public ComparatorAnonymousInnerClassHelper()
             {
             }
 
-            public virtual int Compare(string a, string b)
+            public int Compare(string a, string b)
             {
-                StringTokenizer aTokens = new StringTokenizer(a, ".");
-                StringTokenizer bTokens = new StringTokenizer(b, ".");
+                var aTokens = new StringTokenizer(a, ".");
+                var bTokens = new StringTokenizer(b, ".");
 
                 while (aTokens.HasMoreTokens())
                 {

[3/3] lucenenet git commit: More porting work

Posted by sy...@apache.org.

More porting work


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b4eaf2fc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b4eaf2fc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b4eaf2fc

Branch: refs/heads/master
Commit: b4eaf2fc441dfd5d32732eda844ef1e8e62588a1
Parents: 8d7a54f
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Wed Apr 15 02:32:11 2015 +0300
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Wed Apr 15 02:32:11 2015 +0300

----------------------------------------------------------------------
 .../Compound/CompoundWordTokenFilterBase.cs     | 365 ++++++-----
 .../Analysis/Core/UpperCaseFilter.cs            | 114 ++--
 .../Ngram/Lucene43EdgeNGramTokenizer.cs         | 609 +++++++++----------
 .../Analysis/Standard/ClassicAnalyzer.cs        | 299 +++++----
 .../Analysis/Standard/ClassicFilter.cs          | 153 +++--
 .../Analysis/Standard/ClassicFilterFactory.cs   |  92 ++-
 .../Analysis/Standard/ClassicTokenizer.cs       | 369 ++++++-----
 .../Analysis/Standard/ClassicTokenizerImpl.cs   |  14 +-
 .../Analysis/Standard/StandardAnalyzer.cs       | 273 +++++----
 .../Analysis/Standard/StandardFilter.cs         | 167 +++--
 .../Analysis/Standard/StandardFilterFactory.cs  |   1 +
 .../Analysis/Standard/StandardTokenizer.cs      |  13 +-
 .../Standard/StandardTokenizerFactory.cs        |  18 +-
 .../Analysis/Standard/StandardTokenizerImpl.cs  |   2 -
 .../Analysis/Standard/UAX29URLEmailAnalyzer.cs  |  43 +-
 .../Analysis/Util/CharArraySet.cs               |   1 -
 .../Analysis/Wikipedia/WikipediaTokenizer.cs    |  23 +-
 .../Wikipedia/WikipediaTokenizerFactory.cs      |   1 +
 src/Lucene.Net.Core/Util/StringHelper.cs        |  10 +-
 19 files changed, 1239 insertions(+), 1328 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index ba8fd6c..c6bc4cd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -2,193 +2,192 @@
 using System.Diagnostics;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Compound
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Base class for decomposition token filters.
-	/// <para>
-	/// 
-	/// <a name="version"></a>
-	/// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
-	/// CompoundWordTokenFilterBase:
-	/// <ul>
-	/// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
-	/// supplementary characters in strings and char arrays provided as compound word
-	/// dictionaries.
-	/// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public abstract class CompoundWordTokenFilterBase : TokenFilter
-	{
-	  /// <summary>
-	  /// The default for minimal word length that gets decomposed
-	  /// </summary>
-	  public const int DEFAULT_MIN_WORD_SIZE = 5;
-
-	  /// <summary>
-	  /// The default for minimal length of subwords that get propagated to the output of this filter
-	  /// </summary>
-	  public const int DEFAULT_MIN_SUBWORD_SIZE = 2;
-
-	  /// <summary>
-	  /// The default for maximal length of subwords that get propagated to the output of this filter
-	  /// </summary>
-	  public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
-
-	  protected internal readonly LuceneVersion matchVersion;
-	  protected internal readonly CharArraySet dictionary;
-	  protected internal readonly LinkedList<CompoundToken> tokens;
-	  protected internal readonly int minWordSize;
-	  protected internal readonly int minSubwordSize;
-	  protected internal readonly int maxSubwordSize;
-	  protected internal readonly bool onlyLongestMatch;
-
-	  protected internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  protected internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
-
-	  private AttributeSource.State current;
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
-	  {
-	  }
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false)
-	  {
-	  }
-
-	  protected internal CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(input)
-	  {
-		this.matchVersion = matchVersion;
-		this.tokens = new LinkedList<CompoundToken>();
-		if (minWordSize < 0)
-		{
-		  throw new System.ArgumentException("minWordSize cannot be negative");
-		}
-		this.minWordSize = minWordSize;
-		if (minSubwordSize < 0)
-		{
-		  throw new System.ArgumentException("minSubwordSize cannot be negative");
-		}
-		this.minSubwordSize = minSubwordSize;
-		if (maxSubwordSize < 0)
-		{
-		  throw new System.ArgumentException("maxSubwordSize cannot be negative");
-		}
-		this.maxSubwordSize = maxSubwordSize;
-		this.onlyLongestMatch = onlyLongestMatch;
-		this.dictionary = dictionary;
-	  }
-
-	  public override bool IncrementToken()
-	  {
-		if (tokens.Count > 0)
-		{
-		  Debug.Assert(current != null);
-		  CompoundToken token = tokens.First.Value; tokens.RemoveFirst();
-		  RestoreState(current); // keep all other attributes untouched
-		  termAtt.SetEmpty().Append(token.txt);
-		  offsetAtt.SetOffset(token.startOffset, token.endOffset);
-		  posIncAtt.PositionIncrement = 0;
-		  return true;
-		}
-
-		current = null; // not really needed, but for safety
-		if (input.incrementToken())
-		{
-		  // Only words longer than minWordSize get processed
-		  if (termAtt.length() >= this.minWordSize)
-		  {
-			decompose();
-			// only capture the state if we really need it for producing new tokens
-			if (tokens.Count > 0)
-			{
-			  current = captureState();
-			}
-		  }
-		  // return original token:
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-
-	  /// <summary>
-	  /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list.
-	  /// The original token may not be placed in the list, as it is automatically passed through this filter.
-	  /// </summary>
-	  protected internal abstract void decompose();
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		tokens.Clear();
-		current = null;
-	  }
-
-	  /// <summary>
-	  /// Helper class to hold decompounded token information
-	  /// </summary>
-	  protected internal class CompoundToken
-	  {
-		  private readonly CompoundWordTokenFilterBase outerInstance;
-
-		public readonly string txt;
-		public readonly int startOffset, endOffset;
-
-		/// <summary>
-		/// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary>
-		public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
-		{
-			this.outerInstance = outerInstance;
-		  this.txt = outerInstance.termAtt.subSequence(offset, offset + length);
-
-		  // offsets of the original word
-		  int startOff = outerInstance.offsetAtt.startOffset();
-		  int endOff = outerInstance.offsetAtt.endOffset();
-
-		  if (outerInstance.matchVersion.onOrAfter(LuceneVersion.LUCENE_44) || endOff - startOff != outerInstance.termAtt.length())
-		  {
-			// if length by start + end offsets doesn't match the term text then assume
-			// this is a synonym and don't adjust the offsets.
-			this.startOffset = startOff;
-			this.endOffset = endOff;
-		  }
-		  else
-		  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int newStart = startOff + offset;
-			int newStart = startOff + offset;
-			this.startOffset = newStart;
-			this.endOffset = newStart + length;
-		  }
-		}
-
-	  }
-	}
-
+    /// Base class for decomposition token filters.
+    /// <para>
+    /// 
+    /// <a name="version"></a>
+    /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
+    /// CompoundWordTokenFilterBase:
+    /// <ul>
+    /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+    /// supplementary characters in strings and char arrays provided as compound word
+    /// dictionaries.</li>
+    /// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public abstract class CompoundWordTokenFilterBase : TokenFilter
+    {
+        /// <summary>
+        /// The default for minimal word length that gets decomposed
+        /// </summary>
+        public const int DEFAULT_MIN_WORD_SIZE = 5;
+
+        /// <summary>
+        /// The default for minimal length of subwords that get propagated to the output of this filter
+        /// </summary>
+        public const int DEFAULT_MIN_SUBWORD_SIZE = 2;
+
+        /// <summary>
+        /// The default for maximal length of subwords that get propagated to the output of this filter
+        /// </summary>
+        public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
+
+        protected internal readonly LuceneVersion matchVersion;
+        protected internal readonly CharArraySet dictionary;
+        protected internal readonly LinkedList<CompoundToken> tokens;
+        protected internal readonly int minWordSize;
+        protected internal readonly int minSubwordSize;
+        protected internal readonly int maxSubwordSize;
+        protected internal readonly bool onlyLongestMatch;
+
+        protected internal readonly CharTermAttribute termAtt;
+        protected internal readonly IOffsetAttribute offsetAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+
+        private AttributeSource.State current;
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch)
+            : this(matchVersion, input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
+        {
+        }
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
+            : this(matchVersion, input, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+        {
+        }
+
+        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
+            : base(input)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+
+            this.matchVersion = matchVersion;
+            this.tokens = new LinkedList<CompoundToken>();
+            if (minWordSize < 0)
+            {
+                throw new System.ArgumentException("minWordSize cannot be negative");
+            }
+            this.minWordSize = minWordSize;
+            if (minSubwordSize < 0)
+            {
+                throw new System.ArgumentException("minSubwordSize cannot be negative");
+            }
+            this.minSubwordSize = minSubwordSize;
+            if (maxSubwordSize < 0)
+            {
+                throw new System.ArgumentException("maxSubwordSize cannot be negative");
+            }
+            this.maxSubwordSize = maxSubwordSize;
+            this.onlyLongestMatch = onlyLongestMatch;
+            this.dictionary = dictionary;
+        }
+
+        public override bool IncrementToken()
+        {
+            if (tokens.Count > 0)
+            {
+                Debug.Assert(current != null);
+                CompoundToken token = tokens.First.Value; tokens.RemoveFirst();
+                RestoreState(current); // keep all other attributes untouched
+                termAtt.SetEmpty().Append(token.txt);
+                offsetAtt.SetOffset(token.startOffset, token.endOffset);
+                posIncAtt.PositionIncrement = 0;
+                return true;
+            }
+
+            current = null; // not really needed, but for safety
+            if (input.IncrementToken())
+            {
+                // Only words of at least minWordSize characters get processed
+                if (termAtt.Length >= this.minWordSize)
+                {
+                    Decompose();
+                    // only capture the state if we really need it for producing new tokens
+                    if (tokens.Count > 0)
+                    {
+                        current = CaptureState();
+                    }
+                }
+                // return original token:
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Decomposes the current <seealso cref="termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="tokens"/> list.
+        /// The original token must not be placed in the list, as it is automatically passed through this filter.
+        /// </summary>
+        protected abstract void Decompose();
+
+        public override void Reset()
+        {
+            base.Reset();
+            tokens.Clear();
+            current = null;
+        }
+
+        /// <summary>
+        /// Helper class to hold decompounded token information
+        /// </summary>
+        protected internal class CompoundToken
+        {
+            public readonly ICharSequence txt;
+            public readonly int startOffset, endOffset;
+
+            /// <summary>
+            /// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase.termAtt"/>. </summary>
+            public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
+            {
+                this.txt = outerInstance.termAtt.SubSequence(offset, offset + length);
+
+                // offsets of the original word
+                int startOff = outerInstance.offsetAtt.StartOffset();
+                int endOff = outerInstance.offsetAtt.EndOffset();
+
+                if (outerInstance.matchVersion.OnOrAfter(LuceneVersion.LUCENE_44) || endOff - startOff != outerInstance.termAtt.Length)
+                {
+                    // if length by start + end offsets doesn't match the term text then assume
+                    // this is a synonym and don't adjust the offsets.
+                    this.startOffset = startOff;
+                    this.endOffset = endOff;
+                }
+                else
+                {
+                    int newStart = startOff + offset;
+                    this.startOffset = newStart;
+                    this.endOffset = newStart + length;
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
index 6b722ad..c8b5f5f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -4,65 +4,65 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Core
 {
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Normalizes token text to UPPER CASE.
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="LuceneVersion"/>
-	/// compatibility when creating UpperCaseFilter
-	/// 
-	/// </para>
-	/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
-	/// upper case character represents more than one lower case character. Use this filter
-	/// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilter"/> for 
-	/// general search matching
-	/// </para>
-	/// </summary>
-	public sealed class UpperCaseFilter : TokenFilter
-	{
-	  private readonly CharacterUtils charUtils;
-        private readonly ICharTermAttribute termAtt;;
+    /// Normalizes token text to UPPER CASE.
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+    /// compatibility when creating UpperCaseFilter
+    /// 
+    /// </para>
+    /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
+    /// upper case character represents more than one lower case character. Use this filter
+    /// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilter"/> for 
+    /// general search matching
+    /// </para>
+    /// </summary>
+    public sealed class UpperCaseFilter : TokenFilter
+    {
+        private readonly CharacterUtils charUtils;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Create a new UpperCaseFilter, that normalizes token text to upper case.
-	  /// </summary>
-	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
-	  /// <param name="in"> TokenStream to filter </param>
-	  public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in) : base(@in)
-	  {
-	      termAtt = AddAttribute<ICharTermAttribute>();
-	      termAtt = AddAttribute<ICharTermAttribute>();
-		charUtils = CharacterUtils.GetInstance(matchVersion);
-	  }
+        /// <summary>
+        /// Create a new UpperCaseFilter, that normalizes token text to upper case.
+        /// </summary>
+        /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+        /// <param name="in"> TokenStream to filter </param>
+        public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in)
+            : base(@in)
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            termAtt = AddAttribute<ICharTermAttribute>();
+            charUtils = CharacterUtils.GetInstance(matchVersion);
+        }
 
-	  public override bool IncrementToken()
-	  {
-		if (input.IncrementToken())
-		{
-		  charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length);
-		  return true;
-		}
-		else
-		{
-		  return false;
-		}
-	  }
-	}
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length);
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
index 3827b36..c277918 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -1,323 +1,308 @@
 using System;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
 using Reader = System.IO.TextReader;
 using Version = Lucene.Net.Util.LuceneVersion;
 
 namespace Lucene.Net.Analysis.Ngram
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Old version of <seealso cref="EdgeNGramTokenizer"/> which doesn't handle correctly
-	/// supplementary characters.
-	/// </summary>
-	[Obsolete]
-	public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
-	{
-	  public const Side DEFAULT_SIDE = Side.FRONT;
-	  public const int DEFAULT_MAX_GRAM_SIZE = 1;
-	  public const int DEFAULT_MIN_GRAM_SIZE = 1;
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-
-	  /// <summary>
-	  /// Specifies which side of the input the n-gram should be generated from </summary>
-	  public enum Side
-	  {
-
-		/// <summary>
-		/// Get the n-gram from the front of the input </summary>
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		FRONT
-		{
-		  public String getLabel() { return "front"
-		}
-	  },
-
-		/// <summary>
-		/// Get the n-gram from the end of the input </summary>
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		BACK
-		{
-		  public String getLabel()
-		  {
-			  return "back";
-		  }
-		}
-
-		public = 
-
-		// Get the appropriate Side from a string
-		public static Side getSide(String sideName)
-		{
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		  if (FRONT.getLabel().equals(sideName))
-		  {
-			return FRONT;
-		  }
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
-		  if (BACK.getLabel().equals(sideName))
-		  {
-			return BACK;
-		  }
-		  return null;
-		}
-	}
-
-	  private int minGram;
-	  private int maxGram;
-	  private int gramSize;
-	  private Side side;
-	  private bool started;
-	  private int inLen; // length of the input AFTER trim()
-	  private int charsRead; // length of the input
-	  private string inStr;
-
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) : base(input)
-	  {
-		init(version, side, minGram, maxGram);
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) : base(factory, input)
-	  {
-		init(version, side, minGram, maxGram);
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram) : this(version, input, Side.getSide(sideLabel), minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  [Obsolete]
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, string sideLabel, int minGram, int maxGram) : this(version, factory, input, Side.getSide(sideLabel), minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) : this(version, input, Side.FRONT, minGram, maxGram)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
-	  /// </summary>
-	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
-	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
-	  /// <param name="minGram"> the smallest n-gram to generate </param>
-	  /// <param name="maxGram"> the largest n-gram to generate </param>
-	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : this(version, factory, input, Side.FRONT, minGram, maxGram)
-	  {
-	  }
-
-	  private void init(Version version, Side side, int minGram, int maxGram)
-	  {
-		if (version == null)
-		{
-		  throw new System.ArgumentException("version must not be null");
-		}
-
-		if (side == null)
-		{
-		  throw new System.ArgumentException("sideLabel must be either front or back");
-		}
-
-		if (minGram < 1)
-		{
-		  throw new System.ArgumentException("minGram must be greater than zero");
-		}
-
-		if (minGram > maxGram)
-		{
-		  throw new System.ArgumentException("minGram must not be greater than maxGram");
-		}
-
-		if (version.onOrAfter(Version.LUCENE_44))
-		{
-		  if (side == Side.BACK)
-		  {
-			throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
-		  }
-		}
-		else
-		{
-		  maxGram = Math.Min(maxGram, 1024);
-		}
-
-		this.minGram = minGram;
-		this.maxGram = maxGram;
-		this.side = side;
-	  }
-
-	  /// <summary>
-	  /// Returns the next token in the stream, or null at EOS. </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		clearAttributes();
-		// if we are just starting, read the whole input
-		if (!started)
-		{
-		  started = true;
-		  gramSize = minGram;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int limit = side == Side.FRONT ? maxGram : 1024;
-		  int limit = side == Side.FRONT ? maxGram : 1024;
-		  char[] chars = new char[Math.Min(1024, limit)];
-		  charsRead = 0;
-		  // TODO: refactor to a shared readFully somewhere:
-		  bool exhausted = false;
-		  while (charsRead < limit)
-		  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = input.read(chars, charsRead, chars.length-charsRead);
-			int inc = input.read(chars, charsRead, chars.Length - charsRead);
-			if (inc == -1)
-			{
-			  exhausted = true;
-			  break;
-			}
-			charsRead += inc;
-			if (charsRead == chars.Length && charsRead < limit)
-			{
-			  chars = ArrayUtil.grow(chars);
-			}
-		  }
-
-		  inStr = new string(chars, 0, charsRead);
-		  inStr = inStr.Trim();
-
-		  if (!exhausted)
-		  {
-			// Read extra throwaway chars so that on end() we
-			// report the correct offset:
-			char[] throwaway = new char[1024];
-			while (true)
-			{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = input.read(throwaway, 0, throwaway.length);
-			  int inc = input.read(throwaway, 0, throwaway.Length);
-			  if (inc == -1)
-			  {
-				break;
-			  }
-			  charsRead += inc;
-			}
-		  }
-
-		  inLen = inStr.length();
-		  if (inLen == 0)
-		  {
-			return false;
-		  }
-		  posIncrAtt.PositionIncrement = 1;
-		}
-		else
-		{
-		  posIncrAtt.PositionIncrement = 0;
-		}
-
-		// if the remaining input is too short, we can't generate any n-grams
-		if (gramSize > inLen)
-		{
-		  return false;
-		}
-
-		// if we have hit the end of our n-gram size range, quit
-		if (gramSize > maxGram || gramSize > inLen)
-		{
-		  return false;
-		}
-
-		// grab gramSize chars from front or back
-		int start = side == Side.FRONT ? 0 : inLen - gramSize;
-		int end = start + gramSize;
-		termAtt.setEmpty().append(inStr, start, end);
-		offsetAtt.setOffset(correctOffset(start), correctOffset(end));
-		gramSize++;
-		return true;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void end() throws java.io.IOException
-	  public override void end()
-	  {
-		base.end();
-		// set final offset
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = correctOffset(charsRead);
-		int finalOffset = correctOffset(charsRead);
-		this.offsetAtt.setOffset(finalOffset, finalOffset);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		started = false;
-	  }
-}
+    /// Old version of <seealso cref="EdgeNGramTokenizer"/> which doesn't handle
+    /// supplementary characters correctly.
+    /// </summary>
+    [Obsolete]
+    public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
+    {
+        public const Side DEFAULT_SIDE = Side.FRONT;
+        public const int DEFAULT_MAX_GRAM_SIZE = 1;
+        public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+        // Registered in Init(): AddAttribute is an instance call, so it cannot run in a field initializer.
+        private ICharTermAttribute termAtt;
+        private IOffsetAttribute offsetAtt;
+        private IPositionIncrementAttribute posIncrAtt;
+
+        /// <summary>
+        /// Specifies which side of the input the n-gram should be generated from </summary>
+        public enum Side
+        {
+            /// <summary>
+            /// Get the n-gram from the front of the input </summary>
+            FRONT,
+
+            /// <summary>
+            /// Get the n-gram from the end of the input </summary>
+            BACK,
+        }
+
+        private static string GetSideLabel(Side side)
+        {
+            if (side == Side.FRONT) return "front";
+            if (side == Side.BACK) return "back";
+            return null;
+        }
+
+        // Get the appropriate Side from a string; null when the label is not recognized
+        internal static Side? GetSide(String sideName)
+        {
+            if (GetSideLabel(Side.FRONT).Equals(sideName))
+            {
+                return Side.FRONT;
+            }
+            if (GetSideLabel(Side.BACK).Equals(sideName))
+            {
+                return Side.BACK;
+            }
+            return null;
+        }
+
+        // Side for the string-based constructors: GetSide's nullable result is unwrapped or rejected here.
+        private static Side RequireSide(string sideLabel)
+        {
+            Side? side = GetSide(sideLabel);
+            if (side.HasValue) return side.Value;
+            throw new System.ArgumentException("sideLabel must be either front or back");
+        }
+
+        private int minGram;
+        private int maxGram;
+        private int gramSize;
+        private Side side;
+        private bool started;
+        private int inLen; // length of the input AFTER trim()
+        private int charsRead; // length of the input
+        private string inStr;
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram)
+            : base(input)
+        {
+            Init(version, side, minGram, maxGram);
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram)
+            : base(factory, input)
+        {
+            Init(version, side, minGram, maxGram);
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram; must be "front" or "back" </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram)
+            : this(version, input, RequireSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram; must be "front" or "back" </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        [Obsolete]
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, string sideLabel, int minGram, int maxGram)
+            : this(version, factory, input, RequireSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram)
+            : this(version, input, Side.FRONT, minGram, maxGram)
+        {
+        }
+
+        /// <summary>
+        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+        /// </summary>
+        /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+        /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+        /// <param name="minGram"> the smallest n-gram to generate </param>
+        /// <param name="maxGram"> the largest n-gram to generate </param>
+        public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram)
+            : this(version, factory, input, Side.FRONT, minGram, maxGram)
+        {
+        }
+
+        // Validates the arguments, stores the settings and registers the token attributes.
+        private void Init(Version version, Side side, int minGram, int maxGram)
+        {
+            if (version == null)
+            {
+                throw new System.ArgumentException("version must not be null");
+            }
+
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+
+            if (version.OnOrAfter(Version.LUCENE_44))
+            {
+                if (side == Side.BACK)
+                {
+                    throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
+                }
+            }
+            else
+            {
+                maxGram = Math.Min(maxGram, 1024);
+            }
+
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+            this.side = side;
+
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        /// <summary>
+        /// Advances to the next n-gram; returns true if one was produced, false at end of stream. </summary>
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            // if we are just starting, read the whole input
+            if (!started)
+            {
+                started = true;
+                gramSize = minGram;
+                int limit = side == Side.FRONT ? maxGram : 1024;
+                char[] chars = new char[Math.Min(1024, limit)];
+                charsRead = 0;
+                // TODO: refactor to a shared readFully somewhere:
+                bool exhausted = false;
+                while (charsRead < limit)
+                {
+                    int inc = input.Read(chars, charsRead, chars.Length - charsRead);
+                    if (inc <= 0) // TextReader.Read signals end of input with 0, not -1
+                    {
+                        exhausted = true;
+                        break;
+                    }
+                    charsRead += inc;
+                    if (charsRead == chars.Length && charsRead < limit)
+                    {
+                        chars = ArrayUtil.Grow(chars);
+                    }
+                }
+
+                inStr = new string(chars, 0, charsRead);
+                inStr = inStr.Trim();
+
+                if (!exhausted)
+                {
+                    // Read extra throwaway chars so that on end() we
+                    // report the correct offset:
+                    var throwaway = new char[1024];
+                    while (true)
+                    {
+                        int inc = input.Read(throwaway, 0, throwaway.Length);
+                        if (inc <= 0)
+                        {
+                            break;
+                        }
+                        charsRead += inc;
+                    }
+                }
+
+                inLen = inStr.Length;
+                if (inLen == 0)
+                {
+                    return false;
+                }
+                posIncrAtt.PositionIncrement = 1;
+            }
+            else
+            {
+                posIncrAtt.PositionIncrement = 0;
+            }
+
+            // quit once the gram size exceeds either the configured range or the input length
+            if (gramSize > maxGram || gramSize > inLen)
+            {
+                return false;
+            }
+
+            // grab gramSize chars from front or back
+            int start = side == Side.FRONT ? 0 : inLen - gramSize;
+            int end = start + gramSize;
+            termAtt.SetEmpty().Append(inStr, start, end);
+            offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+            gramSize++;
+            return true;
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            int finalOffset = CorrectOffset(charsRead);
+            this.offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            started = false;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
index de32d23..0dd0529 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -1,164 +1,149 @@
 using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis.Util;
-using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-	using org.apache.lucene.analysis;
-	using LowerCaseFilter = LowerCaseFilter;
-	using StopAnalyzer = StopAnalyzer;
-	using StopFilter = StopFilter;
-	using CharArraySet = CharArraySet;
-	using StopwordAnalyzerBase = StopwordAnalyzerBase;
-	using WordlistLoader = WordlistLoader;
-	using Version = org.apache.lucene.util.Version;
-
-
-	/// <summary>
-	/// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>, {@link
-	/// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
-	/// English stop words.
-	/// 
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="Version"/>
-	/// compatibility when creating ClassicAnalyzer:
-	/// <ul>
-	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
-	///         supplementary characters in stopwords
-	///   <li> As of 2.9, StopFilter preserves position
-	///        increments
-	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
-	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
-	/// </ul>
-	/// 
-	/// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
-	/// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
-	/// as specified by UAX#29.
-	/// </para>
-	/// </summary>
-	public sealed class ClassicAnalyzer : StopwordAnalyzerBase
-	{
-
-	  /// <summary>
-	  /// Default maximum allowed token length </summary>
-	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-	  /// <summary>
-	  /// An unmodifiable set containing some common English words that are usually not
-	  /// useful for searching. 
-	  /// </summary>
-	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
-	  /// <summary>
-	  /// Builds an analyzer with the given stop words. </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopWords"> stop words  </param>
-	  public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the default stop words ({@link
-	  /// #STOP_WORDS_SET}). </summary>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  public ClassicAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Builds an analyzer with the stop words from the given reader. </summary>
-	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
-	  /// <param name="matchVersion"> Lucene version to match See {@link
-	  /// <a href="#version">above</a>} </param>
-	  /// <param name="stopwords"> Reader to read stop words from  </param>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public ClassicAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
-	  public ClassicAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
-	  {
-	  }
-
-	  /// <summary>
-	  /// Set maximum allowed token length.  If a token is seen
-	  /// that exceeds this length then it is discarded.  This
-	  /// setting only takes effect the next time tokenStream or
-	  /// tokenStream is called.
-	  /// </summary>
-	  public int MaxTokenLength
-	  {
-		  set
-		  {
-			maxTokenLength = value;
-		  }
-		  get
-		  {
-			return maxTokenLength;
-		  }
-	  }
-
-
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
-	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
-		ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
-		src.MaxTokenLength = maxTokenLength;
-		TokenStream tok = new ClassicFilter(src);
-		tok = new LowerCaseFilter(matchVersion, tok);
-		tok = new StopFilter(matchVersion, tok, stopwords);
-		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
-	  }
-
-	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
-	  {
-		  private readonly ClassicAnalyzer outerInstance;
-
-		  private Reader reader;
-		  private org.apache.lucene.analysis.standard.ClassicTokenizer src;
-
-		  public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, org.apache.lucene.analysis.standard.ClassicTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.reader = reader;
-			  this.src = src;
-		  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-		  protected internal override Reader Reader
-		  {
-			  set
-			  {
-				src.MaxTokenLength = outerInstance.maxTokenLength;
-				base.Reader = value;
-			  }
-		  }
-	  }
-	}
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>,
+    /// <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>, using a list of
+    /// English stop words.
+    /// 
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+    /// compatibility when creating ClassicAnalyzer:
+    /// <ul>
+    ///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+    ///         supplementary characters in stopwords</li>
+    ///   <li> As of 2.9, StopFilter preserves position
+    ///        increments</li>
+    ///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+    ///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</li>
+    /// </ul>
+    /// 
+    /// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
+    /// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
+    /// as specified by UAX#29.
+    /// </para>
+    /// </summary>
+    public sealed class ClassicAnalyzer : StopwordAnalyzerBase
+    {
+
+        /// <summary>
+        /// Default maximum allowed token length </summary>
+        public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+        private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+        /// <summary>
+        /// An unmodifiable set containing some common English words that are usually not
+        /// useful for searching. 
+        /// </summary>
+        public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+        /// <summary>
+        /// Builds an analyzer with the given stop words. </summary>
+        /// <param name="matchVersion"> Lucene version to match. See
+        /// <a href="#version">above</a>. </param>
+        /// <param name="stopWords"> stop words  </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
+            : base(matchVersion, stopWords)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the default stop words
+        /// (<seealso cref="STOP_WORDS_SET"/>). </summary>
+        /// <param name="matchVersion"> Lucene version to match. See
+        /// <a href="#version">above</a>. </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion)
+            : this(matchVersion, STOP_WORDS_SET)
+        {
+        }
+
+        /// <summary>
+        /// Builds an analyzer with the stop words from the given reader. </summary>
+        /// <seealso cref="WordlistLoader"/> (getWordSet(Reader, Version))
+        /// <param name="matchVersion"> Lucene version to match. See
+        /// <a href="#version">above</a>. </param>
+        /// <param name="stopwords"> Reader to read stop words from  </param>
+        public ClassicAnalyzer(LuceneVersion matchVersion, Reader stopwords)
+            : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+        {
+        }
+
+        /// <summary>
+        /// Set maximum allowed token length.  If a token is seen
+        /// that exceeds this length then it is discarded.  This
+        /// setting only takes effect the next time tokenStream
+        /// is called.
+        /// </summary>
+        public int MaxTokenLength
+        {
+            set
+            {
+                maxTokenLength = value;
+            }
+            get
+            {
+                return maxTokenLength;
+            }
+        }
+
+
+        public override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+        {
+            var src = new ClassicTokenizer(matchVersion, reader);
+            src.MaxTokenLength = maxTokenLength;
+            TokenStream tok = new ClassicFilter(src);
+            tok = new LowerCaseFilter(matchVersion, tok);
+            tok = new StopFilter(matchVersion, tok, stopwords);
+            return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+        }
+
+        private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+        {
+            private readonly ClassicAnalyzer outerInstance;
+
+            private Reader reader;
+            private ClassicTokenizer src;
+
+            public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, ClassicTokenizer src, TokenStream tok, Reader reader)
+                : base(src, tok)
+            {
+                this.outerInstance = outerInstance;
+                this.reader = reader;
+                this.src = src;
+            }
+
+            protected override Reader Reader
+            {
+                set
+                {
+                    src.MaxTokenLength = outerInstance.maxTokenLength;
+                    base.Reader = value;
+                }
+            }
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
index 9ee4b32..60bd1dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
@@ -1,92 +1,85 @@
-namespace org.apache.lucene.analysis.standard
-{
-
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+using Lucene.Net.Analysis.Tokenattributes;
 
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+namespace Lucene.Net.Analysis.Standard
+{
 
-	/// <summary>
-	/// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
 
-	public class ClassicFilter : TokenFilter
-	{
+    public class ClassicFilter : TokenFilter
+    {
 
-	  /// <summary>
-	  /// Construct filtering <i>in</i>. </summary>
-	  public ClassicFilter(TokenStream @in) : base(@in)
-	  {
-	  }
+        /// <summary>
+        /// Construct filtering <i>in</i>. </summary>
+        public ClassicFilter(TokenStream @in)
+            : base(@in)
+        {
+            typeAtt = AddAttribute<ITypeAttribute>();
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
 
-	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
-	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+        private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+        private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
 
-	  // this filters uses attribute type
-	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+        // this filter uses the type and term attributes
+        private readonly ITypeAttribute typeAtt;
+        private readonly ICharTermAttribute termAtt;
 
-	  /// <summary>
-	  /// Returns the next token in the stream, or null at EOS.
-	  /// <para>Removes <tt>'s</tt> from the end of words.
-	  /// </para>
-	  /// <para>Removes dots from acronyms.
-	  /// </para>
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (!input.incrementToken())
-		{
-		  return false;
-		}
+        /// <summary>
+        /// Advances to the next token in the stream; returns false at EOS.
+        /// <para>Removes <tt>'s</tt> from the end of words.
+        /// </para>
+        /// <para>Removes dots from acronyms.
+        /// </para>
+        /// </summary>
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
 
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
-		char[] buffer = termAtt.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bufferLength = termAtt.length();
-		int bufferLength = termAtt.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String type = typeAtt.type();
-		string type = typeAtt.type();
+            char[] buffer = termAtt.Buffer();
+            int bufferLength = termAtt.Length;
+            string type = typeAtt.Type;
 
-		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
-		{
-		  // Strip last 2 characters off
-		  termAtt.Length = bufferLength - 2;
-		} // remove dots
-		else if (type == ACRONYM_TYPE)
-		{
-		  int upto = 0;
-		  for (int i = 0;i < bufferLength;i++)
-		  {
-			char c = buffer[i];
-			if (c != '.')
-			{
-			  buffer[upto++] = c;
-			}
-		  }
-		  termAtt.Length = upto;
-		}
+            if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+            {
+                // Strip last 2 characters off
+                termAtt.Length = bufferLength - 2;
+            } // remove dots
+            else if (type == ACRONYM_TYPE)
+            {
+                int upto = 0;
+                for (int i = 0; i < bufferLength; i++)
+                {
+                    char c = buffer[i];
+                    if (c != '.')
+                    {
+                        buffer[upto++] = c;
+                    }
+                }
+                termAtt.Length = upto;
+            }
 
-		return true;
-	  }
-	}
+            return true;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
index 2107ccc..45d7cd0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
@@ -1,55 +1,53 @@
 using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="ClassicFilter"/>.
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class ClassicFilterFactory : TokenFilterFactory
+    {
 
-	using TokenFilterFactory = TokenFilterFactory;
+        /// <summary>
+        /// Creates a new ClassicFilterFactory </summary>
+        public ClassicFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
-	/// <summary>
-	/// Factory for <seealso cref="ClassicFilter"/>.
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class ClassicFilterFactory : TokenFilterFactory
-	{
-
-	  /// <summary>
-	  /// Creates a new ClassicFilterFactory </summary>
-	  public ClassicFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
-
-	  public override TokenFilter create(TokenStream input)
-	  {
-		return new ClassicFilter(input);
-	  }
-	}
+        public override TokenStream Create(TokenStream input)
+        {
+            return new ClassicFilter(input);
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
index f9c680e..3ef7a9e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -15,198 +15,185 @@
  * limitations under the License.
  */
 
-using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
 {
-
-
-	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-	using Version = org.apache.lucene.util.Version;
-
-	/// <summary>
-	/// A grammar-based tokenizer constructed with JFlex
-	/// 
-	/// <para> This should be a good tokenizer for most European-language documents:
-	/// 
-	/// <ul>
-	///   <li>Splits words at punctuation characters, removing punctuation. However, a 
-	///     dot that's not followed by whitespace is considered part of a token.
-	///   <li>Splits words at hyphens, unless there's a number in the token, in which case
-	///     the whole token is interpreted as a product number and is not split.
-	///   <li>Recognizes email addresses and internet hostnames as one token.
-	/// </ul>
-	/// 
-	/// </para>
-	/// <para>Many applications have specific tokenizer needs.  If this tokenizer does
-	/// not suit your application, please consider copying this source code
-	/// directory to your project and maintaining your own grammar-based tokenizer.
-	/// 
-	/// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
-	/// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
-	/// as specified by UAX#29.
-	/// </para>
-	/// </summary>
-
-	public sealed class ClassicTokenizer : Tokenizer
-	{
-	  /// <summary>
-	  /// A private instance of the JFlex-constructed scanner </summary>
-	  private StandardTokenizerInterface scanner;
-
-	  public const int ALPHANUM = 0;
-	  public const int APOSTROPHE = 1;
-	  public const int ACRONYM = 2;
-	  public const int COMPANY = 3;
-	  public const int EMAIL = 4;
-	  public const int HOST = 5;
-	  public const int NUM = 6;
-	  public const int CJ = 7;
-
-	  public const int ACRONYM_DEP = 8;
-
-	  /// <summary>
-	  /// String token types that correspond to token type int constants </summary>
-	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
-
-	  private int skippedPositions;
-
-	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
-
-	  /// <summary>
-	  /// Set the max allowed token length.  Any token longer
-	  ///  than this is skipped. 
-	  /// </summary>
-	  public int MaxTokenLength
-	  {
-		  set
-		  {
-			if (value < 1)
-			{
-			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
-			}
-			this.maxTokenLength = value;
-		  }
-		  get
-		  {
-			return maxTokenLength;
-		  }
-	  }
-
-
-	  /// <summary>
-	  /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
-	  /// the <code>input</code> to the newly created JFlex scanner.
-	  /// </summary>
-	  /// <param name="input"> The input reader
-	  /// 
-	  /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
-	  public ClassicTokenizer(Version matchVersion, Reader input) : base(input)
-	  {
-		init(matchVersion);
-	  }
-
-	  /// <summary>
-	  /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
-	  /// </summary>
-	  public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
-	  {
-		init(matchVersion);
-	  }
-
-	  private void init(Version matchVersion)
-	  {
-		this.scanner = new ClassicTokenizerImpl(input);
-	  }
-
-	  // this tokenizer generates three attributes:
-	  // term offset, positionIncrement and type
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
-	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-
-	  /*
-	   * (non-Javadoc)
-	   *
-	   * @see org.apache.lucene.analysis.TokenStream#next()
-	   */
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		clearAttributes();
-		skippedPositions = 0;
-
-		while (true)
-		{
-		  int tokenType = scanner.NextToken;
-
-		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
-		  {
-			return false;
-		  }
-
-		  if (scanner.yylength() <= maxTokenLength)
-		  {
-			posIncrAtt.PositionIncrement = skippedPositions + 1;
-			scanner.getText(termAtt);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int start = scanner.yychar();
-			int start = scanner.yychar();
-			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
-
-			if (tokenType == ClassicTokenizer.ACRONYM_DEP)
-			{
-			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
-			  termAtt.Length = termAtt.length() - 1; // remove extra '.'
-			}
-			else
-			{
-			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
-			}
-			return true;
-		  }
-		  else
-			// When we skip a too-long term, we still increment the
-			// position increment
-		  {
-			skippedPositions++;
-		  }
-		}
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
-	  public override void end()
-	  {
-		base.end();
-		// set final offset
-		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
-		offsetAtt.setOffset(finalOffset, finalOffset);
-		// adjust any skipped tokens
-		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-	  public override void close()
-	  {
-		base.close();
-		scanner.yyreset(input);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		scanner.yyreset(input);
-		skippedPositions = 0;
-	  }
-	}
+    /// <summary>
+    /// A grammar-based tokenizer constructed with JFlex
+    /// 
+    /// <para> This should be a good tokenizer for most European-language documents:
+    /// 
+    /// <ul>
+    ///   <li>Splits words at punctuation characters, removing punctuation. However, a 
+    ///     dot that's not followed by whitespace is considered part of a token.</li>
+    ///   <li>Splits words at hyphens, unless there's a number in the token, in which case
+    ///     the whole token is interpreted as a product number and is not split.</li>
+    ///   <li>Recognizes email addresses and internet hostnames as one token.</li>
+    /// </ul>
+    /// 
+    /// </para>
+    /// <para>Many applications have specific tokenizer needs.  If this tokenizer does
+    /// not suit your application, please consider copying this source code
+    /// directory to your project and maintaining your own grammar-based tokenizer.
+    /// 
+    /// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
+    /// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
+    /// as specified by UAX#29.
+    /// </para>
+    /// </summary>
+
+    public sealed class ClassicTokenizer : Tokenizer
+    {
+        /// <summary>
+        /// A private instance of the JFlex-constructed scanner </summary>
+        private StandardTokenizerInterface scanner;
+
+        public const int ALPHANUM = 0;
+        public const int APOSTROPHE = 1;
+        public const int ACRONYM = 2;
+        public const int COMPANY = 3;
+        public const int EMAIL = 4;
+        public const int HOST = 5;
+        public const int NUM = 6;
+        public const int CJ = 7;
+
+        public const int ACRONYM_DEP = 8;
+
+        /// <summary>
+        /// String token types that correspond to token type int constants </summary>
+        public static readonly string[] TOKEN_TYPES = new string[] { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>" };
+
+        private int skippedPositions;
+
+        private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+        /// <summary>
+        /// Set the max allowed token length.  Any token longer
+        ///  than this is skipped. 
+        /// </summary>
+        public int MaxTokenLength
+        {
+            set
+            {
+                if (value < 1)
+                {
+                    throw new System.ArgumentException("maxTokenLength must be greater than zero");
+                }
+                this.maxTokenLength = value;
+            }
+            get
+            {
+                return maxTokenLength;
+            }
+        }
+
+
+        /// <summary>
+        /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
+        /// the <code>input</code> to the newly created JFlex scanner.
+        /// </summary>
+        /// <param name="input"> The input reader
+        /// 
+        /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
+        public ClassicTokenizer(LuceneVersion matchVersion, Reader input)
+            : base(input)
+        {
+            Init(matchVersion);
+        }
+
+        /// <summary>
+        /// Creates a new ClassicTokenizer with a given <seealso cref="AttributeFactory"/>
+        /// </summary>
+        public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
+            : base(factory, input)
+        {
+            Init(matchVersion);
+        }
+
+        private void Init(LuceneVersion matchVersion)
+        {
+            this.scanner = new ClassicTokenizerImpl(input);
+        }
+
+        // this tokenizer generates four attributes:
+        // term, offset, positionIncrement and type
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+        private readonly PositionIncrementAttribute posIncrAtt;
+        private readonly TypeAttribute typeAtt;
+        /*
+         * (non-Javadoc)
+         *
+         * @see org.apache.lucene.analysis.TokenStream#next()
+         */
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            skippedPositions = 0;
+
+            while (true)
+            {
+                int tokenType = scanner.NextToken;
+
+                if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+                {
+                    return false;
+                }
+
+                if (scanner.yylength() <= maxTokenLength)
+                {
+                    posIncrAtt.PositionIncrement = skippedPositions + 1;
+                    scanner.getText(termAtt);
+
+                    int start = scanner.yychar();
+                    offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.Length));
+
+                    if (tokenType == ClassicTokenizer.ACRONYM_DEP)
+                    {
+                        typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
+                        termAtt.Length = termAtt.Length - 1; // remove extra '.'
+                    }
+                    else
+                    {
+                        typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
+                    }
+                    return true;
+                }
+                else
+                // When we skip a too-long term, we still increment the
+                // position increment
+                {
+                    skippedPositions++;
+                }
+            }
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            int finalOffset = CorrectOffset(scanner.yychar() + scanner.yylength());
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+            // adjust any skipped tokens
+            posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+        }
+
+        public override void Dispose()
+        {
+            base.Dispose();
+            scanner.yyreset(input);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            scanner.yyreset(input);
+            skippedPositions = 0;
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index 4d30289..f2ad424 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -1,7 +1,9 @@
 /* The following code was generated by JFlex 1.5.1 */
 using System;
 using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
 using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
 
 namespace Lucene.Net.Analysis.Standard
 {
@@ -286,9 +288,9 @@ namespace Lucene.Net.Analysis.Standard
 	/// <summary>
 	/// Fills CharTermAttribute with the current token text.
 	/// </summary>
-	public void getText(CharTermAttribute t)
+	public void getText(ICharTermAttribute t)
 	{
-	  t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	  t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
 	}
 
 
@@ -359,7 +361,7 @@ namespace Lucene.Net.Analysis.Standard
 		}
 
 		/* finally: fill the buffer with new input */
-		int numRead = zzReader.read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+		int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
 
 		if (numRead > 0)
 		{
@@ -369,7 +371,7 @@ namespace Lucene.Net.Analysis.Standard
 		// unlikely but not impossible: read 0 characters, but not at end of stream    
 		if (numRead == 0)
 		{
-		  int c = zzReader.read();
+		  int c = zzReader.Read();
 		  if (c == -1)
 		  {
 			return true;
@@ -389,8 +391,6 @@ namespace Lucene.Net.Analysis.Standard
 	  /// <summary>
 	  /// Closes the input stream.
 	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public final void yyclose() throws java.io.IOException
 	  public void yyclose()
 	  {
 		zzAtEOF = true; // indicate end of file
@@ -398,7 +398,7 @@ namespace Lucene.Net.Analysis.Standard
 
 		if (zzReader != null)
 		{
-		  zzReader.close();
+		  zzReader.Close();
 		}
 	  }