Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/08 00:12:05 UTC

[01/34] lucenenet git commit: Changing naming to make porting of more stuff easier

Repository: lucenenet
Updated Branches:
  refs/heads/master 6d26b3c7e -> 997171765


Changing naming to make porting of more stuff easier


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b8454a31
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b8454a31
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b8454a31

Branch: refs/heads/master
Commit: b8454a31334ec1a82afc48e4214d48aa89bbc95f
Parents: 6d26b3c
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Sat Nov 8 00:53:27 2014 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Sat Nov 8 00:53:27 2014 +0200

----------------------------------------------------------------------
 src/Lucene.Net.Core/Analysis/Tokenizer.cs              | 13 +++++++------
 .../core/Analysis/TestGraphTokenizers.cs               |  2 +-
 src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs     |  2 +-
 src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs |  2 +-
 src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs     |  4 ++--
 5 files changed, 12 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b8454a31/src/Lucene.Net.Core/Analysis/Tokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenizer.cs b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
index 11a2728..f4b7b7f 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenizer.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
@@ -1,6 +1,7 @@
 using System;
 using System.Diagnostics;
 using System.IO;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis
 {
@@ -34,7 +35,7 @@ namespace Lucene.Net.Analysis
     {
         /// <summary>
         /// The text source for this Tokenizer. </summary>
-        protected internal TextReader Input = ILLEGAL_STATE_READER;
+        protected internal TextReader input = ILLEGAL_STATE_READER;
 
         /// <summary>
         /// Pending reader: not actually assigned to input until reset() </summary>
@@ -72,11 +73,11 @@ namespace Lucene.Net.Analysis
         /// </summary>
         public override void Dispose()
         {
-            Input.Close();
+            input.Close();
             // LUCENE-2387: don't hold onto Reader after close, so
             // GC can reclaim
             InputPending = ILLEGAL_STATE_READER;
-            Input = ILLEGAL_STATE_READER;
+            input = ILLEGAL_STATE_READER;
         }
 
         /// <summary>
@@ -87,7 +88,7 @@ namespace Lucene.Net.Analysis
         /// <seealso> cref= CharFilter#correctOffset </seealso>
         protected internal int CorrectOffset(int currentOff)
         {
-            return (Input is CharFilter) ? ((CharFilter)Input).CorrectOffset(currentOff) : currentOff;
+            return (input is CharFilter) ? ((CharFilter)input).CorrectOffset(currentOff) : currentOff;
         }
 
         /// <summary>
@@ -103,7 +104,7 @@ namespace Lucene.Net.Analysis
                 {
                     throw new System.NullReferenceException("input must not be null");
                 }
-                else if (this.Input != ILLEGAL_STATE_READER)
+                else if (this.input != ILLEGAL_STATE_READER)
                 {
                     //throw new Exception("TokenStream contract violation: close() call missing");
                 }
@@ -115,7 +116,7 @@ namespace Lucene.Net.Analysis
         public override void Reset()
         {
             base.Reset();
-            Input = InputPending;
+            input = InputPending;
             InputPending = ILLEGAL_STATE_READER;
         }
 
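The reset/dispose contract above is worth spelling out: a pending reader only becomes the active input when Reset() is called, and Dispose() swaps ILLEGAL_STATE_READER back in so a closed Tokenizer fails fast instead of holding the Reader. A minimal consumer sketch, assuming a Tokenizer subclass whose constructor accepts a TextReader (MyTokenizer is illustrative, not part of this commit):

    // requires: using System.IO; using Lucene.Net.Analysis;
    Tokenizer tokenizer = new MyTokenizer(new StringReader("hello world"));
    tokenizer.Reset();                 // InputPending becomes the active 'input'
    while (tokenizer.IncrementToken())
    {
        // read the current token's attributes here
    }
    tokenizer.Dispose();               // 'input' reverts to ILLEGAL_STATE_READER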

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b8454a31/src/Lucene.Net.Tests/core/Analysis/TestGraphTokenizers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Analysis/TestGraphTokenizers.cs b/src/Lucene.Net.Tests/core/Analysis/TestGraphTokenizers.cs
index 23aac1d..4fe19b1 100644
--- a/src/Lucene.Net.Tests/core/Analysis/TestGraphTokenizers.cs
+++ b/src/Lucene.Net.Tests/core/Analysis/TestGraphTokenizers.cs
@@ -120,7 +120,7 @@ namespace Lucene.Net.Analysis
                 char[] buffer = new char[256];
                 while (true)
                 {
-                    int count = Input.Read(buffer, 0, buffer.Length);
+                    int count = input.Read(buffer, 0, buffer.Length);
 
                     //.NET TextReader.Read(buff, int, int) returns 0, not -1 on no chars
                     if (count == 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b8454a31/src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs b/src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs
index 5b4ff59..a9801f3 100644
--- a/src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs
+++ b/src/Lucene.Net.Tests/core/Index/TestIndexWriter.cs
@@ -1914,7 +1914,7 @@ namespace Lucene.Net.Index
                 StringBuilder b = new StringBuilder();
                 char[] buffer = new char[1024];
                 int n;
-                while ((n = Input.Read(buffer, 0, buffer.Length)) != -1)
+                while ((n = input.Read(buffer, 0, buffer.Length)) != -1)
                 {
                     b.Append(buffer, 0, n);
                 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b8454a31/src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs b/src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs
index 186942b..e6541b1 100644
--- a/src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs
+++ b/src/Lucene.Net.Tests/core/Search/TestTermRangeQuery.cs
@@ -241,7 +241,7 @@ namespace Lucene.Net.Search
                     }
                     else
                     {
-                        int count = Input.Read(Buffer, 0, Buffer.Length);
+                        int count = input.Read(Buffer, 0, Buffer.Length);
                         ClearAttributes();
                         Done = true;
                         if (count == 1)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b8454a31/src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs b/src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs
index e245bf1..0140fc0 100644
--- a/src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs
+++ b/src/Lucene.Net.Tests/core/Util/TestQueryBuilder.cs
@@ -1,4 +1,4 @@
-using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Tokenattributes;
 using NUnit.Framework;
 using System.IO;
 
@@ -220,7 +220,7 @@ namespace Lucene.Net.Util
 
             public override bool IncrementToken()
             {
-                int ch = Input.Read();
+                int ch = input.Read();
                 if (ch < 0)
                 {
                     return false;

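The practical upshot of this commit for subclass authors: token producers now read from the protected lowercase input field, matching the Java field name so ported code stays closer to line-for-line. A hedged sketch of a subclass under the new convention (the base-constructor shape is assumed, and attribute handling is omitted since this commit does not touch it):

    // requires: using System.IO; using Lucene.Net.Analysis;
    internal sealed class LengthProbeTokenizer : Tokenizer
    {
        private readonly char[] buffer = new char[256];

        public LengthProbeTokenizer(TextReader reader) : base(reader) { }

        public override bool IncrementToken()
        {
            int count = input.Read(buffer, 0, buffer.Length); // lowercase 'input' after this commit
            // .NET TextReader.Read returns 0 at end of input, not -1 as in Java
            return count != 0;
        }
    }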

[19/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
new file mode 100644
index 0000000..70b4a94
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -0,0 +1,475 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ByteArrayDataInput = org.apache.lucene.store.ByteArrayDataInput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using Version = org.apache.lucene.util.Version;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+
+	/// <summary>
+	/// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
+	/// conforms to the algorithm of the original Hunspell implementation, including recursive suffix stripping.
+	/// </summary>
+	internal sealed class Stemmer
+	{
+	  private readonly Dictionary dictionary;
+	  private readonly BytesRef scratch = new BytesRef();
+	  private readonly StringBuilder segment = new StringBuilder();
+	  private readonly ByteArrayDataInput affixReader;
+
+	  // used for normalization
+	  private readonly StringBuilder scratchSegment = new StringBuilder();
+	  private char[] scratchBuffer = new char[32];
+
+	  /// <summary>
+	  /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
+	  /// </summary>
+	  /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
+	  public Stemmer(Dictionary dictionary)
+	  {
+		this.dictionary = dictionary;
+		this.affixReader = new ByteArrayDataInput(dictionary.affixData);
+	  }
+
+	  /// <summary>
+	  /// Find the stem(s) of the provided word.
+	  /// </summary>
+	  /// <param name="word"> Word to find the stems for </param>
+	  /// <returns> List of stems for the word </returns>
+	  public IList<CharsRef> stem(string word)
+	  {
+		return stem(word.ToCharArray(), word.Length);
+	  }
+
+	  /// <summary>
+	  /// Find the stem(s) of the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to find the stems for </param>
+	  /// <returns> List of stems for the word </returns>
+	  public IList<CharsRef> stem(char[] word, int length)
+	  {
+
+		if (dictionary.needsInputCleaning)
+		{
+		  scratchSegment.Length = 0;
+		  scratchSegment.Append(word, 0, length);
+		  CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
+		  scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
+		  length = segment.Length;
+		  segment.getChars(0, length, scratchBuffer, 0);
+		  word = scratchBuffer;
+		}
+
+		IList<CharsRef> stems = new List<CharsRef>();
+		IntsRef forms = dictionary.lookupWord(word, 0, length);
+		if (forms != null)
+		{
+		  // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
+		  // just because it exists, does not make it valid...
+		  for (int i = 0; i < forms.length; i++)
+		  {
+			stems.Add(newStem(word, length));
+		  }
+		}
+		stems.AddRange(stem(word, length, -1, -1, -1, 0, true, true, false, false));
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// Find the unique stem(s) of the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to find the stems for </param>
+	  /// <returns> List of stems for the word </returns>
+	  public IList<CharsRef> uniqueStems(char[] word, int length)
+	  {
+		IList<CharsRef> stems = stem(word, length);
+		if (stems.Count < 2)
+		{
+		  return stems;
+		}
+		CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+		IList<CharsRef> deduped = new List<CharsRef>();
+		foreach (CharsRef s in stems)
+		{
+		  if (!terms.contains(s))
+		  {
+			deduped.Add(s);
+			terms.add(s);
+		  }
+		}
+		return deduped;
+	  }
+
+	  private CharsRef newStem(char[] buffer, int length)
+	  {
+		if (dictionary.needsOutputCleaning)
+		{
+		  scratchSegment.Length = 0;
+		  scratchSegment.Append(buffer, 0, length);
+		  try
+		  {
+			Dictionary.applyMappings(dictionary.oconv, scratchSegment);
+		  }
+		  catch (IOException bogus)
+		  {
+			throw new Exception(bogus.Message, bogus);
+		  }
+		  char[] cleaned = new char[scratchSegment.Length];
+		  scratchSegment.getChars(0, cleaned.Length, cleaned, 0);
+		  return new CharsRef(cleaned, 0, cleaned.Length);
+		}
+		else
+		{
+		  return new CharsRef(buffer, 0, length);
+		}
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Generates a list of stems for the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to generate the stems for </param>
+	  /// <param name="previous"> previous affix that was removed (so we don't remove the same one twice) </param>
+	  /// <param name="prevFlag"> Flag from a previous stemming step that needs to be cross-checked with any affixes in this recursive step </param>
+	  /// <param name="prefixFlag"> flag of the innermost removed prefix, so that when removing a suffix, it is also checked against the word </param>
+	  /// <param name="recursionDepth"> current recursion depth </param>
+	  /// <param name="doPrefix"> true if we should remove prefixes </param>
+	  /// <param name="doSuffix"> true if we should remove suffixes </param>
+	  /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
+	  ///        if we are removing a suffix and it has no continuation requirements, that is fine,
+	  ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
+	  /// <param name="circumfix"> true if the previous prefix removal was flagged as a circumfix;
+	  ///        this means the innermost suffix must also carry the circumfix flag. </param>
+	  /// <returns> List of stems, or empty list if no stems are found </returns>
+	  private IList<CharsRef> stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
+	  {
+
+		// TODO: allow this stuff to be reused by tokenfilter
+		IList<CharsRef> stems = new List<CharsRef>();
+
+		if (doPrefix && dictionary.prefixes != null)
+		{
+		  for (int i = length - 1; i >= 0; i--)
+		  {
+			IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
+			if (prefixes == null)
+			{
+			  continue;
+			}
+
+			for (int j = 0; j < prefixes.length; j++)
+			{
+			  int prefix = prefixes.ints[prefixes.offset + j];
+			  if (prefix == previous)
+			  {
+				continue;
+			  }
+			  affixReader.Position = 8 * prefix;
+			  char flag = (char)(affixReader.readShort() & 0xffff);
+			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
+			  int condition = (char)(affixReader.readShort() & 0xffff);
+			  bool crossProduct = (condition & 1) == 1;
+			  condition = (int)((uint)condition >> 1);
+			  char append = (char)(affixReader.readShort() & 0xffff);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean compatible;
+			  bool compatible;
+			  if (recursionDepth == 0)
+			  {
+				compatible = true;
+			  }
+			  else if (crossProduct)
+			  {
+				// cross check incoming continuation class (flag of previous affix) against list.
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				Debug.Assert(prevFlag >= 0);
+				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+			  }
+			  else
+			  {
+				compatible = false;
+			  }
+
+			  if (compatible)
+			  {
+				int deAffixedStart = i;
+				int deAffixedLength = length - deAffixedStart;
+
+				int stripStart = dictionary.stripOffsets[stripOrd];
+				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+				int stripLength = stripEnd - stripStart;
+
+				if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
+				{
+				  continue;
+				}
+
+				char[] strippedWord = new char[stripLength + deAffixedLength];
+				Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
+				Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
+
+				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
+
+				stems.AddRange(stemList);
+			  }
+			}
+		  }
+		}
+
+		if (doSuffix && dictionary.suffixes != null)
+		{
+		  for (int i = 0; i < length; i++)
+		  {
+			IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
+			if (suffixes == null)
+			{
+			  continue;
+			}
+
+			for (int j = 0; j < suffixes.length; j++)
+			{
+			  int suffix = suffixes.ints[suffixes.offset + j];
+			  if (suffix == previous)
+			  {
+				continue;
+			  }
+			  affixReader.Position = 8 * suffix;
+			  char flag = (char)(affixReader.readShort() & 0xffff);
+			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
+			  int condition = (char)(affixReader.readShort() & 0xffff);
+			  bool crossProduct = (condition & 1) == 1;
+			  condition = (int)((uint)condition >> 1);
+			  char append = (char)(affixReader.readShort() & 0xffff);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean compatible;
+			  bool compatible;
+			  if (recursionDepth == 0)
+			  {
+				compatible = true;
+			  }
+			  else if (crossProduct)
+			  {
+				// cross check incoming continuation class (flag of previous affix) against list.
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				Debug.Assert(prevFlag >= 0);
+				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+			  }
+			  else
+			  {
+				compatible = false;
+			  }
+
+			  if (compatible)
+			  {
+				int appendLength = length - i;
+				int deAffixedLength = length - appendLength;
+
+				int stripStart = dictionary.stripOffsets[stripOrd];
+				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+				int stripLength = stripEnd - stripStart;
+
+				if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
+				{
+				  continue;
+				}
+
+				char[] strippedWord = new char[stripLength + deAffixedLength];
+				Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
+				Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
+
+				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
+
+				stems.AddRange(stemList);
+			  }
+			}
+		  }
+		}
+
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// checks condition of the concatenation of two strings </summary>
+	  // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
+	  // but this is a little bit more complicated.
+	  private bool checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
+	  {
+		if (condition != 0)
+		{
+		  CharacterRunAutomaton pattern = dictionary.patterns[condition];
+		  int state = pattern.InitialState;
+		  for (int i = c1off; i < c1off + c1len; i++)
+		  {
+			state = pattern.step(state, c1[i]);
+			if (state == -1)
+			{
+			  return false;
+			}
+		  }
+		  for (int i = c2off; i < c2off + c2len; i++)
+		  {
+			state = pattern.step(state, c2[i]);
+			if (state == -1)
+			{
+			  return false;
+			}
+		  }
+		  return pattern.isAccept(state);
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Applies the affix rule to the given word, producing a list of stems if any are found
+	  /// </summary>
+	  /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
+	  /// <param name="length"> valid length of stripped word </param>
+	  /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
+	  /// <param name="prefixFlag"> when we have already stripped a prefix, we can't simply recurse and check the suffix, unless both are compatible,
+	  ///                   so we must check the dictionary form against both to add it as a stem! </param>
+	  /// <param name="recursionDepth"> current recursion depth </param>
+	  /// <param name="prefix"> true if we are removing a prefix (false if it is a suffix) </param>
+	  /// <returns> List of stems for the word, or an empty list if none are found </returns>
+	  internal IList<CharsRef> applyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
+	  {
+		// TODO: just pass this in from before, no need to decode it twice
+		affixReader.Position = 8 * affix;
+		char flag = (char)(affixReader.readShort() & 0xffff);
+		affixReader.skipBytes(2); // strip
+		int condition = (char)(affixReader.readShort() & 0xffff);
+		bool crossProduct = (condition & 1) == 1;
+		condition = (int)((uint)condition >> 1);
+		char append = (char)(affixReader.readShort() & 0xffff);
+
+		IList<CharsRef> stems = new List<CharsRef>();
+
+		IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
+		if (forms != null)
+		{
+		  for (int i = 0; i < forms.length; i++)
+		  {
+			dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
+			char[] wordFlags = Dictionary.decodeFlags(scratch);
+			if (Dictionary.hasFlag(wordFlags, flag))
+			{
+			  // confusing: in this one exception, we already chained the first prefix against the second,
+			  // so it doesn't need to be checked against the word
+			  bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
+			  if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag))
+			  {
+				// see if we can chain prefix thru the suffix continuation class (only if it has any!)
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
+				{
+				  continue;
+				}
+			  }
+
+			  // if circumfix was previously set by a prefix, we must check this suffix,
+			  // to ensure it has it, and vice versa
+			  if (dictionary.circumfix != -1)
+			  {
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				bool suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
+				if (circumfix != suffixCircumfix)
+				{
+				  continue;
+				}
+			  }
+			  stems.Add(newStem(strippedWord, length));
+			}
+		  }
+		}
+
+		// if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
+		if (dictionary.circumfix != -1 && !circumfix && prefix)
+		{
+		  dictionary.flagLookup.get(append, scratch);
+		  char[] appendFlags = Dictionary.decodeFlags(scratch);
+		  circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
+		}
+
+		if (crossProduct)
+		{
+		  if (recursionDepth == 0)
+		  {
+			if (prefix)
+			{
+			  // we took away the first prefix.
+			  // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
+			  // COMPLEXPREFIXES = false: combine with a suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+			}
+			else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+			{
+			  // we took away a suffix.
+			  // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
+			  // COMPLEXPREFIXES = false: combine with another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+			}
+		  }
+		  else if (recursionDepth == 1)
+		  {
+			if (prefix && dictionary.complexPrefixes)
+			{
+			  // we took away the second prefix: go look for another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+			}
+			else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+			{
+			  // we took away a prefix, then a suffix: go look for another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+			}
+		  }
+		}
+
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given flag cross checks with the given array of flags
+	  /// </summary>
+	  /// <param name="flag"> Flag to cross check with the array of flags </param>
+	  /// <param name="flags"> Array of flags to cross check against.  Can be {@code null} </param>
+	  /// <returns> {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise </returns>
+	  private bool hasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
+	  {
+		return (flags.Length == 0 && matchEmpty) || Array.BinarySearch(flags, flag) >= 0;
+	  }
+	}
+
+}
\ No newline at end of file

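One detail of the port above that is easy to miss: every affix rule is packed into an 8-byte record of four 16-bit values (flag, strip ordinal, condition, append), with the cross-product marker riding in the low bit of the condition. A standalone sketch of that unpacking, with BinaryReader standing in for ByteArrayDataInput purely so the example is self-contained (the real class reads big-endian shorts, so the byte order here is illustrative only):

    // requires: using System; using System.IO;
    internal static class AffixRecord
    {
        // Decodes the 8-byte record at index 'affix', as Stemmer.applyAffix does.
        public static void Decode(byte[] affixData, int affix)
        {
            var reader = new BinaryReader(new MemoryStream(affixData, 8 * affix, 8));
            char flag = (char)reader.ReadUInt16();     // affix flag
            char stripOrd = (char)reader.ReadUInt16(); // index into dictionary.stripOffsets
            int condition = reader.ReadUInt16();
            bool crossProduct = (condition & 1) == 1;  // low bit marks cross-product affixes
            condition >>= 1;                           // remaining bits index dictionary.patterns
            char append = (char)reader.ReadUInt16();   // continuation flags for this affix
            Console.WriteLine("flag={0} strip={1} cond={2} cross={3} append={4}",
                (int)flag, (int)stripOrd, condition, crossProduct, (int)append);
        }
    }
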
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
new file mode 100644
index 0000000..06368d4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
@@ -0,0 +1,137 @@
+using System;
+
+namespace org.apache.lucene.analysis.hy
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using ArmenianStemmer = org.tartarus.snowball.ext.ArmenianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Armenian.
+	/// </summary>
+	public sealed class ArmenianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Armenian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(ArmenianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ArmenianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
+	  ///         <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new ArmenianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

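The createComponents override above encodes the standard per-language recipe: StandardTokenizer, then StandardFilter, LowerCaseFilter, StopFilter, an optional SetKeywordMarkerFilter, and finally the Snowball stemmer. A hedged sketch of assembling the same chain by hand, using the raw-port names exactly as they appear in this diff (they will shift as the port is cleaned up):

    TextReader reader = new StringReader("sample text");
    Tokenizer source = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    TokenStream result = new StandardFilter(Version.LUCENE_CURRENT, source);
    result = new LowerCaseFilter(Version.LUCENE_CURRENT, result);
    result = new StopFilter(Version.LUCENE_CURRENT, result, ArmenianAnalyzer.DefaultStopSet);
    result = new SnowballFilter(result, new ArmenianStemmer());
    // 'result' is now the assembled Armenian analysis chain
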
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
new file mode 100644
index 0000000..df18160
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
@@ -0,0 +1,138 @@
+using System;
+
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Analyzer for Indonesian (Bahasa)
+	/// </summary>
+	public sealed class IndonesianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Indonesian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(IndonesianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public IndonesianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// <seealso cref="IndonesianStemFilter"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a set of terms not to be stemmed </param>
+	  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided and <seealso cref="IndonesianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		return new TokenStreamComponents(source, new IndonesianStemFilter(result));
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
new file mode 100644
index 0000000..38521a8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
@@ -0,0 +1,75 @@
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="IndonesianStemmer"/> to stem Indonesian words.
+	/// </summary>
+	public sealed class IndonesianStemFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly IndonesianStemmer stemmer = new IndonesianStemmer();
+	  private readonly bool stemDerivational;
+
+	  /// <summary>
+	  /// Calls <seealso cref="#IndonesianStemFilter(TokenStream, boolean) IndonesianStemFilter(input, true)"/>
+	  /// </summary>
+	  public IndonesianStemFilter(TokenStream input) : this(input, true)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new IndonesianStemFilter.
+	  /// <para>
+	  /// If <code>stemDerivational</code> is false, 
+	  /// only inflectional suffixes (particles and possessive pronouns) are stemmed.
+	  /// </para>
+	  /// </summary>
+	  public IndonesianStemFilter(TokenStream input, bool stemDerivational) : base(input)
+	  {
+		this.stemDerivational = stemDerivational;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAtt.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length(), stemDerivational);
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length(), stemDerivational);
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

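The stemDerivational flag above is the filter's only knob: false restricts stemming to inflectional suffixes (particles and possessive pronouns), while true, the default, also strips derivational affixes. A hedged sketch of both modes, reusing the raw-port names from this commit:

    // Full stemming: the single-argument constructor defaults to true.
    TokenStream full = new IndonesianStemFilter(
        new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader("membelikannya")));

    // Inflection-only stemming: derivational prefixes and suffixes are kept.
    TokenStream light = new IndonesianStemFilter(
        new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader("membelikannya")), false);
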
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
new file mode 100644
index 0000000..a7bfe5b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
@@ -0,0 +1,57 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="IndonesianStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class IndonesianStemFilterFactory : TokenFilterFactory
+	{
+	  private readonly bool stemDerivational;
+
+	  /// <summary>
+	  /// Creates a new IndonesianStemFilterFactory </summary>
+	  public IndonesianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		stemDerivational = getBoolean(args, "stemDerivational", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new IndonesianStemFilter(input, stemDerivational);
+	  }
+	}
+
+}
\ No newline at end of file

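Note the factory contract above: base(args) and getBoolean consume the parameters they recognize, so anything still left in the dictionary is treated as a misspelling and rejected. A hedged sketch of that behavior (assuming getBoolean removes the consumed key, as its Java counterpart does):

    // requires: using System.Collections.Generic;
    var args = new Dictionary<string, string> { { "stemDerivational", "false" } };
    var factory = new IndonesianStemFilterFactory(args); // fine: every arg was consumed

    var typo = new Dictionary<string, string> { { "stemDerivationnal", "false" } };
    new IndonesianStemFilterFactory(typo); // throws ArgumentException: "Unknown parameters: ..."
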
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
new file mode 100644
index 0000000..6e339fe
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
@@ -0,0 +1,334 @@
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Stemmer for Indonesian.
+	/// <para>
+	/// Stems Indonesian words with the algorithm presented in:
+	/// <i>A Study of Stemming Effects on Information Retrieval in 
+	/// Bahasa Indonesia</i>, Fadillah Z Tala.
+	/// http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf
+	/// </para>
+	/// </summary>
+	public class IndonesianStemmer
+	{
+	  private int numSyllables;
+	  private int flags;
+	  private const int REMOVED_KE = 1;
+	  private const int REMOVED_PENG = 2;
+	  private const int REMOVED_DI = 4;
+	  private const int REMOVED_MENG = 8;
+	  private const int REMOVED_TER = 16;
+	  private const int REMOVED_BER = 32;
+	  private const int REMOVED_PE = 64;
+
+	  /// <summary>
+	  /// Stem a term (returning its new length).
+	  /// <para>
+	  /// Use <code>stemDerivational</code> to control whether full stemming
+	  /// or only light inflectional stemming is done.
+	  /// </para>
+	  /// </summary>
+	  public virtual int stem(char[] text, int length, bool stemDerivational)
+	  {
+		flags = 0;
+		numSyllables = 0;
+		for (int i = 0; i < length; i++)
+		{
+		  if (isVowel(text[i]))
+		  {
+			  numSyllables++;
+		  }
+		}
+
+		if (numSyllables > 2)
+		{
+			length = removeParticle(text, length);
+		}
+		if (numSyllables > 2)
+		{
+			length = removePossessivePronoun(text, length);
+		}
+
+		if (stemDerivational)
+		{
+		  length = this.stemDerivational(text, length);
+		}
+		return length;
+	  }
+
+	  private int stemDerivational(char[] text, int length)
+	  {
+		int oldLength = length;
+		if (numSyllables > 2)
+		{
+			length = removeFirstOrderPrefix(text, length);
+		}
+		if (oldLength != length) // a rule is fired
+		{
+		  oldLength = length;
+		  if (numSyllables > 2)
+		  {
+			  length = removeSuffix(text, length);
+		  }
+		  if (oldLength != length) // a rule is fired
+		  {
+			if (numSyllables > 2)
+			{
+				length = removeSecondOrderPrefix(text, length);
+			}
+		  }
+		} // fail
+		else
+		{
+		  if (numSyllables > 2)
+		  {
+			  length = removeSecondOrderPrefix(text, length);
+		  }
+		  if (numSyllables > 2)
+		  {
+			  length = removeSuffix(text, length);
+		  }
+		}
+		return length;
+	  }
+
+	  private bool isVowel(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+
+	  private int removeParticle(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "kah") || StemmerUtil.EndsWith(text, length, "lah") || StemmerUtil.EndsWith(text, length, "pun"))
+		{
+			numSyllables--;
+			return length - 3;
+		}
+
+		return length;
+	  }
+
+	  private int removePossessivePronoun(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "ku") || StemmerUtil.EndsWith(text, length, "mu"))
+		{
+		  numSyllables--;
+		  return length - 2;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "nya"))
+		{
+		  numSyllables--;
+		  return length - 3;
+		}
+
+		return length;
+	  }
+
+	  private int removeFirstOrderPrefix(char[] text, int length)
+	  {
+		if (StemmerUtil.StartsWith(text, length, "meng"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "meny") && length > 4 && isVowel(text[4]))
+		{
+		  flags |= REMOVED_MENG;
+		  text[3] = 's';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "men"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "mem"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "me"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peng"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peny") && length > 4 && isVowel(text[4]))
+		{
+		  flags |= REMOVED_PENG;
+		  text[3] = 's';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peny"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pen") && length > 3 && isVowel(text[3]))
+		{
+		  flags |= REMOVED_PENG;
+		  text[2] = 't';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pen"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pem"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "di"))
+		{
+		  flags |= REMOVED_DI;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "ter"))
+		{
+		  flags |= REMOVED_TER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "ke"))
+		{
+		  flags |= REMOVED_KE;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		return length;
+	  }
+
+	  private int removeSecondOrderPrefix(char[] text, int length)
+	  {
+		if (StemmerUtil.StartsWith(text, length, "ber"))
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (length == 7 && StemmerUtil.StartsWith(text, length, "belajar"))
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "be") && length > 4 && !isVowel(text[2]) && text[3] == 'e' && text[4] == 'r')
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "per"))
+		{
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (length == 7 && StemmerUtil.StartsWith(text, length, "pelajar"))
+		{
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pe"))
+		{
+		  flags |= REMOVED_PE;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		return length;
+	  }
+
+	  private int removeSuffix(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "kan") && (flags & REMOVED_KE) == 0 && (flags & REMOVED_PENG) == 0 && (flags & REMOVED_PE) == 0)
+		{
+		  numSyllables--;
+		  return length - 3;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "an") && (flags & REMOVED_DI) == 0 && (flags & REMOVED_MENG) == 0 && (flags & REMOVED_TER) == 0)
+		{
+		  numSyllables--;
+		  return length - 2;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "i") && !StemmerUtil.EndsWith(text, length, "si") && (flags & REMOVED_BER) == 0 && (flags & REMOVED_KE) == 0 && (flags & REMOVED_PENG) == 0)
+		{
+		  numSyllables--;
+		  return length - 1;
+		}
+		return length;
+	  }
+	}
+
+}
\ No newline at end of file

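To make the control flow of stem() above concrete (particles and possessive pronouns first, then the derivational stage guarded by the REMOVED_* flags), here is a worked call; the intermediate steps follow the code but have not been verified against a running build:

    // "membelikannya" (5 vowels, so numSyllables starts at 5)
    //   removePossessivePronoun : -"nya" -> "membelikan"
    //   removeFirstOrderPrefix  : -"mem" -> "belikan"   (sets REMOVED_MENG)
    //   removeSuffix            : -"kan" -> "beli"      (allowed: KE/PENG/PE were never removed)
    var stemmer = new IndonesianStemmer();
    char[] term = "membelikannya".ToCharArray();
    int len = stemmer.stem(term, term.Length, true); // true = derivational stemming
    Console.WriteLine(new string(term, 0, len));     // expected (unverified): "beli"
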
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
new file mode 100644
index 0000000..ef3ee00
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
@@ -0,0 +1,52 @@
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="IndicNormalizer"/> to normalize text
+	/// in Indian Languages.
+	/// </summary>
+	public sealed class IndicNormalizationFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly IndicNormalizer normalizer = new IndicNormalizer();
+
+	  public IndicNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  termAtt.Length = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
new file mode 100644
index 0000000..f4112ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="IndicNormalizationFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.IndicNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class IndicNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new IndicNormalizationFilterFactory </summary>
+	  public IndicNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new IndicNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
new file mode 100644
index 0000000..599e030
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
@@ -0,0 +1,194 @@
+using System.Collections;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static Character.UnicodeBlock.*;
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Normalizes the Unicode representation of text in Indian languages.
+	/// <para>
+	/// Follows guidelines from Unicode 5.2, chapter 6, South Asian Scripts I
+	/// and graphical decompositions from http://ldc.upenn.edu/myl/IndianScriptsUnicode.html
+	/// </para>
+	/// </summary>
+	public class IndicNormalizer
+	{
+
+	  private class ScriptData
+	  {
+		internal readonly int flag;
+		internal readonly int @base;
+		internal BitArray decompMask;
+
+		internal ScriptData(int flag, int @base)
+		{
+		  this.flag = flag;
+		  this.@base = @base;
+		}
+	  }
+
+	  private static readonly IdentityHashMap<char.UnicodeBlock, ScriptData> scripts = new IdentityHashMap<char.UnicodeBlock, ScriptData>(9);
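+	  // NOTE: char.UnicodeBlock (Java's Character.UnicodeBlock) and IdentityHashMap have no
+	  // direct .NET equivalents; a support-library port of both is assumed here and below.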
+
+	  private static int flag(char.UnicodeBlock ub)
+	  {
+		return scripts.get(ub).flag;
+	  }
+
+	  static IndicNormalizer()
+	  {
+		scripts.put(DEVANAGARI, new ScriptData(1, 0x0900));
+		scripts.put(BENGALI, new ScriptData(2, 0x0980));
+		scripts.put(GURMUKHI, new ScriptData(4, 0x0A00));
+		scripts.put(GUJARATI, new ScriptData(8, 0x0A80));
+		scripts.put(ORIYA, new ScriptData(16, 0x0B00));
+		scripts.put(TAMIL, new ScriptData(32, 0x0B80));
+		scripts.put(TELUGU, new ScriptData(64, 0x0C00));
+		scripts.put(KANNADA, new ScriptData(128, 0x0C80));
+		scripts.put(MALAYALAM, new ScriptData(256, 0x0D00));
+		foreach (ScriptData sd in scripts.values())
+		{
+		  sd.decompMask = new BitArray(0x7F);
+		  for (int i = 0; i < decompositions.Length; i++)
+		  {
+			int ch = decompositions[i][0];
+			int flags = decompositions[i][4];
+			if ((flags & sd.flag) != 0)
+			{
+			  sd.decompMask.Set(ch, true);
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Decompositions according to Unicode 5.2,
+	  /// and http://ldc.upenn.edu/myl/IndianScriptsUnicode.html
+	  /// 
+	  /// Most of these are not handled by Unicode normalization anyway.
+	  /// 
+	  /// The numbers here represent offsets into the respective codepages,
+	  /// with -1 representing null and 0xFF representing zero-width joiner.
+	  /// 
+	  /// The columns are: ch1, ch2, ch3, res, flags.
+	  /// ch1, ch2, and ch3 are the decomposition;
+	  /// res is the composition, and flags are the scripts to which it applies.
+	  /// </summary>
+	  private static readonly int[][] decompositions = {};
+
+
+	  /// <summary>
+	  /// Normalizes input text, and returns the new length.
+	  /// The length will always be less than or equal to the existing length.
+	  /// </summary>
+	  /// <param name="text"> input text </param>
+	  /// <param name="len"> valid length </param>
+	  /// <returns> normalized length </returns>
+	  public virtual int normalize(char[] text, int len)
+	  {
+		for (int i = 0; i < len; i++)
+		{
+		  char.UnicodeBlock block = char.UnicodeBlock.of(text[i]);
+		  ScriptData sd = scripts.get(block);
+		  if (sd != null)
+		  {
+			int ch = text[i] - sd.@base;
+			if (sd.decompMask.Get(ch))
+			{
+			  len = compose(ch, block, sd, text, i, len);
+			}
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Compose into standard form any compositions in the decompositions table.
+	  /// </summary>
+	  private int compose(int ch0, char.UnicodeBlock block0, ScriptData sd, char[] text, int pos, int len)
+	  {
+		if (pos + 1 >= len) // need at least 2 chars!
+		{
+		  return len;
+		}
+
+		int ch1 = text[pos + 1] - sd.@base;
+		char.UnicodeBlock block1 = char.UnicodeBlock.of(text[pos + 1]);
+		if (block1 != block0) // needs to be the same writing system
+		{
+		  return len;
+		}
+
+		int ch2 = -1;
+
+		if (pos + 2 < len)
+		{
+		  ch2 = text[pos + 2] - sd.@base;
+		  char.UnicodeBlock block2 = char.UnicodeBlock.of(text[pos + 2]);
+		  if (text[pos + 2] == '\u200D') // ZWJ
+		  {
+			ch2 = 0xFF;
+		  }
+		  else if (block2 != block1) // still allow a 2-char match
+		  {
+			ch2 = -1;
+		  }
+		}
+
+		for (int i = 0; i < decompositions.Length; i++)
+		{
+		  if (decompositions[i][0] == ch0 && (decompositions[i][4] & sd.flag) != 0)
+		  {
+			if (decompositions[i][1] == ch1 && (decompositions[i][2] < 0 || decompositions[i][2] == ch2))
+			{
+			  text[pos] = (char)(sd.@base + decompositions[i][3]);
+			  len = StemmerUtil.delete(text, pos + 1, len);
+			  if (decompositions[i][2] >= 0)
+			  {
+				len = StemmerUtil.delete(text, pos + 1, len);
+			  }
+			  return len;
+			}
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
new file mode 100644
index 0000000..2355448
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Globalization;
+using System.IO;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // javadocs
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Simple Tokenizer for text in Indian Languages. </summary>
+	/// @deprecated (3.6) Use <seealso cref="StandardTokenizer"/> instead. 
+	[Obsolete("(3.6) Use StandardTokenizer instead.")]
+	public sealed class IndicTokenizer : CharTokenizer
+	{
+
+	  public IndicTokenizer(Version matchVersion, AttributeFactory factory, TextReader input) : base(matchVersion, factory, input)
+	  {
+	  }
+
+	  public IndicTokenizer(Version matchVersion, TextReader input) : base(matchVersion, input)
+	  {
+	  }
+
+	  protected internal override bool isTokenChar(int c)
+	  {
+		// Letters plus non-spacing, format, and spacing-combining marks count as token characters.
+		var category = CharUnicodeInfo.GetUnicodeCategory((char)c);
+		return char.IsLetter((char)c) || category == UnicodeCategory.NonSpacingMark || category == UnicodeCategory.Format || category == UnicodeCategory.SpacingCombiningMark;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
new file mode 100644
index 0000000..053fe5b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
@@ -0,0 +1,164 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ElisionFilter = org.apache.lucene.analysis.util.ElisionFilter;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using ItalianStemmer = org.tartarus.snowball.ext.ItalianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Italian.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ItalianAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
+	///   <li> As of 3.2, ElisionFilter with a set of Italian 
+	///        contractions is used by default.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class ItalianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Italian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+
+	  private static readonly CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, new List<string> { "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" }, true));
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ItalianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="ItalianLightStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		if (matchVersion.onOrAfter(Version.LUCENE_32))
+		{
+		  result = new ElisionFilter(result, DEFAULT_ARTICLES);
+		}
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new ItalianLightStemFilter(result);
+		}
+		else
+		{
+		  result = new SnowballFilter(result, new ItalianStemmer());
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
new file mode 100644
index 0000000..1b1023a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ItalianLightStemmer"/> to stem Italian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class ItalianLightStemFilter : TokenFilter
+	{
+	  private readonly ItalianLightStemmer stemmer = new ItalianLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public ItalianLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
new file mode 100644
index 0000000..8377b02
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ItalianLightStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ItalianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class ItalianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ItalianLightStemFilterFactory </summary>
+	  public ItalianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new ItalianLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
new file mode 100644
index 0000000..c125272
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
@@ -0,0 +1,155 @@
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Light Stemmer for Italian.
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2001 Experiments</i> by Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class ItalianLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 6)
+		{
+		  return len;
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ä':
+				s[i] = 'a';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+		  }
+		}
+
+		switch (s[len - 1])
+		{
+		  case 'e':
+			if (s[len - 2] == 'i' || s[len - 2] == 'h')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'i':
+			if (s[len - 2] == 'h' || s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'a':
+			if (s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'o':
+			if (s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
new file mode 100644
index 0000000..1a65410
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
@@ -0,0 +1,137 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.lv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Latvian.
+	/// </summary>
+	public sealed class LatvianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Latvian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(LatvianAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public LatvianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="LatvianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new LatvianStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
new file mode 100644
index 0000000..3c18f24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="LatvianStemmer"/> to stem Latvian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class LatvianStemFilter : TokenFilter
+	{
+	  private readonly LatvianStemmer stemmer = new LatvianStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public LatvianStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file


[27/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
new file mode 100644
index 0000000..99dafed
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
@@ -0,0 +1,55 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+
+/* This is a C# port of the Java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by Kstem
+	/// </summary>
+	internal class KStemData1
+	{
+		private KStemData1()
+		{
+		}
+	// KStemData1 ... KStemData8 are created from "head_word_list.txt"
+	   internal static string[] data = new string[] {"aback","abacus","abandon","abandoned","abase", "abash","abate","abattoir","abbess","abbey", "abbot","abbreviate","abbreviation","abc","abdicate", "abdomen","abduct","abed","aberrant","aberration", "abet","abeyance","abhor","abhorrent","abide", "abiding","abilities","ability","abject","abjure", "ablative","ablaut","ablaze","able","ablution", "ablutions","ably","abnegation","abnormal","abo", "aboard","abode","abolish","abolition","abominable", "abominate","abomination","aboriginal","aborigine","abort", "abortion","abortionist","abortive","abound","about", "above","aboveboard","abracadabra","abrade","abrasion", "abrasive","abreast","abridge","abridgement","abridgment", "abroad","abrogate","abrupt","abscess","abscond", "absence","absent","absentee","absenteeism","absently", "absinth","absinthe","absolute","absolutely","absolution", "absolutism","absolve","absorb","absorbent","absorbing", "absorption","abstain","abstemious","abstention",
 "abstinence", "abstract","abstracted","abstraction","abstruse","absurd", "abundance","abundant","abuse","abusive","abut", "abutment","abysmal","abyss","acacia","academic", "academician","academy","accede","accelerate","acceleration", "accelerator","accent","accentuate","accept","acceptable", "acceptance","access","accessible","accession","accessory", "accidence","accident","accidental","acclaim","acclamation", "acclimatize","acclivity","accolade","accommodate","accommodating", "accommodation","accommodations","accompaniment","accompanist","accompany", "accomplice","accomplish","accomplished","accomplishment","accord", "accordance","according","accordingly","accordion","accost", "account","accountable","accountancy","accountant","accoutrements", "accredit","accretion","accrue","accumulate","accumulation", "accumulative","accumulator","accuracy","accurate","accursed", "accusation","accusative","accuse","accused","accustom", "accustomed","ace","acerbity","acetate","acetic", "acetylene"
 ,"ache","achieve","achievement","achoo", "acid","acidify","acidity","acidulated","acidulous", "acknowledge","acknowledgement","acknowledgment","acme","acne", "acolyte","aconite","acorn","acoustic","acoustics", "acquaint","acquaintance","acquaintanceship","acquiesce","acquiescent", "acquire","acquisition","acquisitive","acquit","acquittal", "acre","acreage","acrid","acrimony","acrobat", "acrobatic","acrobatics","acronym","across","acrostic", "act","acting","actinism","action","actionable", "activate","active","activist","activity","actor", "actress","acts","actual","actuality","actually", "actuary","actuate","acuity","acumen","acupuncture", "acute","adage","adagio","adam","adamant", "adapt","adaptable","adaptation","adapter","adaptor", "adc","add","addendum","adder","addict", "addiction","addictive","addition","additional","additive", "addle","address","addressee","adduce","adenoidal", "adenoids","adept","adequate","adhere","adherence", "adherent","adhesion","adhesive","adieu","adipo
 se", "adj","adjacent","adjective","adjoin","adjourn", "adjudge","adjudicate","adjunct","adjure","adjust", "adjutant","adman","admass","administer","administration", "administrative","administrator","admirable","admiral","admiralty", "admiration","admire","admirer","admissible","admission", "admit","admittance","admitted","admittedly","admixture", "admonish","admonition","admonitory","ado","adobe", "adolescent","adopt","adoption","adoptive","adorable", "adoration","adore","adorn","adornment","adrenalin", "adrift","adroit","adulate","adulation","adult", "adulterate","adulterer","adultery","adumbrate","adv", "advance","advanced","advancement","advances","advantage", "advantageous","advent","adventist","adventitious","adventure", "adventurer","adventuress","adventurous","adverb","adverbial", "adversary","adverse","adversity","advert","advertise", "advertisement","advertising","advice","advisable","advise", "advisedly","adviser","advisor","advisory","advocacy", "advocate","adz","adze","a
 egis","aeon", "aerate","aerial","aerie","aerobatic","aerobatics", "aerodrome","aerodynamic","aerodynamics","aeronautics","aeroplane", "aerosol","aerospace","aertex","aery","aesthete", "aesthetic","aesthetics","aether","aethereal","aetiology", "afar","affable","affair","affect","affectation", "affected","affecting","affection","affectionate","affiance", "affidavit","affiliate","affiliation","affinity","affirm", "affirmative","affix","afflict","affliction","affluent", "afford","afforest","affray","affricate","affront", "aficionado","afield","afire","aflame","afloat", "afoot","aforesaid","aforethought","afraid","afresh", "afrikaans","afrikaner","afro","aft","after", "afterbirth","aftercare","aftereffect","afterglow","afterlife", "aftermath","afternoon","afternoons","afters","aftershave", "aftertaste","afterthought","afterwards","again","against", "agape","agate","age","ageing","ageless", "agency","agenda","agent","agglomerate","agglutination", "agglutinative","aggrandisement","aggrandi
 zement","aggravate","aggravation", "aggregate","aggregation","aggression","aggressive","aggressor", "aggrieved","aggro","aghast","agile","agitate", "agitation","agitator","aglow","agnostic","ago", "agog","agonise","agonised","agonising","agonize", "agonized","agonizing","agony","agoraphobia","agoraphobic", "agrarian","agree","agreeable","agreeably","agreement", "agriculture","agronomy","aground","ague","aha", "ahead","ahem","ahoy","aid","ail", "aileron","ailment","aim","aimless","air", "airbase","airbed","airbladder","airborne","airbrake", "airbrick","airbus","aircraft","aircraftman","aircrew", "aircushion","airdrop","airedale","airfield","airflow", "airforce","airgun","airhole","airhostess","airily", "airing","airlane","airless","airletter","airlift", "airline","airliner","airlock","airmail","airman", "airplane","airpocket","airport","airs","airshaft", "airship","airsick","airspace","airspeed","airstrip", "airtight","airway","airwoman","airworthy","airy", "aisle","aitch","ajar","ak
 imbo","akin", "alabaster","alack","alacrity","alarm","alarmist", "alas","albatross","albeit","albino","album", "albumen","alchemist","alchemy","alcohol","alcoholic", "alcoholism","alcove","alder","alderman","ale", "alehouse","alert","alfalfa","alfresco","algae", "algebra","algorithm","alias","alibi","alien", "alienate","alienation","alienist","alight","align", "alignment","alike","alimentary","alimony","aline", "alinement","alive","alkali","alkaline","all", "allah","allay","allegation","allege","allegedly", "allegiance","allegorical","allegory","allegretto","allegro", "alleluia","allergic","allergy","alleviate","alley", "alleyway","alliance","allied","alligator","alliteration", "alliterative","allocate","allocation","allopathy","allot", "allotment","allow","allowable","allowance","alloy", "allspice","allude","allure","allurement","allusion", "alluvial","alluvium","ally","almanac","almanack", "almighty","almond","almoner","almost","alms", "aloe","aloft","alone","along","alongside", "
 aloof","alopecia","aloud","alpaca","alpenhorn", "alpenstock","alpha","alphabet","alphabetical","alpine", "already","alright","alsatian","also","altar", "altarpiece","alter","alteration","altercation","alternate", "alternative","alternator","although","altimeter","altitude", "alto","altogether","altruism","altruist","alum", "aluminium","alumna","alumnus","alveolar","always", "alyssum","amalgam","amalgamate","amanuensis","amass", "amateur","amateurish","amatory","amaze","amazing", "amazon","ambassador","ambassadorial","amber","ambergris", "ambidextrous","ambience","ambient","ambiguous","ambit", "ambition","ambitious","ambivalent","amble","ambrosia", "ambulance","ambush","ame","ameba","ameliorate", "amen","amenable","amend","amendment","amends", "amenity","americanise","americanism","americanize","amethyst", "amiable","amicable","amid","amidships","amir", "amiss","amity","ammeter","ammo","ammonia", "ammonite","ammunition","amnesia","amnesty","amoeba", "amoebic","amok","among","amoral",
 "amorous", "amorphous","amortise","amortize","amount","amour", "amp","amperage","ampersand","amphetamine","amphibian", "amphibious","amphitheater","amphitheatre","amphora","ample", "amplifier","amplify","amplitude","ampoule","amputate", "amputee","amuck","amulet","amuse","amusement", "anachronism","anaconda","anaemia","anaemic","anaesthesia", "anaesthetic","anaesthetist","anagram","anal","analgesia", "analgesic","analog","analogize","analogous","analogue", "analogy","analyse","analysis","analyst","analytic", "anapaest","anarchic","anarchism","anarchist","anarchy", "anathema","anathematize","anatomical","anatomist","anatomy", "ancestor","ancestral","ancestry","anchor","anchorage", "anchorite","anchovy","ancient","ancients","ancillary", "and","andante","andiron","androgynous","anecdotal", "anecdote","anemia","anemometer","anemone","anesthesia", "anesthetise","anesthetize","anew","angel","angelica", "angelus","anger","angle","anglican","anglicise", "anglicism","anglicize","angling","an
 glophile","anglophilia", "anglophobe","anglophobia","angora","angostura","angry", "angst","anguish","anguished","angular","aniline", "animadversion","animadvert","animal","animalcule","animalism", "animate","animation","animism","animosity","animus", "anis","anise","aniseed","ankle","anklet", "annals","anneal","annex","annexation","annexe", "annihilate","anniversary","annotate","annotation","announce", "announcement","announcer","annoy","annoyance","annual", "annuity","annul","annular","annunciation","anode", "anodyne","anoint","anomalous","anomaly","anon", "anonymity","anonymous","anopheles","anorak","anorexia", "another","answer","answerable","ant","antacid", "antagonism","antagonist","antagonize","antarctic","ante", "anteater","antecedence","antecedent","antecedents","antechamber", "antedate","antediluvian","antelope","antenatal","antenna", "antepenultimate","anterior","anteroom","anthem","anther", "anthill","anthology","anthracite","anthrax","anthropocentric", "anthropoid","anth
 ropologist","anthropology","anthropomorphic","anthropomorphism", "anthropophagous","anthropophagy","antiaircraft","antibiotic","antibody", "antic","anticipate","anticipation","anticipatory","anticlerical", "anticlimax","anticlockwise","antics","anticyclone","antidote", "antifreeze","antigen","antihero","antihistamine","antiknock", "antilogarithm","antimacassar","antimatter","antimony","antipathetic", "antipathy","antipersonnel","antipodal","antipodes","antiquarian", "antiquary","antiquated","antique","antiquity","antirrhinum", "antiseptic","antisocial","antithesis","antithetic","antitoxin", "antler","antonym","anus","anvil","anxiety", "anxious","any","anybody","anyhow","anyplace", "anyroad","anything","anyway","anywhere","aorta", "apace","apanage","apart","apartheid","apartment", "apartments","apathetic","apathy","ape","aperient", "aperitif","aperture","apex","aphasia","aphasic", "aphid","aphorism","aphoristic","aphrodisiac","apiarist", "apiary","apices","apiculture","apiece","apish
 ", "aplomb","apocalypse","apocalyptic","apocrypha","apocryphal", "apogee","apologetic","apologetics","apologia","apologise", "apologist","apologize","apology","apophthegm","apoplectic", "apoplexy","apostasy","apostate","apostatise","apostatize", "apostle","apostolic","apostrophe","apostrophize","apothecary", "apothegm","apotheosis","appal","appall","appalling", "appanage","apparatus","apparel","apparent","apparently", "apparition","appeal","appealing","appear","appearance", "appearances","appease","appeasement","appellant","appellate", "appellation","append","appendage","appendectomy","appendicitis", "appendix","appertain","appetite","appetizer","appetizing", "applaud","applause","apple","applejack","appliance", "applicable","applicant","application","applied","apply", "appoint","appointment","appointments","apportion","apposite", "apposition","appraisal","appraise","appreciable","appreciate", "appreciation","appreciative","apprehend","apprehension","apprehensive", "apprentice","app
 renticeship","apprise","appro","approach", "approachable","approbation","approbatory","appropriate","appropriation", "approval","approve","approx","approximate","approximation", "appurtenance","apricot","april","apron","apropos", "apse","apt","aptitude","aqualung","aquamarine", "aquaplane","aquarium","aquatic","aquatint","aqueduct", "aqueous","aquiline","arab","arabesque","arabic", "arable","arachnid","arak","arbiter","arbitrary", "arbitrate","arbitration","arbitrator","arbor","arboreal", "arboretum","arbour","arc","arcade","arcadia", "arcane","arch","archaeology","archaic","archaism", "archangel","archbishop","archbishopric","archdeacon","archdeaconry", "archdiocese","archduke","archeology","archer","archery", "archetype","archimandrite","archipelago","architect","architecture", "archive","archway","arctic","ardent","ardor", "ardour","arduous","are","area","areca", "arena","argent","argon","argot","arguable", "argue","argument","argumentative","aria","arid", "aries","aright","arise
 ","aristocracy","aristocrat", "aristocratic","arithmetic","arithmetician","ark","arm", "armada","armadillo","armament","armature","armband", "armchair","armed","armful","armhole","armistice", "armlet","armor","armorer","armorial","armory", "armour","armoured","armourer","armoury","armpit", "arms","army","aroma","aromatic","arose", "around","arouse","arpeggio","arquebus","arrack", "arraign","arrange","arrangement","arrant","arras", "array","arrears","arrest","arrival","arrive", "arrogance","arrogant","arrogate","arrow","arrowhead", "arrowroot","arse","arsenal","arsenic","arson", "art","artefact","arterial","arteriosclerosis","artery", "artful","arthritis","artichoke","article","articles", "articulate","articulated","articulateness","articulation","artifact", "artifice","artificer","artificial","artillery","artisan", "artist","artiste","artistic","artistry","artless", "arts","arty","arum","asbestos","ascend", "ascendancy","ascendant","ascendency","ascendent","ascension", "ascent","asc
 ertain","ascetic","ascribe","ascription", "asepsis","aseptic","asexual","ash","ashamed", "ashbin","ashcan","ashen","ashes","ashore", "ashtray","ashy","aside","asinine","ask", "askance","askew","aslant","asleep","asp", "asparagus","aspect","aspectual","aspen","asperity", "aspersion","asphalt","asphodel","asphyxia","asphyxiate", "aspic","aspidistra","aspirant","aspirate","aspiration", "aspire","aspirin","ass","assagai","assail", "assailant","assassin","assassinate","assault","assay", "assegai","assemblage","assemble","assembly","assemblyman", "assent","assert","assertion","assertive","assess", "assessment","assessor","asset","asseverate","assiduity", "assiduous","assign","assignation","assignment","assimilate", "assimilation","assist","assistance","assistant","assize", "assizes","associate","association","assonance","assort", "assorted","assortment","asst","assuage","assume", "assumption","assurance","assure","assured","aster", "asterisk","astern","asteroid","asthma","astigmatic", "as
 tigmatism","astir","astonish","astonishment","astound", "astrakhan","astral","astray","astride","astringent", "astrolabe","astrologer","astrology","astronaut","astronautics", "astronomer","astronomical","astronomy","astrophysics","astute", "asunder","asylum","asymmetric","atavism","atchoo", "ate","atelier","atheism","atheist","athlete", "athletic","athletics","athwart","atishoo","atlas", "atmosphere","atmospheric","atmospherics","atoll","atom", "atomic","atomise","atomize","atonal","atonality", "atone","atop","atrocious","atrocity","atrophy", "attach","attachment","attack","attain","attainder", "attainment","attar","attempt","attend","attendance", "attendant","attention","attentive","attenuate","attest", "attestation","attested","attic","attire","attitude", "attitudinise","attitudinize","attorney","attract","attraction", "attractive","attributable","attribute","attribution","attributive", "attrition","attune","atypical","aubergine","aubrietia", "auburn","auction","auctioneer","audac
 ious","audacity", "audible","audience","audio","audiometer","audit", "audition","auditor","auditorium","auditory","auger", "aught","augment","augmentation","augur","augury", "august","auk","aunt","aura","aural", "aureole","auricle","auricular","auriferous","aurora", "auscultation","auspices","auspicious","aussie","austere", "austerity","australasian","autarchy","autarky","authentic", "authenticate","authenticity","author","authoress","authorisation", "authorise","authoritarian","authoritative","authority","authorization", "authorize","authorship","autism","autistic","auto", "autobahn","autobiographical","autobiography","autocracy","autocrat", "autoeroticism","autograph","automat","automate","automatic", "automation","automatism","automaton","automobile","autonomous", "autonomy","autopsy","autostrada","autosuggestion","autumn", "autumnal","auxiliary","avail","available","avalanche", "avarice","avaricious","avatar","avaunt","avenge", "avenue","aver","average","averse","aversion", "ave
 rsive","avert","aviary","aviation","aviator", "avid","avocado","avocation","avocet","avoid", "avoidance","avoirdupois","avow","avowal","avowed", "avuncular","await","awake","awaken","awakening", "award","aware","awash","away","awe", "awesome","awestruck","awful","awfully","awhile", "awkward","awl","awning","awoke","awoken", "awry","axe","axiom","axiomatic","axis", "axle","axolotl","ayah","aye","azalea", "azimuth","azure","baa","babble","babbler", "babe","babel","baboo","baboon","babu", "baby","babyhood","babyish","baccalaureate","baccara", "baccarat","bacchanal","baccy","bachelor","bacillus", "back","backache","backbench","backbite","backbone", "backbreaking","backchat","backcloth","backcomb","backdate", "backdrop","backer","backfire","backgammon","background", "backhand","backhanded","backhander","backing","backlash", "backlog","backmost","backpedal","backside","backslide", "backspace","backstage","backstairs","backstay","backstroke", "backtrack","backup","backward","backwards","ba
 ckwash", "backwater","backwoods","backwoodsman","backyard","bacon", "bacteria","bacteriology","bactrian","bad","bade", "badge","badger","badinage","badly","badminton", "baffle","baffling","bag","bagatelle","bagful", "baggage","baggy","bagpipes","bags","bah", "bail","bailey","bailiff","bairn","bait", "baize","bake","bakelite","baker","bakery", "baksheesh","balaclava","balalaika","balance","balanced", "balcony","bald","balderdash","balding","baldly", "baldric","bale","baleful","balk","ball", "ballad","ballade","ballast","ballcock","ballerina", "ballet","ballistic","ballistics","ballocks","balloon", "ballooning","balloonist","ballot","ballpoint","ballroom", "balls","bally","ballyhoo","balm","balmy", "baloney","balsa","balsam","balustrade","bamboo", "bamboozle","ban","banal","banana","band", "bandage","bandana","bandanna","bandbox","bandeau", "bandit","banditry","bandmaster","bandoleer","bandolier", "bandsman","bandstand","bandwagon","bandy","bane", "baneful","bang","banger","bangle","b
 anian", "banish","banister","banjo","bank","bankbook", "banker","banking","bankrupt","bankruptcy","banner", "bannock","banns","banquet","banshee","bantam", "bantamweight","banter","banyan","baobab","baptise", "baptism","baptist","baptize","bar","barb", "barbarian","barbaric","barbarise","barbarism","barbarize", "barbarous","barbecue","barbed","barbel","barber", "barbican","barbiturate","barcarole","barcarolle","bard", "bare","bareback","barebacked","barefaced","barefoot", "bareheaded","barelegged","barely","bargain","barge", "bargee","baritone","barium","bark","barker", "barley","barleycorn","barmaid","barman","barmy", "barn","barnacle","barnstorm","barnyard","barograph", "barometer","baron","baroness","baronet","baronetcy", "baronial","barony","baroque","barque","barrack", "barracks","barracuda","barrage","barred","barrel", "barren","barricade","barricades","barrier","barring", "barrister","barrow","bartender","barter","basalt", "base","baseball","baseboard","baseless","baseline", 
 "basement","bases","bash","bashful","basic", "basically","basics","basil","basilica","basilisk", "basin","basis","bask","basket","basketball", "basketful","basketry","basketwork","bass","basset", "bassinet","bassoon","bast","bastard","bastardise", "bastardize","bastardy","baste","bastinado","bastion", "bat","batch","bated","bath","bathing", "bathos","bathrobe","bathroom","baths","bathtub", "bathysphere","batik","batiste","batman","baton", "bats","batsman","battalion","batten","batter", "battery","battle","battleax","battleaxe","battlefield", "battlements","battleship","batty","bauble","baulk", "bauxite","bawd","bawdy","bawl","bay", "bayonet","bayou","bazaar","bazooka","bbc", "beach","beachcomber","beachhead","beachwear","beacon", "bead","beading","beadle","beady","beagle", "beagling","beak","beaker","beam","bean", "beanpole","beanstalk","bear","bearable","beard", "bearded","bearer","bearing","bearings","bearish", "bearskin","beast","beastly","beat","beaten", "beater","beatific","bea
 tification","beatify","beating", "beatitude","beatitudes","beatnik","beau","beaujolais", "beaut","beauteous","beautician","beautiful","beautify", "beauty","beaver","bebop","becalmed","because", "beck","beckon","become","becoming","bed", "bedaub","bedbug","bedclothes","bedding","bedeck", "bedevil","bedewed","bedfellow","bedimmed","bedlam", "bedouin","bedpan","bedpost","bedraggled","bedridden", "bedrock","bedroom","bedside","bedsore","bedspread", "bedstead","bedtime","bee","beech","beef", "beefcake","beefeater","beefsteak","beefy","beehive", "beeline","been","beer","beery","beeswax", "beet","beetle","beetling","beetroot","beeves", "befall","befit","befitting","before","beforehand", "befriend","befuddle","beg","beget","beggar", "beggarly","beggary","begin","beginner","beginning", "begone","begonia","begorra","begot","begotten", "begrudge","beguile","begum","begun","behalf", "behave","behavior","behaviorism","behaviour","behaviourism", "behead","behemoth","behest","behind","behindhand",
  "behold","beholden","behove","beige","being", "belabor","belabour","belated","belay","belch", "beleaguer","belfry","belie","belief","believable", "believe","believer","belittle","bell","belladonna", "bellboy","belle","bellflower","bellicose","belligerency", "belligerent","bellow","bellows","belly","bellyache", "bellyful","belong","belongings","beloved","below", "belt","belted","belting","beltway","bemoan", "bemused","ben","bench","bencher","bend", "bended","bends","beneath","benedictine","benediction", "benedictus","benefaction","benefactor","benefice","beneficent", "beneficial","beneficiary","benefit","benevolence","benevolent", "benighted","benign","benignity","bent","benumbed", "benzedrine","benzene","benzine","bequeath","bequest", "berate","bereave","bereaved","bereavement","bereft", "beret","beriberi","berk","berry","berserk", "berth","beryl","beseech","beseem","beset", "besetting","beside","besides","besiege","besmear", "besmirch","besom","besotted","besought","bespattered", 
 "bespeak","bespoke","best","bestial","bestiality", "bestiary","bestir","bestow","bestrew","bestride", "bet","beta","betake","betel","bethel", "bethink","betide","betimes","betoken","betray", "betrayal","betroth","betrothal","betrothed","better", "betterment","betters","bettor","between","betwixt", "bevel","beverage","bevy","bewail","beware", "bewilder","bewitch","bey","beyond","bezique", "bhang","bias","bib","bible","biblical", "bibliographer","bibliography","bibliophile","bibulous","bicarb", "bicarbonate","bicentenary","bicentennial","biceps","bicker", "bicycle","bid","biddable","bidding","bide", "bidet","biennial","bier","biff","bifocals", "bifurcate","big","bigamist","bigamous","bigamy", "bighead","bight","bigot","bigoted","bigotry", "bigwig","bijou","bike","bikini","bilabial", "bilateral","bilberry","bile","bilge","bilingual", "bilious","bilk","bill","billboard","billet", "billfold","billhook","billiard","billiards","billion", "billow","billposter","billy","biltong","bimetallic"
 , "bimetallism","bimonthly","bin","binary","bind", "binder","bindery","binding","bindweed","binge", "bingo","binnacle","binocular","binoculars","binomial", "biochemistry","biodegradable","biographer","biographical","biography", "biological","biology","biomedical","bionic","biosphere", "biotechnology","bipartisan","bipartite","biped","biplane", "birch","bird","birdie","birdlime","birdseed", "biretta","biro","birth","birthday","birthmark", "birthplace","birthrate","birthright","biscuit","bisect", "bisexual","bishop","bishopric","bismuth","bison", "bisque","bistro","bit","bitch","bitchy", "bite","biting","bitter","bittern","bitters", "bittersweet","bitty","bitumen","bituminous","bivalve", "bivouac","biweekly","bizarre","blab","blabber", "blabbermouth","black","blackamoor","blackball","blackberry", "blackbird","blackboard","blackcurrant","blacken","blackguard", "blackhead","blacking","blackjack","blackleg","blacklist", "blackly","blackmail","blackout","blackshirt","blacksmith", "blackth
 orn","bladder","blade","blaeberry","blah", "blame","blameless","blameworthy","blanch","blancmange", "bland","blandishments","blank","blanket","blare", "blarney","blaspheme","blasphemous","blasphemy","blast", "blasted","blatant","blather","blaze","blazer", "blazes","blazing","blazon","blazonry","bleach", "bleachers","bleak","bleary","bleat","bleed", "bleeder","bleeding","bleep","blemish","blench", "blend","blender","bless","blessed","blessing", "blether","blew","blight","blighter","blimey", "blimp","blind","blinder","blinders","blindfold", "blink","blinkered","blinkers","blinking","blip", "bliss","blister","blistering","blithe","blithering", "blitz","blizzard","bloated","bloater","blob", "bloc","block","blockade","blockage","blockbuster", "blockhead","blockhouse","bloke","blond","blood", "bloodbath","bloodcurdling","bloodhound","bloodless","bloodletting", "bloodshed","bloodshot","bloodstain","bloodstock","bloodstream", "bloodsucker","bloodthirsty","bloody","bloom","bloomer", "bloomer
 s","blooming","blossom","blot","blotch", "blotter","blotto","blouse","blow","blower", "blowfly","blowgun","blowhard","blowhole","blowlamp", "blown","blowout","blowpipe","blowsy","blowy", "blowzy","blubber","bludgeon","blue","bluebag", "bluebeard","bluebell","blueberry","bluebird","bluebottle", "bluecoat","bluefish","bluejacket","blueprint","blues", "bluestocking","bluff","blunder","blunderbuss","blunt", "bluntly","blur","blurb","blurt","blush", "bluster","blustery","boa","boar","board", "boarder","boarding","boardinghouse","boardroom","boards", "boardwalk","boast","boaster","boastful","boat", "boater","boathouse","boatman","boatswain","bob", "bobbin","bobby","bobcat","bobolink","bobsleigh", "bobtail","bobtailed","bock","bod","bode", "bodice","bodily","boding","bodkin","body", "bodyguard","bodywork","boer","boffin","bog", "bogey","boggle","boggy","bogie","bogus", "bohemian","boil","boiler","boisterous","bold", "boldface","boldfaced","bole","bolero","boll", "bollard","bollocks","bolon
 ey","bolshevik","bolshevism", "bolshy","bolster","bolt","bolthole","bomb", "bombard","bombardier","bombardment","bombast","bomber", "bombproof","bombshell","bombsight","bombsite","bonanza", "bonbon","bond","bondage","bonded","bondholder", "bonds","bone","boned","bonehead","boner", "bonesetter","boneshaker","bonfire","bongo","bonhomie", "bonito","bonkers","bonnet","bonny","bonsai", "bonus","bony","bonzer","boo","boob", "boobs","booby","boodle","boohoo","book", "bookable","bookbindery","bookbinding","bookcase","bookend", "booking","bookish","bookkeeping","booklet","bookmaker", "bookmark","bookmobile","bookplate","books","bookseller", "bookshop","bookstall","bookwork","bookworm","boom", "boomerang","boon","boor","boost","booster", "boot","bootblack","booted","bootee","booth", "bootlace","bootleg","bootless","boots","bootstraps", "booty","booze","boozer","boozy","bop", "bopper","boracic","borage","borax","bordeaux", "bordello","border","borderer","borderland","borderline", "bore","borea
 lis","borehole","borer","born", "borne","boron","borough","borrow","borrowing", "borscht","borshcht","borstal","borzoi","bosh", "bosom","bosomy","boss","bossy","bosun", "botanical","botanise","botanist","botanize","botany", "botch","both","bother","botheration","bothersome", "bottle","bottleful","bottleneck","bottom","bottomless", "botulism","boudoir","bouffant","bougainvillaea","bougainvillea", "bough","bought","bouillabaisse","bouillon","boulder", "boulevard","bounce","bouncer","bouncing","bouncy", "bound","boundary","bounden","bounder","boundless", "bounds","bounteous","bountiful","bounty","bouquet", "bourbon","bourgeois","bourgeoisie","bourn","bourne", "bourse","bout","boutique","bouzouki","bovine", "bovril","bovver","bow","bowdlerise","bowdlerize", "bowed","bowel","bowels","bower","bowerbird", "bowing","bowl","bowler","bowlful","bowline", "bowling","bowls","bowman","bowser","bowshot", "bowsprit","bowwow","box","boxer","boxful", "boxing","boxwood","boy","boycott","boyfriend", "b
 oyhood","boyish","boys","bra","brace", "bracelet","bracelets","braces","bracing","bracken", "bracket","brackish","bract","bradawl","brae", "brag","braggadocio","braggart","brahman","braid", "braille","brain","brainchild","brainless","brainpan", "brains","brainstorm","brainwash","brainwashing","brainwave", "brainy","braise","brake","bramble","bran", "branch","brand","brandish","brandy","brash", "brass","brasserie","brassiere","brassy","brat", "bravado","brave","bravo","bravura","brawl", "brawn","brawny","bray","brazen","brazier", "bre","breach","bread","breadbasket","breadboard", "breadcrumb","breaded","breadfruit","breadline","breadth", "breadthways","breadwinner","break","breakage","breakaway", "breakdown","breaker","breakfast","breakneck","breakout", "breakthrough","breakup","breakwater","bream","breast", "breastbone","breastplate","breaststroke","breastwork","breath", "breathalyse","breathalyser","breathe","breather","breathing", "breathless","breathtaking","breathy","breech","br
 eeches", "breed","breeder","breeding","breeze","breezeblock", "breezy","brethren","breve","brevet","breviary", "brevity","brew","brewer","brewery","briar", "bribe","bribery","brick","brickbat","brickfield", "bricklayer","brickwork","bridal","bride","bridegroom", "bridesmaid","bridge","bridgehead","bridgework","bridle", "brie","brief","briefcase","briefing","briefs", "brier","brig","brigade","brigadier","brigand", "brigandage","brigantine","bright","brighten","brill", "brilliancy","brilliant","brilliantine","brim","brimful", "brimfull","brimstone","brindled","brine","bring", "brink","brinkmanship","brioche","briquet","briquette", "brisk","brisket","bristle","bristly","bristols", "brit","britches","britisher","briton","brittle", "broach","broad","broadcast","broadcasting","broadcloth", "broaden","broadloom","broadminded","broadsheet","broadside", "broadsword","broadways","brocade","broccoli","brochure", "brogue","broil","broiler","broke","broken", "broker","brolly","bromide","bromine"
 ,"bronchial", "bronchitis","bronco","brontosaurus","bronze","brooch", "brood","broody","brook","broom","broomstick", "broth","brothel","brother","brotherhood","brougham", "brought","brouhaha","brow","browbeat","brown", "brownie","brownstone","browse","brucellosis","bruin", "bruise","bruiser","bruising","bruit","brunch", "brunet","brunette","brunt","brush","brushwood", "brushwork","brusque","brutal","brutalise","brutality", "brutalize","brute","brutish","bubble","bubbly", "buccaneer","buck","buckboard","bucked","bucket", "buckle","buckler","buckram","buckshee","buckshot", "buckskin","bucktooth","buckwheat","bucolic","bud", "buddhism","budding","buddy","budge","budgerigar", "budget","budgetary","buff","buffalo","buffer", "buffet","buffoon","buffoonery","bug","bugaboo", "bugbear","bugger","buggered","buggery","buggy", "bughouse","bugle","bugrake","buhl","build", "builder","building","buildup","bulb","bulbous", "bulbul","bulge","bulk","bulkhead","bulky", "bull","bulldog","bulldoze","bul
 ldozer","bullet", "bulletin","bulletproof","bullfight","bullfighting","bullfinch", "bullfrog","bullheaded","bullion","bullnecked","bullock", "bullring","bullshit","bully","bullyboy","bulrush", "bulwark","bum","bumble","bumblebee","bumboat", "bumf","bummer","bump","bumper","bumph", "bumpkin","bumptious","bumpy","bun","bunch", "bundle","bung","bungalow","bunghole","bungle", "bunion","bunk","bunker","bunkered","bunkhouse", "bunkum","bunny","bunting","buoy","buoyancy", "bur","burberry","burble","burden","burdensome", "burdock","bureau","bureaucracy","bureaucrat","bureaucratic", "burg","burgeon","burgess","burgh","burgher", "burglar","burglary","burgle","burgomaster","burgundy", "burial","burlap","burlesque","burly","burn", "burner","burning","burnish","burnous","burnouse", "burnt","burp","burr","burro","burrow", "bursar","bursary","burst","burthen","burton", "bury","bus","busby","bush","bushbaby", "bushed","bushel","bushwhack","bushy","business", "businesslike","businessman","busk","bus
 ker","busman", "bust","bustard","buster","bustle","busy", "busybody","but","butane","butch","butcher", "butchery","butler","butt","butter","buttercup", "butterfingers","butterfly","buttermilk","butterscotch","buttery", "buttock","buttocks","button","buttonhole","buttonhook", "buttons","buttress","buxom","buy","buyer", "buzz","buzzard","buzzer","bye","byelaw", "bygone","bygones","bylaw","bypass","byplay", "byre","bystander","byway","byways","byword", "byzantine","cab","cabal","cabaret","cabbage", "cabbie","cabby","cabdriver","caber","cabin", "cabinet","cable","cablegram","caboodle","caboose", "cabriolet","cacao","cache","cachet","cachou", "cackle","cacophony","cactus","cad","cadaver", "cadaverous","caddie","caddy","cadence","cadenza", "cadet","cadge","cadi","cadmium","cadre", "caerphilly","caesura","cafeteria","caffeine","caftan", "cage","cagey","cahoots","caiman","caique", "cairn","caisson","cajole","cake","calabash", "calaboose","calamitous","calamity","calcify","calcination", "cal
 cine","calcium","calculable","calculate","calculating", "calculation","calculator","calculus","caldron","calendar", "calender","calends","calf","calfskin","caliber", "calibrate","calibration","calibre","calico","caliper", "calipers","caliph","caliphate","calisthenic","calisthenics", "calk","call","calla","callboy","caller", "calligraphy","calling","calliper","callipers","callisthenic", "callisthenics","callous","callow","callus","calm", "calomel","calorie","calorific","calumniate","calumny", "calvary","calve","calves","calvinism","calypso", "calyx","cam","camaraderie","camber","cambric", "came","camel","camelhair","camellia","camembert", "cameo","camera","cameraman","camisole","camomile", "camouflage","camp","campaign","campanile","campanology", "campanula","camper","campfire","campground","camphor", "camphorated","campion","campsite","campus","camshaft", "can","canal","canalise","canalize","canard", "canary","canasta","cancan","cancel","cancellation", "cancer","cancerous","candela"
 ,"candelabrum","candid", "candidate","candidature","candidly","candied","candle", "candlelight","candlemas","candlepower","candlestick","candlewick", "candor","candour","candy","candyfloss","candytuft", "cane","canine","canis","canister","canker", "canna","cannabis","canned","cannelloni","cannery", "cannibal","cannibalise","cannibalism","cannibalize","cannon", "cannonade","cannonball","cannot","canny","canoe", "canon","canonical","canonicals","canonise","canonize", "canoodle","canopy","canst","cant","cantab", "cantabrigian","cantaloup","cantaloupe","cantankerous","cantata", "canteen","canter","canticle","cantilever","canto", "canton","cantonment","cantor","canvas","canvass", "canyon","cap","capabilities","capability","capable", "capacious","capacity","caparison","cape","caper", "capillarity","capillary","capital","capitalisation","capitalise", "capitalism","capitalist","capitalization","capitalize","capitals", "capitation","capitol","capitulate","capitulation","capitulations", "capo
 n","capriccio","caprice","capricious","capricorn", "capsicum","capsize","capstan","capsule","captain", "caption","captious","captivate","captive","captivity", "captor","capture","car","carafe","caramel", "carapace","carat","caravan","caravanning","caravanserai", "caraway","carbide","carbine","carbohydrate","carbolic", "carbon","carbonated","carbonation","carboniferous","carbonise", "carbonize","carborundum","carboy","carbuncle","carburetor", "carburettor","carcase","carcass","carcinogen","card", "cardamom","cardboard","cardiac","cardigan","cardinal", "cardpunch","cards","cardsharp","care","careen", "career","careerist","carefree","careful","careless", "caress","caret","caretaker","careworn","cargo", "caribou","caricature","caries","carillon","carious", "carmelite","carmine","carnage","carnal","carnation", "carnelian","carnival","carnivore","carnivorous","carob", "carol","carotid","carousal","carouse","carousel", "carp","carpal","carpenter","carpentry","carpet", "carpetbag","carpetba
 gger","carpeting","carport","carpus", "carriage","carriageway","carrier","carrion","carrot", "carroty","carrousel","carry","carryall","carrycot", "carryout","carsick","cart","cartage","cartel", "carter","carthorse","cartilage","cartilaginous","cartographer", "cartography","carton","cartoon","cartridge","cartwheel", "carve","carver","carving","caryatid","cascade", "cascara","case","casebook","casein","casework"};
+	}
+
+}
\ No newline at end of file
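
For context, these KStemDataN classes are pure storage: each one holds an
alphabetical slice of the KStem dictionary in a static string array. A
stemmer consumes them by merging every slice into a single hash-backed
lookup table before applying any suffix rules. The sketch below shows that
wiring in minimal form; it is not part of this commit. The KStemDictionary
helper is hypothetical, only the two slices visible in this thread
(KStemData1, KStemData2) are referenced, and the actual KStemmer uses a
char-array keyed map over the same arrays rather than a HashSet.

    using System;
    using System.Collections.Generic;

    namespace org.apache.lucene.analysis.en
    {
        // Hypothetical helper, not part of this commit: merges the KStemData
        // slices into one set so a stemmer can test word membership in O(1).
        internal static class KStemDictionary
        {
            internal static readonly HashSet<string> Words = BuildDict();

            private static HashSet<string> BuildDict()
            {
                var dict = new HashSet<string>(StringComparer.Ordinal);
                // Each KStemDataN.data array holds one alphabetical slice
                // of the dictionary; later commits add further slices.
                foreach (string[] slice in new[] { KStemData1.data, KStemData2.data })
                {
                    foreach (string word in slice)
                    {
                        dict.Add(word);
                    }
                }
                return dict;
            }
        }
    }

A caller would then gate its suffix rules on
KStemDictionary.Words.Contains(candidate). The real KStemmer builds a
CharArrayMap over these arrays instead, avoiding a string allocation per
lookup, but the shape of the wiring is the same.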

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
new file mode 100644
index 0000000..b3eccd5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a C# port of the Java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by KStem
+	/// </summary>
+	internal class KStemData2
+	{
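+		// Holds static word data only; the private constructor prevents instantiation.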
+		private KStemData2()
+		{
+		}
+	   internal static string[] data = new string[] {"cash","cashew","cashier","cashmere","casing", "casino","cask","casket","casque","cassava", "casserole","cassette","cassock","cassowary","cast", "castanets","castaway","castellated","caster","castigate", "casting","castle","castor","castrate","casual", "casualty","casuist","casuistry","cat","cataclysm", "catacomb","catafalque","catalepsy","catalog","catalogue", "catalpa","catalysis","catalyst","catamaran","catapult", "cataract","catarrh","catastrophe","catatonic","catcall", "catch","catcher","catching","catchpenny","catchphrase", "catchword","catchy","catechise","catechism","catechize", "categorical","categorise","categorize","category","cater", "caterer","caterpillar","caterwaul","catfish","catgut", "catharsis","cathartic","cathedral","catheter","cathode", "catholic","catholicism","catholicity","catkin","catnap", "catnip","catsup","cattle","catty","catwalk", "caucus","caudal","caught","caul","cauldron", "cauliflower","caulk","causal
 ","causality","causation", "causative","cause","causeless","causeway","caustic", "cauterise","cauterize","caution","cautionary","cautious", "cavalcade","cavalier","cavalry","cavalryman","cave", "caveat","caveman","cavern","cavernous","caviar", "caviare","cavil","cavity","cavort","cavy", "caw","cay","cayman","cease","ceaseless", "cedar","cede","cedilla","ceiling","celandine", "celebrant","celebrate","celebrated","celebration","celebrity", "celerity","celery","celestial","celibacy","celibate", "cell","cellar","cellarage","cellist","cello", "cellophane","cellular","celluloid","cellulose","celsius", "celtic","cement","cemetery","cenotaph","censor", "censorious","censorship","censure","census","cent", "centaur","centavo","centenarian","centenary","centennial", "center","centerboard","centerpiece","centigrade","centigram", "centigramme","centime","centimeter","centimetre","centipede", "central","centralise","centralism","centralize","centre", "centreboard","centrepiece","centrifugal","cen
 trifuge","centripetal", "centrist","centurion","century","cephalic","ceramic", "ceramics","cereal","cerebellum","cerebral","cerebration", "cerebrum","ceremonial","ceremonious","ceremony","cerise", "cert","certain","certainly","certainty","certifiable", "certificate","certificated","certify","certitude","cerulean", "cervical","cervix","cessation","cession","cesspit", "cetacean","chablis","chaconne","chafe","chaff", "chaffinch","chagrin","chain","chair","chairman", "chairmanship","chairperson","chairwoman","chaise","chalet", "chalice","chalk","chalky","challenge","challenging", "chamber","chamberlain","chambermaid","chambers","chameleon", "chamiomile","chamois","chamomile","champ","champagne", "champaign","champion","championship","chance","chancel", "chancellery","chancellor","chancery","chancy","chandelier", "chandler","change","changeable","changeless","changeling", "changeover","channel","chant","chanterelle","chanticleer", "chantry","chanty","chaos","chaotic","chap", "chapel","ch
 apelgoer","chaperon","chaperone","chapfallen", "chaplain","chaplaincy","chaplet","chaps","chapter", "char","charabanc","character","characterise","characteristic", "characterization","characterize","characterless","charade","charades", "charcoal","chard","charge","chargeable","charged", "charger","chariot","charioteer","charisma","charismatic", "charitable","charity","charlady","charlatan","charleston", "charlock","charlotte","charm","charmer","charming", "chart","charter","chartreuse","charwoman","chary", "charybdis","chase","chaser","chasm","chassis", "chaste","chasten","chastise","chastisement","chastity", "chasuble","chat","chatelaine","chattel","chatter", "chatterbox","chatty","chauffeur","chauvinism","chauvinist", "cheap","cheapen","cheapskate","cheat","check", "checkbook","checked","checker","checkerboard","checkers", "checklist","checkmate","checkoff","checkout","checkpoint", "checkrail","checkrein","checkroom","checkup","cheddar", "cheek","cheekbone","cheeky","cheep","cheer
 ", "cheerful","cheering","cheerio","cheerleader","cheerless", "cheers","cheery","cheese","cheesecake","cheesecloth", "cheeseparing","cheetah","chef","chem","chemical", "chemise","chemist","chemistry","chemotherapy","chenille", "cheque","chequebook","chequer","cherish","cheroot", "cherry","cherub","chervil","chess","chessboard", "chessman","chest","chesterfield","chestnut","chesty", "chevalier","chevron","chevvy","chevy","chew", "chi","chianti","chiaroscuro","chic","chicanery", "chicano","chichi","chick","chicken","chickenfeed", "chickenhearted","chickpea","chickweed","chicle","chicory", "chide","chief","chiefly","chieftain","chieftainship", "chiffon","chiffonier","chiffonnier","chigger","chignon", "chihuahua","chilblain","child","childbearing","childbirth", "childhood","childish","childlike","chile","chill", "chiller","chilli","chilly","chimaera","chime", "chimera","chimerical","chimney","chimneybreast","chimneypiece", "chimneypot","chimneystack","chimneysweep","chimpanzee","chin", 
 "china","chinatown","chinaware","chinchilla","chine", "chink","chinless","chinook","chinstrap","chintz", "chinwag","chip","chipboard","chipmunk","chippendale", "chipping","chippy","chiromancy","chiropody","chiropractic", "chirp","chirpy","chisel","chiseler","chiseller", "chit","chitchat","chivalrous","chivalry","chive", "chivvy","chivy","chloride","chlorinate","chlorine", "chloroform","chlorophyll","chock","chocolate","choice", "choir","choirboy","choirmaster","choke","choker", "chokey","choky","choler","cholera","choleric", "cholesterol","chomp","choose","choosey","choosy", "chop","chopfallen","chophouse","chopper","choppers", "choppy","chopstick","choral","chorale","chord", "chore","choreographer","choreography","chorine","chorister", "chortle","chorus","chose","chosen","chow", "chowder","christ","christen","christendom","christening", "christian","christianity","christlike","christmastime","chromatic", "chrome","chromium","chromosome","chronic","chronicle", "chronograph","chronol
 ogical","chronology","chronometer","chrysalis", "chrysanthemum","chub","chubby","chuck","chuckle", "chug","chukker","chum","chummy","chump", "chunk","chunky","church","churchgoer","churching", "churchwarden","churchyard","churl","churlish","churn", "chute","chutney","cia","cicada","cicatrice", "cicerone","cid","cider","cif","cigar", "cigaret","cigarette","cinch","cincture","cinder", "cinderella","cinders","cine","cinema","cinematograph", "cinematography","cinnamon","cinquefoil","cipher","circa", "circadian","circle","circlet","circuit","circuitous", "circular","circularise","circularize","circulate","circulation", "circumcise","circumcision","circumference","circumflex","circumlocution", "circumnavigate","circumscribe","circumscription","circumspect","circumstance", "circumstances","circumstantial","circumvent","circus","cirque", "cirrhosis","cirrus","cissy","cistern","citadel", "citation","cite","citizen","citizenry","citizenship", "citron","citrous","citrus","city","civet", "civic
 ","civics","civies","civil","civilian", "civilisation","civilise","civility","civilization","civilize", "civilly","civvies","clack","clad","claim", "claimant","clairvoyance","clairvoyant","clam","clambake", "clamber","clammy","clamor","clamorous","clamour", "clamp","clampdown","clamshell","clan","clandestine", "clang","clanger","clangor","clangour","clank", "clannish","clansman","clap","clapboard","clapper", "clapperboard","clappers","claptrap","claque","claret", "clarification","clarify","clarinet","clarinetist","clarinettist", "clarion","clarity","clarts","clash","clasp", "class","classic","classical","classicism","classicist", "classics","classification","classified","classify","classless", "classmate","classroom","classy","clatter","clause", "claustrophobia","claustrophobic","clavichord","clavicle","claw", "clay","claymore","clean","cleaner","cleanliness", "cleanly","cleanse","cleanser","cleanup","clear", "clearance","clearing","clearinghouse","clearly","clearout", "clearway","c
 leat","cleavage","cleave","cleaver", "clef","cleft","clematis","clemency","clement", "clench","clerestory","clergy","clergyman","clerical", "clerihew","clerk","clever","clew","click", "client","clientele","cliff","cliffhanger","climacteric", "climactic","climate","climatic","climatology","climax", "climb","climber","clime","clinch","clincher", "cline","cling","clinging","clingy","clinic", "clinical","clink","clinker","clip","clipboard", "clipper","clippers","clippie","clipping","clique", "cliquey","cliquish","clitoris","cloaca","cloak", "cloakroom","clobber","cloche","clock","clockwise", "clockwork","clod","cloddish","clodhopper","clog", "cloggy","cloister","clone","clop","close", "closed","closedown","closefisted","closet","closure", "clot","cloth","clothe","clothes","clothesbasket", "clotheshorse","clothesline","clothier","clothing","cloture", "cloud","cloudbank","cloudburst","cloudless","cloudy", "clout","clove","cloven","clover","cloverleaf", "clown","clownish","cloy","club","cl
 ubbable", "clubfoot","clubhouse","cluck","clue","clueless", "clump","clumsy","clung","cluster","clutch", "clutches","clutter","coach","coachbuilder","coachman", "coachwork","coadjutor","coagulant","coagulate","coal", "coalbunker","coalesce","coalface","coalfield","coalhole", "coalhouse","coalition","coalmine","coalscuttle","coarse", "coarsen","coast","coastal","coaster","coastguard", "coastguardsman","coastline","coastwise","coat","coating", "coax","cob","cobalt","cobber","cobble", "cobbler","cobblers","cobblestone","cobra","cobweb", "cocaine","coccyx","cochineal","cochlea","cock", "cockade","cockatoo","cockchafer","cockcrow","cockerel", "cockeyed","cockfight","cockhorse","cockle","cockleshell", "cockney","cockpit","cockroach","cockscomb","cocksure", "cocktail","cocky","coco","cocoa","coconut", "cocoon","cod","coda","coddle","code", "codeine","codex","codger","codicil","codify", "codling","codpiece","codswallop","coed","coeducation", "coefficient","coelacanth","coequal","coerce","co
 ercion", "coercive","coeternal","coeval","coexist","coexistence", "coffee","coffeepot","coffer","cofferdam","coffers", "coffin","cog","cogency","cogent","cogitate", "cogitation","cognac","cognate","cognition","cognitive", "cognizance","cognizant","cognomen","cognoscenti","cogwheel", "cohabit","cohere","coherence","coherent","cohesion", "cohesive","cohort","coif","coiffeur","coiffure", "coil","coin","coinage","coincide","coincidence", "coincident","coincidental","coir","coitus","coke", "col","cola","colander","cold","coleslaw", "coley","colic","colicky","colitis","collaborate", "collaboration","collaborationist","collage","collapse","collapsible", "collar","collarbone","collate","collateral","collation", "colleague","collect","collected","collection","collective", "collectivise","collectivism","collectivize","collector","colleen", "college","collegiate","collide","collie","collier", "colliery","collision","collocate","collocation","colloquial", "colloquialism","colloquy","collude","c
 ollusion","collywobbles", "cologne","colon","colonel","colonial","colonialism", "colonialist","colonies","colonise","colonist","colonize", "colonnade","colony","color","coloration","coloratura", "colored","colorfast","colorful","coloring","colorless", "colors","colossal","colossally","colossus","colostrum", "colour","coloured","colourfast","colourful","colouring", "colourless","colours","colt","colter","coltish", "columbine","column","columnist","coma","comatose", "comb","combat","combatant","combative","comber", "combination","combinations","combinatorial","combine","combo", "combustible","combustion","come","comeback","comecon", "comedian","comedienne","comedown","comedy","comely", "comer","comestible","comet","comfit","comfort", "comfortable","comforter","comfrey","comfy","comic", "comical","comics","cominform","coming","comintern", "comity","comma","command","commandant","commandeer", "commander","commanding","commandment","commando","commemorate", "commemoration","commemorative
 ","commence","commencement","commend", "commendable","commendation","commendatory","commensurable","commensurate", "comment","commentary","commentate","commentator","commerce", "commercial","commercialise","commercialism","commercialize","commie", "commiserate","commiseration","commissar","commissariat","commissary", "commission","commissionaire","commissioner","commit","commitment", "committal","committed","committee","committeeman","commode", "commodious","commodity","commodore","common","commonage", "commonalty","commoner","commonly","commonplace","commons", "commonweal","commonwealth","commotion","communal","commune", "communicable","communicant","communicate","communication","communications", "communicative","communion","communism","communist","community", "commutable","commutation","commutative","commutator","commute", "commuter","compact","compacted","companion","companionable", "companionship","companionway","company","comparable","comparative", "comparatively","compare","co
 mparison","compartment","compartmentalise", "compartmentalize","compass","compassion","compassionate","compatibility", "compatible","compatriot","compeer","compel","compendious", "compendium","compensate","compensation","compensatory","compere", "compete","competence","competent","competition","competitive", "competitor","compilation","compile","complacency","complacent", "complain","complainant","complaint","complaisance","complaisant", "complement","complementary","complete","completely","completion", "complex","complexion","complexity","compliance","compliant", "complicate","complicated","complication","complicity","compliment", "complimentary","compliments","complin","compline","comply", "compo","component","comport","comportment","compose", "composer","composite","composition","compositor","compost", "composure","compote","compound","comprehend","comprehensible", "comprehension","comprehensive","compress","compressible","compression", "compressor","comprise","compromise","compt
 ometer","comptroller", "compulsion","compulsive","compulsory","compunction","computation", "compute","computer","computerize","comrade","comradeship", "coms","con","concatenate","concatenation","concave", "concavity","conceal","concealment","concede","conceit", "conceited","conceivable","conceive","concentrate","concentrated", "concentration","concentric","concept","conception","conceptual", "conceptualise","conceptualize","concern","concerned","concernedly", "concerning","concert","concerted","concertgoer","concertina", "concertmaster","concerto","concession","concessionaire","concessive", "conch","conchology","concierge","conciliate","conciliation", "conciliatory","concise","concision","conclave","conclude", "conclusion","conclusive","concoct","concoction","concomitance", "concomitant","concord","concordance","concordant","concordat", "concourse","concrete","concubinage","concubine","concupiscence", "concur","concurrence","concurrent","concuss","concussion", "condemn","condemnatio
 n","condensation","condense","condenser", "condescend","condescension","condign","condiment","condition", "conditional","conditions","condole","condolence","condom", "condominium","condone","condor","conduce","conducive", "conduct","conduction","conductive","conductivity","conductor", "conduit","cone","coney","confabulate","confabulation", "confection","confectioner","confectionery","confederacy","confederate", "confederation","confer","conference","confess","confessed", "confession","confessional","confessor","confetti","confidant", "confide","confidence","confident","confidential","confiding", "configuration","confine","confinement","confines","confirm", "confirmation","confirmed","confiscate","confiscatory","conflagration", "conflate","conflict","confluence","conform","conformable", "conformation","conformist","conformity","confound","confounded", "confraternity","confront","confrontation","confucian","confucianism", "confuse","confusion","confute","conga","congeal", "congenial",
 "congenital","congest","congestion","conglomerate", "conglomeration","congrats","congratulate","congratulations","congratulatory", "congregate","congregation","congregational","congregationalism","congress", "congressional","congressman","congruent","congruity","congruous", "conic","conical","conifer","coniferous","conj", "conjectural","conjecture","conjoin","conjoint","conjugal", "conjugate","conjugation","conjunction","conjunctiva","conjunctive", "conjunctivitis","conjuncture","conjure","conjurer","conjuror", "conk","conker","conkers","connect","connected", "connection","connective","connexion","connivance","connive", "connoisseur","connotation","connotative","connote","connubial", "conquer","conquest","conquistador","consanguineous","consanguinity", "conscience","conscientious","conscious","consciousness","conscript", "conscription","consecrate","consecration","consecutive","consensus", "consent","consequence","consequent","consequential","consequently", "conservancy","conservati
 on","conservationist","conservatism","conservative", "conservatoire","conservatory","conserve","consider","considerable", "considerably","considerate","consideration","considered","considering", "consign","consignee","consigner","consignment","consignor", "consist","consistency","consistent","consistory","consolation", "consolatory","console","consolidate","consols","consonance", "consonant","consort","consortium","conspectus","conspicuous", "conspiracy","conspirator","conspiratorial","conspire","constable", "constabulary","constancy","constant","constellation","consternation", "constipate","constipation","constituency","constituent","constitute", "constitution","constitutional","constitutionalism","constitutionally","constitutive", "constrain","constrained","constraint","constrict","constriction", "constrictor","construct","construction","constructive","constructor", "construe","consubstantiation","consul","consular","consulate", "consult","consultancy","consultant","consultation",
 "consultative", "consulting","consume","consumer","consummate","consummation", "consumption","consumptive","contact","contagion","contagious", "contain","contained","container","containerise","containerize", "containment","contaminate","contamination","contemplate","contemplation", "contemplative","contemporaneous","contemporary","contempt","contemptible", "contemptuous","contend","contender","content","contented", "contention","contentious","contentment","contents","contest", "contestant","context","contextual","contiguity","contiguous", "continence","continent","continental","contingency","contingent", "continual","continuance","continuation","continue","continuity", "continuo","continuous","continuum","contort","contortion", "contortionist","contour","contraband","contrabass","contraception", "contraceptive","contract","contractile","contraction","contractor", "contractual","contradict","contradiction","contradictory","contradistinction", "contrail","contraindication","contralto"
 ,"contraption","contrapuntal", "contrariety","contrariwise","contrary","contrast","contravene", "contravention","contretemps","contribute","contribution","contributor", "contributory","contrite","contrition","contrivance","contrive", "contrived","control","controller","controversial","controversy", "controvert","contumacious","contumacy","contumelious","contumely", "contuse","contusion","conundrum","conurbation","convalesce", "convalescence","convalescent","convection","convector","convene", "convener","convenience","convenient","convenor","convent", "conventicle","convention","conventional","conventionality","converge", "conversant","conversation","conversational","conversationalist","conversazione", "converse","conversion","convert","converter","convertible", "convex","convexity","convey","conveyance","conveyancer", "conveyancing","conveyer","conveyor","convict","conviction", "convince","convinced","convincing","convivial","convocation", "convoke","convoluted","convolution","convo
 lvulus","convoy", "convulse","convulsion","convulsive","cony","coo", "cook","cooker","cookery","cookhouse","cookie", "cooking","cookout","cool","coolant","cooler", "coolie","coon","coop","cooper","cooperate", "cooperation","cooperative","coordinate","coordinates","coordination", "coot","cop","cope","copeck","copier", "copilot","coping","copingstone","copious","copper", "copperhead","copperplate","coppersmith","coppice","copra", "coptic","copula","copulate","copulative","copy", "copybook","copyboy","copycat","copydesk","copyhold", "copyist","copyright","copywriter","coquetry","coquette", "cor","coracle","coral","corbel","cord", "cordage","cordial","cordiality","cordially","cordillera", "cordite","cordon","cords","corduroy","core", "corelate","coreligionist","corer","corespondent","corgi", "coriander","corinthian","cork","corkage","corked", "corker","corkscrew","corm","cormorant","corn", "corncob","corncrake","cornea","cornelian","corner", "cornerstone","cornet","cornfield","cornflake
 s","cornflower", "cornice","cornish","cornucopia","corny","corolla", "corollary","corona","coronary","coronation","coroner", "coronet","corpora","corporal","corporate","corporation", "corporeal","corps","corpse","corpulence","corpulent", "corpus","corpuscle","corral","correct","correction", "correctitude","corrective","correlate","correlation","correlative", "correspond","correspondence","correspondent","corresponding","corridor", "corrie","corrigendum","corroborate","corroboration","corroborative", "corroboree","corrode","corrosion","corrosive","corrugate", "corrugation","corrupt","corruption","corsage","corsair", "corse","corselet","corset","cortex","cortisone", "corundum","coruscate","corvette","cos","cosh", "cosignatory","cosine","cosmetic","cosmetician","cosmic", "cosmogony","cosmology","cosmonaut","cosmopolitan","cosmos", "cosset","cost","costermonger","costive","costly", "costs","costume","costumier","cosy","cot", "cotangent","cote","coterie","coterminous","cotillion", "cotta
 ge","cottager","cottar","cotter","cotton", "cottonseed","cottontail","cotyledon","couch","couchant", "couchette","cougar","cough","could","couldst", "coulter","council","councillor","counsel","counsellor", "counselor","count","countable","countdown","countenance", "counter","counteract","counterattack","counterattraction","counterbalance", "counterblast","counterclaim","counterclockwise","counterespionage","counterfeit", "counterfoil","counterintelligence","counterirritant","countermand","countermarch", "countermeasure","counteroffensive","counterpane","counterpart","counterpoint", "counterpoise","countersign","countersink","countertenor","countervail", "countess","countinghouse","countless","countrified","country", "countryman","countryside","county","coup","couple", "couplet","coupling","coupon","courage","courageous", "courgette","courier","course","courser","coursing", "court","courteous","courtesan","courtesy","courthouse", "courtier","courting","courtly","courtroom","courtship
 ", "courtyard","couscous","cousin","couture","cove", "coven","covenant","coventry","cover","coverage", "covering","coverlet","covert","covet","covetous", "covey","cow","coward","cowardice","cowardly", "cowbell","cowboy","cowcatcher","cower","cowgirl", "cowhand","cowheel","cowherd","cowhide","cowl", "cowlick","cowling","cowman","cowpat","cowpox", "cowrie","cowry","cowshed","cowslip","cox", "coxcomb","coy","coyote","coypu","cozen", "cozy","cpa","crab","crabbed","crabby", "crabgrass","crabwise","crack","crackbrained","crackdown", "cracked","cracker","crackers","crackle","crackleware", "crackling","crackpot","cracksman","crackup","cradle", "craft","craftsman","crafty","crag","craggy", "crake","cram","crammer","cramp","cramped", "crampon","cramps","cranberry","crane","cranial", "cranium","crank","crankshaft","cranky","cranny", "crap","crape","crappy","craps","crash", "crashing","crass","crate","crater","cravat", "crave","craven","craving","crawl","crawler", "crawlers","crayfish","crayon"
 ,"craze","crazy", "creak","creaky","cream","creamer","creamery", "creamy","crease","create","creation","creative", "creativity","creator","creature","credence","credentials", "credibility","credible","credit","creditable","creditor", "credo","credulous","creed","creek","creel", "creep","creeper","creepers","creeps","creepy", "cremate","crematorium","crenelated","crenellated","creole", "creosote","crept","crepuscular","crescendo","crescent", "cress","crest","crested","crestfallen","cretaceous", "cretin","cretonne","crevasse","crevice","crew", "crewman","crib","cribbage","crick","cricket", "cricketer","crier","cries","crikey","crime", "criminal","criminology","crimp","crimplene","crimson", "cringe","crinkle","crinkly","crinoid","crinoline", "cripes","cripple","crisis","crisp","crispy", "crisscross","criterion","critic","critical","criticise", "criticism","criticize","critique","critter","croak", "crochet","crock","crockery","crocodile","crocus", "croft","crofter","croissant","cromlech
 ","crone", "crony","crook","crooked","croon","crooner", "crop","cropper","croquet","croquette","crore", "crosier","cross","crossbar","crossbeam","crossbenches", "crossbones","crossbow","crossbred","crossbreed","crosscheck", "crosscurrent","crosscut","crossfire","crossing","crossover", "crosspatch","crosspiece","crossply","crossroad","crossroads", "crosstree","crosswalk","crosswind","crosswise","crossword", "crotch","crotchet","crotchety","crouch","croup", "croupier","crouton","crow","crowbar","crowd", "crowded","crowfoot","crown","crozier","crucial", "crucible","crucifix","crucifixion","cruciform","crucify", "crude","crudity","cruel","cruelty","cruet", "cruise","cruiser","crumb","crumble","crumbly", "crummy","crumpet","crumple","crunch","crupper", "crusade","cruse","crush","crust","crustacean", "crusty","crutch","crux","cry","crybaby", "crying","crypt","cryptic","cryptogram","cryptography", "crystal","crystalline","crystallise","crystallize","cub", "cubbyhole","cube","cubic","cubica
 l","cubicle", "cubism","cubit","cubs","cuckold","cuckoldry", "cuckoo","cucumber","cud","cuddle","cuddlesome", "cuddly","cudgel","cue","cuff","cuffs", "cuirass","cuisine","culinary","cull","cullender", "culminate","culmination","culotte","culottes","culpable", "culprit","cult","cultivable","cultivate","cultivated", "cultivation","cultivator","cultural","culture","cultured", "culvert","cumber","cumbersome","cumin","cummerbund", "cumulative","cumulonimbus","cumulus","cuneiform","cunnilingus", "cunning","cunt","cup","cupbearer","cupboard", "cupid","cupidity","cupola","cuppa","cupping", "cupric","cur","curable","curacy","curate", "curative","curator","curb","curd","curdle", "cure","curettage","curfew","curia","curio", "curiosity","curious","curl","curler","curlew", "curlicue","curling","curly","curlycue","curmudgeon", "currant","currency","current","curriculum","currish", "curry","curse","cursed","cursive","cursory", "curt","curtail","curtain","curtains","curtsey", "curtsy","curvaceous",
 "curvacious","curvature","curve", "cushion","cushy","cusp","cuspidor","cuss", "cussed","custard","custodial","custodian","custody", "custom","customary","customer","customs","cut", "cutaway","cutback","cuticle","cutlass","cutler", "cutlery","cutlet","cutoff","cutout","cutpurse", "cutter","cutthroat","cutting","cuttlefish","cutworm", "cwm","cwt","cyanide","cybernetics","cyclamate", "cyclamen","cycle","cyclic","cyclist","cyclone", "cyclopaedia","cyclopedia","cyclostyle","cyclotron","cyder", "cygnet","cylinder","cymbal","cynic","cynical", "cynicism","cynosure","cypher","cypress","cyrillic", "cyst","cystitis","cytology","czar","czarina", "czech","dab","dabble","dabchick","dabs", "dace","dachshund","dactyl","dad","daddy", "dado","daemon","daffodil","daft","dagger", "dago","daguerreotype","dahlia","daily","dainty", "daiquiri","dairy","dairying","dairymaid","dairyman", "dais","daisy","dale","dalliance","dally", "dalmation","dam","damage","damages","damascene", "damask","damn","damnable","d
 amnation","damnedest", "damning","damocles","damp","dampen","damper", "dampish","damsel","damson","dance","dandelion", "dander","dandified","dandle","dandruff","dandy", "danger","dangerous","dangle","dank","dapper", "dappled","dare","daredevil","daresay","daring", "dark","darken","darkey","darkroom","darky", "darling","darn","darning","dart","dartboard", "dartmoor","darts","dash","dashboard","dashed", "dashing","data","date","dated","dateless", "dateline","dates","dative","daub","daughter", "daunt","dauntless","dauphin","davit","dawdle", "dawn","day","dayboy","daybreak","daydream", "daylight","dayroom","days","daytime","daze", "dazzle","ddt","deacon","dead","deaden", "deadline","deadlock","deadly","deadpan","deadweight", "deaf","deafen","deal","dealer","dealing", "dealings","dean","deanery","dear","dearest", "dearie","dearly","dearth","deary","death", "deathbed","deathblow","deathless","deathlike","deathly", "deathwatch","deb","debar","debark","debase", "debatable","debate","debater
 ","debauch","debauchee", "debauchery","debenture","debilitate","debility","debit", "debonair","debone","debouch","debrief","debris", "debt","debtor","debug","debunk","debut", "debutante","decade","decadence","decadent","decalogue", "decamp","decant","decanter","decapitate","decathlon", "decay","decease","deceased","deceit","deceitful", "deceive","decelerate","december","decencies","decency", "decent","decentralise","decentralize","deception","deceptive", "decibel","decide","decided","decidedly","deciduous", "decimal","decimalise","decimalize","decimate","decipher", "decision","decisive","deck","deckchair","deckhand", "declaim","declamation","declaration","declare","declared", "declassify","declension","declination","decline","declivity", "declutch","decoction","decode","decolonise","decolonize", "decompose","decompress","decongestant","decontaminate","decontrol", "decorate","decoration","decorative","decorator","decorous", "decorum","decoy","decrease","decree","decrepit", "decrepitu
 de","decry","dedicate","dedicated","dedication", "deduce","deduct","deduction","deductive","deed", "deem","deep","deepen","deer","deerstalker", "def","deface","defame","default","defeat", "defeatism","defecate","defect","defection","defective", "defence","defend","defendant","defense","defensible", "defensive","defer","deference","defiance","defiant", "deficiency","deficient","deficit","defile","define", "definite","definitely","definition","definitive","deflate", "deflation","deflationary","deflect","deflection","deflower", "defoliant","defoliate","deforest","deform","deformation", "deformity","defraud","defray","defrock","defrost", "deft","defunct","defuse","defy","degauss", "degeneracy","degenerate","degeneration","degenerative","degrade", "degree","dehorn","dehumanise","dehumanize","dehydrate", "deice","deification","deify","deign","deism", "deity","dejected","dejection","dekko","delay", "delectable","delectation","delegacy","delegate","delegation", "delete","deleterious","delet
 ion","delft","deliberate", "deliberation","deliberative","delicacy","delicate","delicatessen", "delicious","delight","delightful","delimit","delineate", "delinquency","delinquent","deliquescent","delirious","delirium", "deliver","deliverance","delivery","deliveryman","dell", "delouse","delphic","delphinium","delta","delude", "deluge","delusion","delusive","delve","demagnetise", "demagnetize","demagogic","demagogue","demagoguery","demand", "demanding","demarcate","demarcation","demean","demeanor", "demeanour","demented","demerit","demesne","demigod", "demijohn","demilitarise","demilitarize","demise","demist", "demister","demo","demob","demobilise","demobilize", "democracy","democrat","democratic","democratise","democratize", "demography","demolish","demolition","demon","demonetise", "demonetize","demoniacal","demonic","demonstrable","demonstrate", "demonstration","demonstrative","demonstrator","demoralise","demoralize", "demote","demotic","demur","demure","demystify", "den","denation
 alise","denationalize","denial","denier", "denigrate","denim","denims","denizen","denominate", "denomination","denominational","denominator","denotation","denote", "denouement","denounce","dense","density","dent", "dental","dentifrice","dentist","dentistry","denture", "dentures","denude","denunciation","deny","deodorant", "deodorise","deodorize","depart","departed","department", "departure","depend","dependable","dependant","dependence", "dependency","dependent","depict","depilatory","deplete", "deplorable","deplore","deploy","deponent","depopulate", "deport","deportee","deportment","depose","deposit", "deposition","depositor","depository","depot","deprave", "depravity","deprecate","deprecatory","depreciate","depreciatory", "depredation","depress","depressed","depression","deprivation", "deprive","deprived","depth","depths","deputation", "depute","deputise","deputize","deputy","derail", "derange","derby","derelict","dereliction","deride", "derision","derisive","derisory","derivative
 ","derive", "dermatitis","dermatology","derogate","derogatory","derrick", "derv","dervish","des","desalinise","desalinize", "descale","descant","descend","descendant","descended", "descent","describe","description","descriptive","descry", "desecrate","desegregate","desensitise","desensitize","desert", "deserter","desertion","deserts","deserve","deservedly", "deserving","desiccant","desiccate","desideratum","design", "designate","designation","designedly","designer","designing", "designs","desirable","desire","desirous","desist", "desk","deskwork","desolate","despair","despairing", "despatch","despatches","desperado","desperate","desperation", "despicable","despise","despite","despoil","despondent", "despot","despotic","despotism","dessert","dessertspoon", "dessertspoonful","destination","destined","destiny","destitute", "destroy","destroyer","destruction","destructive","desuetude", "desultory","detach","detached","detachedly","detachment", "detail","detailed","detain","detainee","de
 tect", "detection","detective","detector","detention","deter", "detergent","deteriorate","determinant","determination","determine", "determined","determiner","determinism","deterrent","detest", "dethrone","detonate","detonation","detonator","detour", "detract","detractor","detrain","detriment","detritus", "deuce","deuced","deuteronomy","devaluation","devalue", "devastate","devastating","develop","developer","development", "developmental","deviance","deviant","deviate","deviation", "deviationist","device","devil","devilish","devilishly", "devilment","devious","devise","devitalise","devitalize", "devoid","devolution","devolve","devote","devoted", "devotee","devotion","devotional","devotions","devour", "devout","devoutly","dew","dewdrop","dewlap", "dewpond","dewy","dexterity","dexterous","dextrose", "dhoti","dhow","diabetes","diabetic","diabolic", "diabolical","diacritic","diacritical","diadem","diaeresis", "diagnose","diagnosis","diagnostic","diagonal","diagram", "dial","dialect","dia
 lectic","dialectician","dialog", "dialogue","diameter","diametrically","diamond","diaper", "diaphanous","diaphragm","diarist","diarrhea","diarrhoea", "diary","diaspora","diatom","diatribe","dibble", "dice","dicey","dichotomy","dick","dicker", "dickie","dicky","dickybird","dictaphone","dictate", "dictation","dictator","dictatorial","dictatorship","diction", "dictionary","dictum","did","didactic","diddle", "didst","die","diehard","dieresis","diet", "dietary","dietetic","dietetics","dietician","dietitian", "differ","difference","different","differential","differentiate", "difficult","difficulty","diffident","diffract","diffuse", "diffusion","dig","digest","digestion","digestive", "digger","digging","diggings","digit","digital", "dignified","dignify","dignitary","dignity","digraph", "digress","digression","digs","dike","dilapidated", "dilapidation","dilapidations","dilate","dilatory","dildo", "dilemma","dilettante","diligence","diligent","dill", "dillydally","dilute","dilution","dim","d
 imension", "dimensions","diminish","diminuendo","diminution","diminutive", "dimity","dimple","dimwit","din","dinar", "dine","diner","dingdong","dinghy","dingle", "dingo","dingy","dink","dinkum","dinky", "dinner","dinosaur","dint","diocese","dioxide", "dip","diphtheria","diphthong","diploma","diplomacy", "diplomat","diplomatic","diplomatically","diplomatist","dipper", "dipsomania","dipsomaniac","dipstick","dipswitch","diptych", "dire","direct","direction","directional","directions", "directive","directly","director","directorate","directorship", "directory","direful","dirge","dirigible","dirk", "dirndl","dirt","dirty","disability","disable", "disabled","disabuse","disadvantage","disadvantageous","disaffected", "disaffection","disaffiliate","disafforest","disagree","disagreeable", "disagreement","disallow","disappear","disappearance","disappoint", "disappointed","disappointing","disappointment","disapprobation","disapproval", "disapprove","disarm","disarmament","disarrange","disarray"
 , "disassociate","disaster","disastrous","disavow","disband", "disbar","disbelief","disbelieve","disburden","disburse", "disbursement","disc","discard","discern","discerning", "discernment","discharge","disciple","discipleship","disciplinarian", "disciplinary","discipline","disclaim","disclaimer","disclose", "disclosure","disco","discolor","discoloration","discolour", "discolouration","discomfit","discomfiture","discomfort","discommode", "discompose","disconcert","disconnect","disconnected","disconnection", "disconsolate","discontent","discontented","discontinue","discontinuity", "discontinuous","discord","discordance","discordant","discotheque", "discount","discountenance","discourage","discouragement","discourse", "discourteous","discourtesy","discover","discovery","discredit", "discreditable","discreet","discrepancy","discrete","discretion", "discretionary","discriminate","discriminating","discrimination","discriminatory", "discursive","discus","discuss","discussion","disdain", "
 disdainful","disease","disembark","disembarrass","disembodied", "disembowel","disembroil","disenchant","disencumber","disendow", "disengage","disengaged","disentangle","disequilibrium","disestablish", "disfavor","disfavour","disfigure","disforest","disfranchise", "disfrock","disgorge","disgrace","disgraceful","disgruntled", "disguise","disgust","dish","dishabille","disharmony", "dishcloth","dishearten","dishes","dishevelled","dishful", "dishonest","dishonesty","dishonor","dishonorable","dishonour", "dishonourable","dishwasher","dishwater","dishy","disillusion", "disillusioned","disillusionment","disincentive","disinclination","disinclined", "disinfect","disinfectant","disinfest","disingenuous","disinherit", "disintegrate","disinter","disinterested","disjoint","disjointed", "disjunctive","disk","dislike","dislocate","dislocation", "dislodge","disloyal","dismal","dismantle","dismast", "dismay","dismember","dismiss","dismissal","dismount", "disobedient","disobey","disoblige","disorder"
 ,"disorderly", "disorganise","disorganize","disorientate","disown","disparage", "disparate","disparity","dispassionate","dispatch","dispatches", "dispel","dispensable","dispensary","dispensation","dispense", "dispenser","dispersal","disperse","dispersion","dispirit", "displace","displacement","display","displease","displeasure", "disport","disposable","disposal","dispose","disposed", "disposition","dispossess","dispossessed","disproof","disproportion", "disproportionate","disprove","disputable","disputant","disputation", "disputatious","dispute","disqualification","disqualify","disquiet", "disquietude","disquisition","disregard","disrelish","disremember", "disrepair","disreputable","disrepute","disrespect","disrobe", "disrupt","dissatisfaction","dissatisfy","dissect","dissection", "dissemble","disseminate","dissension","dissent","dissenter", "dissenting","dissertation","disservice","dissever","dissident", "dissimilar","dissimilarity","dissimulate","dissipate","dissipated", "dissipat
 ion","dissociate","dissoluble","dissolute","dissolution", "dissolve","dissonance","dissonant","dissuade","distaff", "distal","distance","distant","distantly","distaste"};
+	}
+
+}
\ No newline at end of file


[10/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
new file mode 100644
index 0000000..bfd311a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
@@ -0,0 +1,44 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Minimal Stemmer for Portuguese
+	/// <para>
+	/// This follows the "RSLP-S" algorithm presented in:
+	/// <i>A study on the Use of Stemming for Monolingual Ad-Hoc Portuguese
+	/// Information Retrieval</i> (Orengo et al.)
+	/// which is just the plural reduction step of the RSLP
+	/// algorithm from <i>A Stemming Algorithm for the Portuguese Language</i>,
+	/// Orengo et al.
+	/// </para>
+	/// </summary>
+	/// <seealso cref= RSLPStemmerBase </seealso>
+	public class PortugueseMinimalStemmer : RSLPStemmerBase
+	{
+
+	  private static readonly Step pluralStep = parse(typeof(PortugueseMinimalStemmer), "portuguese.rslp")["Plural"];
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		return pluralStep.apply(s, len);
+	  }
+	}
+
+}
\ No newline at end of file

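A minimal usage sketch for the stemmer above (not from the commit; the lowercase method casing follows the raw port, and the exact output depends on the rules in the portuguese.rslp resource):

    // hedged sketch: RSLP-S applies only the "Plural" reduction step
    var stemmer = new PortugueseMinimalStemmer();
    string word = "meninas";                        // a plural form
    char[] buffer = new char[word.Length + 1];      // spare slot, mirroring the full stemmer's len+1 contract
    word.CopyTo(0, buffer, 0, word.Length);
    int newLen = stemmer.stem(buffer, word.Length);
    string stem = new string(buffer, 0, newLen);    // plausibly "menina"
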
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
new file mode 100644
index 0000000..1c046c0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="PortugueseStemmer"/> to stem 
+	/// Portuguese words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class PortugueseStemFilter : TokenFilter
+	{
+	  private readonly PortugueseStemmer stemmer = new PortugueseStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public PortugueseStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			// this stemmer increases word length by 1: worst case '*ã' -> '*ão'
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int len = termAtt.length();
+			int len = termAtt.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
+			int newlen = stemmer.stem(termAtt.resizeBuffer(len + 1), len);
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

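A sketch of the analysis chain the summary above describes, assuming the ported StandardTokenizer, LowerCaseFilter, CharArraySet and SetKeywordMarkerFilter keep their Lucene shapes (those names come from the Java API and are not guaranteed by this commit):

    TextReader reader = new StringReader("as meninas cantavam");
    Tokenizer source = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    TokenStream chain = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
    var keywords = new CharArraySet(Version.LUCENE_CURRENT, new[] { "meninas" }, false);
    chain = new SetKeywordMarkerFilter(chain, keywords); // marked terms keep KeywordAttribute and are not stemmed
    chain = new PortugueseStemFilter(chain);             // everything else is stemmed in place
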
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
new file mode 100644
index 0000000..b3895f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PortugueseStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PortugueseStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PortugueseStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PortugueseStemFilterFactory </summary>
+	  public PortugueseStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new PortugueseStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

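Programmatic construction of the factory, for readers not using the Solr XML above (tokenStream stands for any existing TokenStream; the empty dictionary matters because leftover arguments throw):

    var args = new Dictionary<string, string>();        // unknown entries raise ArgumentException
    var factory = new PortugueseStemFilterFactory(args);
    TokenStream stemmed = factory.create(tokenStream);  // wraps the input in a PortugueseStemFilter
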
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
new file mode 100644
index 0000000..90bfbf2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
@@ -0,0 +1,126 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa)
+	/// algorithm. This is sometimes also referred to as the Orengo stemmer.
+	/// </summary>
+	/// <seealso cref= RSLPStemmerBase </seealso>
+	public class PortugueseStemmer : RSLPStemmerBase
+	{
+	  private static readonly Step plural, feminine, adverb, augmentative, noun, verb, vowel;
+
+	  static PortugueseStemmer()
+	  {
+		IDictionary<string, Step> steps = parse(typeof(PortugueseStemmer), "portuguese.rslp");
+		plural = steps["Plural"];
+		feminine = steps["Feminine"];
+		adverb = steps["Adverb"];
+		augmentative = steps["Augmentative"];
+		noun = steps["Noun"];
+		verb = steps["Verb"];
+		vowel = steps["Vowel"];
+	  }
+
+	  /// <param name="s"> buffer, oversized to at least <code>len+1</code> </param>
+	  /// <param name="len"> initial valid length of buffer </param>
+	  /// <returns> new valid length, stemmed </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		Debug.Assert(s.Length >= len + 1, "this stemmer requires a buffer oversized by at least 1 char");
+
+		len = plural.apply(s, len);
+		len = adverb.apply(s, len);
+		len = feminine.apply(s, len);
+		len = augmentative.apply(s, len);
+
+		int oldlen = len;
+		len = noun.apply(s, len);
+
+		if (len == oldlen) // suffix not removed
+		{
+		  oldlen = len;
+
+		  len = verb.apply(s, len);
+
+		  if (len == oldlen) // suffix not removed
+		  {
+			len = vowel.apply(s, len);
+		  }
+		}
+
+		// rslp accent removal
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ã':
+			case 'ä':
+			case 'å':
+				s[i] = 'a';
+				break;
+			case 'ç':
+				s[i] = 'c';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+			case 'ñ':
+				s[i] = 'n';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'õ':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ý':
+			case 'ÿ':
+				s[i] = 'y';
+				break;
+		  }
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

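The stem() contract above is easy to miss: the buffer needs len+1 capacity because one rule family can lengthen a word (the filter earlier in this mail notes the worst case '*ã' -> '*ão'). A sketch under that contract:

    var stemmer = new PortugueseStemmer();
    string word = "bonecas";
    char[] buffer = new char[word.Length + 1];       // len+1, as the doc comment requires
    word.CopyTo(0, buffer, 0, word.Length);
    int newLen = stemmer.stem(buffer, word.Length);  // plural/adverb/feminine/... steps, then accent folding
    string stem = new string(buffer, 0, newLen);
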
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
new file mode 100644
index 0000000..252c795
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
@@ -0,0 +1,410 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Base class for stemmers that use a set of RSLP-like stemming steps.
+	/// <para>
+	/// RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
+	/// originally for stemming the Portuguese language, described in the paper
+	/// <i>A Stemming Algorithm for the Portuguese Language</i>, Orengo et al.
+	/// </para>
+	/// <para>
+	/// Since this time a plural-only modification (RSLP-S) as well as a modification
+	/// for the Galician language have been implemented. This class parses a configuration
+	/// file that describes <seealso cref="Step"/>s, where each Step contains a set of <seealso cref="Rule"/>s.
+	/// </para>
+	/// <para>
+	/// The general rule format is: 
+	/// <blockquote>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</blockquote>
+	/// where:
+	/// <ul>
+	///   <li><code>suffix</code> is the suffix to be removed (such as "inho").
+	///   <li><code>N</code> is the min stem size, where stem is defined as the candidate stem 
+	///       after removing the suffix (but before appending the replacement!)
+	///   <li><code>replacement</code> is an optional string to append after removing the suffix.
+	///       This can be the empty string.
+	///   <li><code>exceptions</code> is an optional list of exceptions, patterns that should 
+	///       not be stemmed. These patterns can be specified as whole word or suffix (ends-with) 
+	///       patterns, depending upon the exceptions format flag in the step header.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// A step is an ordered list of rules, with a structure in this format:
+	/// <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
+	///               ... rules ... };
+	/// </blockquote>
+	/// where:
+	/// <ul>
+	///   <li><code>name</code> is a name for the step (such as "Plural").
+	///   <li><code>N</code> is the min word size. Words that are less than this length bypass
+	///       the step completely, as an optimization. Note: N can be zero; in this case this 
+	///       implementation will automatically calculate the appropriate value from the underlying 
+	///       rules.
+	///   <li><code>B</code> is a "boolean" flag specifying how exceptions in the rules are matched.
+	///       A value of 1 indicates whole-word pattern matching; a value of 0 indicates that 
+	///       exceptions are actually suffixes and should be matched with ends-with.
+	///   <li><code>conds</code> is an optional list of conditions for entering the step at all. If
+	///       the list is non-empty, then a word must end with one of these conditions or it will
+	///       bypass the step completely as an optimization.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// </para>
+	/// </summary>
+	/// <seealso cref= <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
+	/// @lucene.internal </seealso>
+	public abstract class RSLPStemmerBase
+	{
+
+	  /// <summary>
+	  /// A basic rule, with no exceptions.
+	  /// </summary>
+	  protected internal class Rule
+	  {
+		protected internal readonly char[] suffix;
+		protected internal readonly char[] replacement;
+		protected internal readonly int min;
+
+		/// <summary>
+		/// Create a rule. </summary>
+		/// <param name="suffix"> suffix to remove </param>
+		/// <param name="min"> minimum stem length </param>
+		/// <param name="replacement"> replacement string </param>
+		public Rule(string suffix, int min, string replacement)
+		{
+		  this.suffix = suffix.ToCharArray();
+		  this.replacement = replacement.ToCharArray();
+		  this.min = min;
+		}
+
+		/// <returns> true if the word matches this rule. </returns>
+		public virtual bool matches(char[] s, int len)
+		{
+		  return (len - suffix.Length >= min && StemmerUtil.EndsWith(s, len, suffix));
+		}
+
+		/// <returns> new valid length of the string after firing this rule. </returns>
+		public virtual int replace(char[] s, int len)
+		{
+		  if (replacement.Length > 0)
+		  {
+			Array.Copy(replacement, 0, s, len - suffix.Length, replacement.Length);
+		  }
+		  return len - suffix.Length + replacement.Length;
+		}
+	  }
+
+	  /// <summary>
+	  /// A rule with a set of whole-word exceptions.
+	  /// </summary>
+	  protected internal class RuleWithSetExceptions : Rule
+	  {
+		protected internal readonly CharArraySet exceptions;
+
+		public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions) : base(suffix, min, replacement)
+		{
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (!exceptions[i].EndsWith(suffix, StringComparison.Ordinal))
+			{
+			  throw new Exception("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+			}
+		  }
+		  this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, exceptions, false);
+		}
+
+		public override bool matches(char[] s, int len)
+		{
+		  return base.matches(s, len) && !exceptions.contains(s, 0, len);
+		}
+	  }
+
+	  /// <summary>
+	  /// A rule with a set of exceptional suffixes.
+	  /// </summary>
+	  protected internal class RuleWithSuffixExceptions : Rule
+	  {
+		// TODO: use a more efficient datastructure: automaton?
+		protected internal readonly char[][] exceptions;
+
+		public RuleWithSuffixExceptions(string suffix, int min, string replacement, string[] exceptions) : base(suffix, min, replacement)
+		{
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (!exceptions[i].EndsWith(suffix, StringComparison.Ordinal))
+			{
+			  throw new Exception("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+			}
+		  }
+		  this.exceptions = new char[exceptions.Length][];
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			this.exceptions[i] = exceptions[i].ToCharArray();
+		  }
+		}
+
+		public override bool matches(char[] s, int len)
+		{
+		  if (!base.matches(s, len))
+		  {
+			return false;
+		  }
+
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (StemmerUtil.EndsWith(s, len, exceptions[i]))
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// A step containing a list of rules.
+	  /// </summary>
+	  protected internal class Step
+	  {
+		protected internal readonly string name;
+		protected internal readonly Rule[] rules;
+		protected internal readonly int min;
+		protected internal readonly char[][] suffixes;
+
+		/// <summary>
+		/// Create a new step </summary>
+		/// <param name="name"> Step's name. </param>
+		/// <param name="rules"> an ordered list of rules. </param>
+		/// <param name="min"> minimum word size. if this is 0 it is automatically calculated. </param>
+		/// <param name="suffixes"> optional list of conditional suffixes. may be null. </param>
+		public Step(string name, Rule[] rules, int min, string[] suffixes)
+		{
+		  this.name = name;
+		  this.rules = rules;
+		  if (min == 0)
+		  {
+			min = int.MaxValue;
+			foreach (Rule r in rules)
+			{
+			  min = Math.Min(min, r.min + r.suffix.Length);
+			}
+		  }
+		  this.min = min;
+
+		  if (suffixes == null || suffixes.Length == 0)
+		  {
+			this.suffixes = null;
+		  }
+		  else
+		  {
+			this.suffixes = new char[suffixes.Length][];
+			for (int i = 0; i < suffixes.Length; i++)
+			{
+			  this.suffixes[i] = suffixes[i].ToCharArray();
+			}
+		  }
+		}
+
+		/// <returns> new valid length of the string after applying the entire step. </returns>
+		public virtual int apply(char[] s, int len)
+		{
+		  if (len < min)
+		  {
+			return len;
+		  }
+
+		  if (suffixes != null)
+		  {
+			bool found = false;
+
+			for (int i = 0; i < suffixes.Length; i++)
+			{
+			  if (StemmerUtil.EndsWith(s, len, suffixes[i]))
+			  {
+				found = true;
+				break;
+			  }
+			}
+
+			if (!found)
+			{
+				return len;
+			}
+		  }
+
+		  for (int i = 0; i < rules.Length; i++)
+		  {
+			if (rules[i].matches(s, len))
+			{
+			  return rules[i].replace(s, len);
+			}
+		  }
+
+		  return len;
+		}
+	  }
+
+	  /// <summary>
+	  /// Parse a resource file into an RSLP stemmer description. </summary>
+	  /// <returns> a Map containing the named Steps in this description. </returns>
+	  protected internal static IDictionary<string, Step> parse(Type clazz, string resource)
+	  {
+		// TODO: this parser is ugly, but works. use a jflex grammar instead.
+		try
+		{
+		  InputStream @is = clazz.getResourceAsStream(resource);
+		  LineNumberReader r = new LineNumberReader(new InputStreamReader(@is, StandardCharsets.UTF_8));
+		  IDictionary<string, Step> steps = new Dictionary<string, Step>();
+		  string step;
+		  while ((step = readLine(r)) != null)
+		  {
+			Step s = parseStep(r, step);
+			steps[s.name] = s;
+		  }
+		  r.close();
+		  return steps;
+		}
+		catch (IOException e)
+		{
+		  throw new Exception(e.Message, e);
+		}
+	  }
+
+	  private static readonly Pattern headerPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$");
+	  private static readonly Pattern stripPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$");
+	  private static readonly Pattern repPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$");
+	  private static readonly Pattern excPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$");
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static Step parseStep(java.io.LineNumberReader r, String header) throws java.io.IOException
+	  private static Step parseStep(LineNumberReader r, string header)
+	  {
+		Matcher matcher = headerPattern.matcher(header);
+		if (!matcher.find())
+		{
+		  throw new Exception("Illegal Step header specified at line " + r.LineNumber);
+		}
+		Debug.Assert(matcher.groupCount() == 4);
+		string name = matcher.group(1);
+		int min = int.Parse(matcher.group(2));
+		int type = int.Parse(matcher.group(3));
+		string[] suffixes = parseList(matcher.group(4));
+		Rule[] rules = parseRules(r, type);
+		return new Step(name, rules, min, suffixes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static Rule[] parseRules(java.io.LineNumberReader r, int type) throws java.io.IOException
+	  private static Rule[] parseRules(LineNumberReader r, int type)
+	  {
+		IList<Rule> rules = new List<Rule>();
+		string line;
+		while ((line = readLine(r)) != null)
+		{
+		  Matcher matcher = stripPattern.matcher(line);
+		  if (matcher.matches())
+		  {
+			rules.Add(new Rule(matcher.group(1), int.Parse(matcher.group(2)), ""));
+		  }
+		  else
+		  {
+			matcher = repPattern.matcher(line);
+			if (matcher.matches())
+			{
+			  rules.Add(new Rule(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3)));
+			}
+			else
+			{
+			  matcher = excPattern.matcher(line);
+			  if (matcher.matches())
+			  {
+				if (type == 0)
+				{
+				  rules.Add(new RuleWithSuffixExceptions(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3), parseList(matcher.group(4))));
+				}
+				else
+				{
+				  rules.Add(new RuleWithSetExceptions(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3), parseList(matcher.group(4))));
+				}
+			  }
+			  else
+			  {
+				throw new Exception("Illegal Step rule specified at line " + r.LineNumber);
+			  }
+			}
+		  }
+		  if (line.EndsWith(";", StringComparison.Ordinal))
+		  {
+			return rules.ToArray();
+		  }
+		}
+		return null;
+	  }
+
+	  private static string[] parseList(string s)
+	  {
+		if (s.Length == 0)
+		{
+		  return null;
+		}
+		string[] list = s.Split(',');
+		for (int i = 0; i < list.Length; i++)
+		{
+		  list[i] = parseString(list[i].Trim());
+		}
+		return list;
+	  }
+
+	  private static string parseString(string s)
+	  {
+		return s.Substring(1, s.Length - 2); // strip the surrounding quotes
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static String readLine(java.io.LineNumberReader r) throws java.io.IOException
+	  private static string readLine(LineNumberReader r)
+	  {
+		string line = null;
+		while ((line = r.readLine()) != null)
+		{
+		  line = line.Trim();
+		  if (line.Length > 0 && line[0] != '#')
+		  {
+			return line;
+		  }
+		}
+		return line;
+	  }
+	}
+
+}
\ No newline at end of file

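To make the documented grammar concrete, here is a hypothetical step in the format parse() accepts (values invented for illustration, not copied from the real portuguese.rslp resource; '#' lines are comments skipped by readLine):

    # step "Plural": min word size 3, whole-word exceptions (flag 1),
    # entered only for words ending in "s"
    { "Plural", 3, 1, { "s" },
      { "ns", 1, "m"},
      { "s", 2, "", {"pires", "lapis"} } };

The first rule rewrites a trailing "ns" to "m" when at least one stem character remains; the second strips a trailing "s" from stems of length 2 or more unless the whole word is listed as an exception.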
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
new file mode 100644
index 0000000..2daf790
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
@@ -0,0 +1,213 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.query
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using IndexReader = org.apache.lucene.index.IndexReader;
+	using MultiFields = org.apache.lucene.index.MultiFields;
+	using Term = org.apache.lucene.index.Term;
+	using Terms = org.apache.lucene.index.Terms;
+	using TermsEnum = org.apache.lucene.index.TermsEnum;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// An <seealso cref="Analyzer"/> used primarily at query time to wrap another analyzer and provide a layer of protection
+	/// which prevents very common words from being passed into queries. 
+	/// <para>
+	/// For very large indexes the cost
+	/// of reading TermDocs for a very common word can be high. This analyzer was created after experience with
+	/// a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for 
+	/// this term to take 2 seconds.
+	/// </para>
+	/// </summary>
+	public sealed class QueryAutoStopWordAnalyzer : AnalyzerWrapper
+	{
+
+	  private readonly Analyzer @delegate;
+	  private readonly IDictionary<string, HashSet<string>> stopWordsPerField = new Dictionary<string, HashSet<string>>();
+	  //The default maximum percentage (40%) of index documents which
+	  //can contain a term, after which the term is considered to be a stop word.
+	  public const float defaultMaxDocFreqPercent = 0.4f;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency percentage greater than
+	  /// <seealso cref="#defaultMaxDocFreqPercent"/>
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader) throws java.io.IOException
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader) : this(matchVersion, @delegate, indexReader, defaultMaxDocFreqPercent)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency greater than the given
+	  /// maxDocFreq
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, int maxDocFreq) throws java.io.IOException
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, int maxDocFreq) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency percentage greater than
+	  /// the given maxPercentDocs
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+	  ///                      contain a term, after which the word is considered to be a stop word </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, float maxPercentDocs) throws java.io.IOException
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+	  /// given selection of fields from terms with a document frequency percentage
+	  /// greater than the given maxPercentDocs
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+	  /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+	  ///                      contain a term, after which the word is considered to be a stop word </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, Collection<String> fields, float maxPercentDocs) throws java.io.IOException
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, fields, (int)(indexReader.numDocs() * maxPercentDocs))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+	  /// given selection of fields from terms with a document frequency greater than
+	  /// the given maxDocFreq
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+	  /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, Collection<String> fields, int maxDocFreq) throws java.io.IOException
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, int maxDocFreq) : base(@delegate.ReuseStrategy)
+	  {
+		this.matchVersion = matchVersion;
+		this.@delegate = @delegate;
+
+		foreach (string field in fields)
+		{
+		  HashSet<string> stopWords = new HashSet<string>();
+		  Terms terms = MultiFields.getTerms(indexReader, field);
+		  CharsRef spare = new CharsRef();
+		  if (terms != null)
+		  {
+			TermsEnum te = terms.iterator(null);
+			BytesRef text;
+			while ((text = te.next()) != null)
+			{
+			  if (te.docFreq() > maxDocFreq)
+			  {
+				UnicodeUtil.UTF8toUTF16(text, spare);
+				stopWords.Add(spare.ToString());
+			  }
+			}
+		  }
+		  stopWordsPerField[field] = stopWords;
+		}
+	  }
+
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+
+	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		HashSet<string> stopWords;
+		if (!stopWordsPerField.TryGetValue(fieldName, out stopWords)) // the indexer would throw for fields without computed stopwords
+		{
+		  return components;
+		}
+		StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
+		return new TokenStreamComponents(components.Tokenizer, stopFilter);
+	  }
+
+	  /// <summary>
+	  /// Provides information on which stop words have been identified for a field
+	  /// </summary>
+	  /// <param name="fieldName"> The field for which stop words identified in "addStopWords"
+	  ///                  method calls will be returned </param>
+	  /// <returns> the stop words identified for a field </returns>
+	  public string[] getStopWords(string fieldName)
+	  {
+		HashSet<string> stopWords;
+		return stopWordsPerField.TryGetValue(fieldName, out stopWords) ? new List<string>(stopWords).ToArray() : new string[0];
+	  }
+
+	  /// <summary>
+	  /// Provides information on which stop words have been identified for all fields
+	  /// </summary>
+	  /// <returns> the stop words (as terms) </returns>
+	  public Term[] StopWords
+	  {
+		  get
+		  {
+			List<Term> allStopWords = new List<Term>(); // concrete type so ToArray() below compiles without LINQ
+			foreach (string fieldName in stopWordsPerField.Keys)
+			{
+			  HashSet<string> stopWords = stopWordsPerField[fieldName];
+			  foreach (string text in stopWords)
+			  {
+				allStopWords.Add(new Term(fieldName, text));
+			  }
+			}
+			return allStopWords.ToArray();
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

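A sketch of the query-time wiring described in the summary, assuming an open IndexReader named reader and the ported StandardAnalyzer (both are assumptions; the 0.3f threshold is illustrative):

    // terms present in more than 30% of documents become per-field stop words
    var wrapped = new QueryAutoStopWordAnalyzer(
        Version.LUCENE_CURRENT,
        new StandardAnalyzer(Version.LUCENE_CURRENT), // delegate whose token streams get the extra StopFilter
        reader,
        0.3f);
    string[] bodyStops = wrapped.getStopWords("body"); // inspect what was identified for one field
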
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
new file mode 100644
index 0000000..9382516
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
@@ -0,0 +1,281 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.reverse
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Reverse token string, for example "country" => "yrtnuoc".
+	/// <para>
+	/// If <code>marker</code> is supplied, then tokens will be also prepended by
+	/// that character. For example, with a marker of &#x5C;u0001, "country" =>
+	/// "&#x5C;u0001yrtnuoc". This is useful when implementing efficient leading
+	/// wildcards search.
+	/// </para>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ReverseStringFilter, or when using any of
+	/// its static methods:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are handled correctly
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class ReverseStringFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly char marker;
+	  private readonly Version matchVersion;
+	  private const char NOMARKER = '\uFFFF';
+
+	  /// <summary>
+	  /// Example marker character: U+0001 (START OF HEADING) 
+	  /// </summary>
+	  public const char START_OF_HEADING_MARKER = '\u0001';
+
+	  /// <summary>
+	  /// Example marker character: U+001F (INFORMATION SEPARATOR ONE)
+	  /// </summary>
+	  public const char INFORMATION_SEPARATOR_MARKER = '\u001F';
+
+	  /// <summary>
+	  /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) 
+	  /// </summary>
+	  public const char PUA_EC00_MARKER = '\uEC00';
+
+	  /// <summary>
+	  /// Example marker character: U+200F (RIGHT-TO-LEFT MARK)
+	  /// </summary>
+	  public const char RTL_DIRECTION_MARKER = '\u200F';
+
+	  /// <summary>
+	  /// Create a new ReverseStringFilter that reverses all tokens in the 
+	  /// supplied <seealso cref="TokenStream"/>.
+	  /// <para>
+	  /// The reversed tokens will not be marked. 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> <seealso cref="TokenStream"/> to filter </param>
+	  public ReverseStringFilter(Version matchVersion, TokenStream @in) : this(matchVersion, @in, NOMARKER)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new ReverseStringFilter that reverses and marks all tokens in the
+	  /// supplied <seealso cref="TokenStream"/>.
+	  /// <para>
+	  /// The reversed tokens will be prepended (marked) by the <code>marker</code>
+	  /// character.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> <seealso cref="TokenStream"/> to filter </param>
+	  /// <param name="marker"> A character used to mark reversed tokens </param>
+	  public ReverseStringFilter(Version matchVersion, TokenStream @in, char marker) : base(@in)
+	  {
+		this.matchVersion = matchVersion;
+		this.marker = marker;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  int len = termAtt.length();
+		  if (marker != NOMARKER)
+		  {
+			len++;
+			termAtt.resizeBuffer(len);
+			termAtt.buffer()[len - 1] = marker;
+		  }
+		  reverse(matchVersion, termAtt.buffer(), 0, len);
+		  termAtt.Length = len;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Reverses the given input string
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="input"> the string to reverse </param>
+	  /// <returns> the given input string in reversed order </returns>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static String reverse(org.apache.lucene.util.Version matchVersion, final String input)
+	  public static string reverse(Version matchVersion, string input)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] charInput = input.toCharArray();
+		char[] charInput = input.ToCharArray();
+		reverse(matchVersion, charInput, 0, charInput.Length);
+		return new string(charInput);
+	  }
+
+	  /// <summary>
+	  /// Reverses the given input buffer in-place </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer)
+	  public static void reverse(Version matchVersion, char[] buffer)
+	  {
+		reverse(matchVersion, buffer, 0, buffer.Length);
+	  }
+
+	  /// <summary>
+	  /// Partially reverses the given input buffer in-place from offset 0
+	  /// up to the given length. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+	  /// <param name="len"> the length in the buffer up to where the
+	  ///        buffer should be reversed </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer, final int len)
+	  public static void reverse(Version matchVersion, char[] buffer, int len)
+	  {
+		reverse(matchVersion, buffer, 0, len);
+	  }
+
+	  /// @deprecated (3.1) Remove this when support for 3.0 indexes is no longer needed. 
+	  [Obsolete("(3.1) Remove this when support for 3.0 indexes is no longer needed.")]
+	  private static void reverseUnicode3(char[] buffer, int start, int len)
+	  {
+		if (len <= 1)
+		{
+			return;
+		}
+		int num = len >> 1;
+		for (int i = start; i < (start + num); i++)
+		{
+		  char c = buffer[i];
+		  buffer[i] = buffer[start * 2 + len - i - 1];
+		  buffer[start * 2 + len - i - 1] = c;
+		}
+	  }
+
+	  /// <summary>
+	  /// Partially reverses the given input buffer in-place from the given offset
+	  /// up to the given length. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+	  /// <param name="start"> the offset from where to reverse the buffer </param>
+	  /// <param name="len"> the length in the buffer up to where the
+	  ///        buffer should be reversed </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer, final int start, final int len)
+	  public static void reverse(Version matchVersion, char[] buffer, int start, int len)
+	  {
+		if (!matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  reverseUnicode3(buffer, start, len);
+		  return;
+		}
+		/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
+		if (len < 2)
+		{
+		  return;
+		}
+		int end = (start + len) - 1;
+		char frontHigh = buffer[start];
+		char endLow = buffer[end];
+		bool allowFrontSur = true, allowEndSur = true;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int mid = start + (len >> 1);
+		int mid = start + (len >> 1);
+		for (int i = start; i < mid; ++i, --end)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char frontLow = buffer[i + 1];
+		  char frontLow = buffer[i + 1];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char endHigh = buffer[end - 1];
+		  char endHigh = buffer[end - 1];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean surAtFront = allowFrontSur && Character.isSurrogatePair(frontHigh, frontLow);
+		  bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
+		  if (surAtFront && (len < 3))
+		  {
+			// nothing to do since surAtFront is allowed and 1 char left
+			return;
+		  }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean surAtEnd = allowEndSur && Character.isSurrogatePair(endHigh, endLow);
+		  bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
+		  allowFrontSur = allowEndSur = true;
+		  if (surAtFront == surAtEnd)
+		  {
+			if (surAtFront)
+			{
+			  // both surrogates
+			  buffer[end] = frontLow;
+			  buffer[--end] = frontHigh;
+			  buffer[i] = endHigh;
+			  buffer[++i] = endLow;
+			  frontHigh = buffer[i + 1];
+			  endLow = buffer[end - 1];
+			}
+			else
+			{
+			  // neither surrogates
+			  buffer[end] = frontHigh;
+			  buffer[i] = endLow;
+			  frontHigh = frontLow;
+			  endLow = endHigh;
+			}
+		  }
+		  else
+		  {
+			if (surAtFront)
+			{
+			  // surrogate only at the front
+			  buffer[end] = frontLow;
+			  buffer[i] = endLow;
+			  endLow = endHigh;
+			  allowFrontSur = false;
+			}
+			else
+			{
+			  // surrogate only at the end
+			  buffer[end] = frontHigh;
+			  buffer[i] = endHigh;
+			  frontHigh = frontLow;
+			  allowEndSur = false;
+			}
+		  }
+		}
+		if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
+		{
+		  // only if odd length
+		  buffer[end] = allowFrontSur ? endLow : frontHigh;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

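A sketch of the leading-wildcard technique the summary describes (tokenizer stands for any existing Tokenizer; the marker constant comes from the class above):

    // index side: each token is reversed and prepended with U+0001, e.g. "country" -> "\u0001yrtnuoc"
    TokenStream indexed = new ReverseStringFilter(
        Version.LUCENE_CURRENT, tokenizer, ReverseStringFilter.START_OF_HEADING_MARKER);

    // query side: a leading wildcard like *try becomes an efficient prefix on the marked field
    string prefix = ReverseStringFilter.START_OF_HEADING_MARKER
        + ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "try"); // "\u0001yrt"
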
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
new file mode 100644
index 0000000..abc8d24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
@@ -0,0 +1,59 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.reverse
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Factory for <seealso cref="ReverseStringFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ReverseStringFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since solr 1.4
+	/// </summary>
+	public class ReverseStringFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ReverseStringFilterFactory </summary>
+	  public ReverseStringFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ReverseStringFilter create(TokenStream @in)
+	  {
+		return new ReverseStringFilter(luceneMatchVersion,@in);
+	  }
+	}
+
+
+}
\ No newline at end of file
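
Like every analysis factory in this raw port, ReverseStringFilterFactory takes its configuration as a string-to-string dictionary; the base constructor consumes the keys it recognizes (such as luceneMatchVersion) and anything left over triggers the "Unknown parameters" ArgumentException above. A hypothetical usage sketch, assuming the ported API compiles as written:

    var args = new Dictionary<string, string> { { "luceneMatchVersion", "LUCENE_48" } };
    var factory = new ReverseStringFilterFactory(args); // args is drained by the base ctor
    TokenStream reversed = factory.create(tokens);      // tokens: any upstream TokenStream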

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
new file mode 100644
index 0000000..a68928c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
@@ -0,0 +1,142 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.ro
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using RomanianStemmer = org.tartarus.snowball.ext.RomanianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Romanian.
+	/// </summary>
+	public sealed class RomanianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Romanian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+	  /// <summary>
+	  /// The comment character in the stopwords file.  
+	  /// All lines prefixed with this will be ignored.
+	  /// </summary>
+	  private const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(RomanianAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public RomanianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new RomanianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
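
A hypothetical consumption sketch for the chain createComponents() builds (StandardTokenizer, StandardFilter, LowerCaseFilter, StopFilter, optional SetKeywordMarkerFilter, SnowballFilter); it assumes the ported Analyzer keeps Lucene's tokenStream/reset/incrementToken contract and this port's lower-case method names:

    var analyzer = new RomanianAnalyzer(Version.LUCENE_48);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("copiii copacilor"));
    var term = ts.addAttribute(typeof(CharTermAttribute));
    ts.reset();
    while (ts.incrementToken())
    {
        Console.WriteLine(term); // lowercased, stopword-filtered, Snowball-stemmed tokens
    }
    ts.end();
    ts.close();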

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
new file mode 100644
index 0000000..955f021
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
@@ -0,0 +1,172 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Russian language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all).
+	/// A default set of stopwords is used unless an alternative list is specified.
+	/// </para>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating RussianAnalyzer:
+	/// <ul>
+	///   <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
+	///        SnowballFilter, and Snowball stopwords are used by default.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class RussianAnalyzer : StopwordAnalyzerBase
+	{
+		/// <summary>
+		/// List of typical Russian stopwords. (for backwards compatibility) </summary>
+		/// @deprecated (3.1) Remove this for LUCENE 5.0 
+		[Obsolete("(3.1) Remove this for LUCENE 5.0")]
+		private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] {"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в", "вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где", "да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть", "еще", "же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как", "ко", "когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо", "наш", "не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об", "однако", "он", "она", "они", "оно", "от", "очень", "по", "под", "при", "с", "со", "так", "также", "такой", "там", "те", "тем", "то", "того", "тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей", "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"};
+
+		/// <summary>
+		/// File containing default Russian stopwords. </summary>
+		public const string DEFAULT_STOPWORD_FILE = "russian_stop.txt";
+
+		private class DefaultSetHolder
+		{
+		  /// @deprecated (3.1) remove this for Lucene 5.0 
+		  [Obsolete("(3.1) remove this for Lucene 5.0")]
+		  internal static readonly CharArraySet DEFAULT_STOP_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
+		  internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		  static DefaultSetHolder()
+		  {
+			try
+			{
+			  DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+			}
+			catch (IOException ex)
+			{
+			  // default set should always be present as it is part of the
+			  // distribution (JAR)
+			  throw new Exception("Unable to load default stopword set", ex);
+			}
+		  }
+		}
+
+		private readonly CharArraySet stemExclusionSet;
+
+		/// <summary>
+		/// Returns an unmodifiable instance of the default stop-words set.
+		/// </summary>
+		/// <returns> an unmodifiable instance of the default stop-words set. </returns>
+		public static CharArraySet DefaultStopSet
+		{
+			get
+			{
+			  return DefaultSetHolder.DEFAULT_STOP_SET;
+			}
+		}
+
+		public RussianAnalyzer(Version matchVersion) : this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET : DefaultSetHolder.DEFAULT_STOP_SET_30)
+		{
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words
+		/// </summary>
+		/// <param name="matchVersion">
+		///          lucene compatibility version </param>
+		/// <param name="stopwords">
+		///          a stopword set </param>
+		public RussianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+		{
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words
+		/// </summary>
+		/// <param name="matchVersion">
+		///          lucene compatibility version </param>
+		/// <param name="stopwords">
+		///          a stopword set </param>
+		/// <param name="stemExclusionSet"> a set of words not to be stemmed </param>
+		public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+		{
+		  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+		}
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="SnowballFilter"/> </returns>
+		protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  if (matchVersion.onOrAfter(Version.LUCENE_31))
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+			Tokenizer source = new StandardTokenizer(matchVersion, reader);
+			TokenStream result = new StandardFilter(matchVersion, source);
+			result = new LowerCaseFilter(matchVersion, result);
+			result = new StopFilter(matchVersion, result, stopwords);
+			if (!stemExclusionSet.Empty)
+			{
+				result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+			}
+			result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+			return new TokenStreamComponents(source, result);
+		  }
+		  else
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
+			Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
+			TokenStream result = new LowerCaseFilter(matchVersion, source);
+			result = new StopFilter(matchVersion, result, stopwords);
+			if (!stemExclusionSet.Empty)
+			{
+				result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+			}
+			result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+			return new TokenStreamComponents(source, result);
+		  }
+		}
+	}
+
+}
\ No newline at end of file
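
The matchVersion argument decides both the default stopword set and the tokenizer, as the branch in createComponents() shows. A sketch of the two behaviors (version constants assumed available in the port):

    var modern = new RussianAnalyzer(Version.LUCENE_48); // StandardTokenizer + Snowball stopword file
    var legacy = new RussianAnalyzer(Version.LUCENE_30); // RussianLetterTokenizer + RUSSIAN_STOP_WORDS_30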

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
new file mode 100644
index 0000000..5ef27f2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
@@ -0,0 +1,83 @@
+using System;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using LetterTokenizer = org.apache.lucene.analysis.core.LetterTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A RussianLetterTokenizer is a <seealso cref="Tokenizer"/> that extends <seealso cref="LetterTokenizer"/>
+	/// by also allowing the basic Latin digits 0-9.
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="RussianLetterTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.
+	/// This filter will be removed in Lucene 5.0  
+	[Obsolete("(3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.")]
+	public class RussianLetterTokenizer : CharTokenizer
+	{
+		private const int DIGIT_0 = '0';
+		private const int DIGIT_9 = '9';
+
+		/// <summary>
+		/// Construct a new RussianLetterTokenizer. </summary>
+		/// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+		/// <param name="in">
+		///          the input to split up into tokens </param>
+		public RussianLetterTokenizer(Version matchVersion, Reader @in) : base(matchVersion, @in)
+		{
+		}
+
+		/// <summary>
+		/// Construct a new RussianLetterTokenizer using a given
+		/// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+		/// </summary>
+		/// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+		/// <param name="factory">
+		///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+		/// <param name="in">
+		///          the input to split up into tokens </param>
+		public RussianLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader @in) : base(matchVersion, factory, @in)
+		{
+		}
+
+		 /// <summary>
+		 /// Collects only characters which satisfy
+		 /// <seealso cref="Character#isLetter(int)"/>, plus the basic Latin digits 0-9.
+		 /// </summary>
+		protected internal override bool isTokenChar(int c)
+		{
+			// c is a Unicode code point, not a UTF-16 char, so convert before testing
+			return char.IsLetter(char.ConvertFromUtf32(c), 0) || (c >= DIGIT_0 && c <= DIGIT_9);
+		}
+	}
+
+}
\ No newline at end of file
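
The predicate above accepts any Unicode letter plus the ASCII digits 0-9, so digit runs attach to adjacent letters instead of splitting tokens. A standalone sketch of the same rule (editor's illustration, not part of the commit):

    // Letters from any script and the basic Latin digits count as token chars.
    static bool IsRussianTokenChar(int cp)
    {
        return char.IsLetter(char.ConvertFromUtf32(cp), 0) || (cp >= '0' && cp <= '9');
    }
    // "пример123 тест" tokenizes as "пример123" and "тест"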

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
new file mode 100644
index 0000000..b308426
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.
+	///  This tokenizer has no Russian-specific functionality. 
+	[Obsolete("Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.")]
+	public class RussianLetterTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RussianLetterTokenizerFactory </summary>
+	  public RussianLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override RussianLetterTokenizer create(AttributeFactory factory, Reader @in)
+	  {
+		return new RussianLetterTokenizer(luceneMatchVersion, factory, @in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
new file mode 100644
index 0000000..0573bd3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="RussianLightStemmer"/> to stem Russian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class RussianLightStemFilter : TokenFilter
+	{
+	  private readonly RussianLightStemmer stemmer = new RussianLightStemmer();
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  public RussianLightStemFilter(TokenStream input) : base(input)
+	  {
+		// C# field initializers cannot call instance methods, so the
+		// attributes are registered here instead of at the declarations
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
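
Because incrementToken() skips terms whose KeywordAttribute is set, stemming can be suppressed per term by inserting a SetKeywordMarkerFilter upstream. A hypothetical sketch (the CharArraySet constructor signature is assumed from the Java original):

    CharArraySet keep = new CharArraySet(matchVersion, 1, true);
    keep.add("москва");
    TokenStream ts = new RussianLightStemFilter(new SetKeywordMarkerFilter(input, keep));
    // terms in "keep" pass through unstemmed; everything else is light-stemmed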

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
new file mode 100644
index 0000000..157a5df
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="RussianLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.RussianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class RussianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RussianLightStemFilterFactory </summary>
+	  public RussianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new RussianLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
new file mode 100644
index 0000000..7550c4a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
@@ -0,0 +1,134 @@
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Russian.
+	/// <para>
+	/// This stemmer implements the following algorithm:
+	/// <i>Indexing and Searching Strategies for the Russian Language.</i>
+	/// Ljiljana Dolamic and Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class RussianLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		len = removeCase(s, len);
+		return normalize(s, len);
+	  }
+
+	  private int normalize(char[] s, int len)
+	  {
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'ь':
+			case 'и':
+				return len - 1;
+			case 'н':
+				if (s[len - 2] == 'н')
+				{
+					return len - 1;
+				}
+				break; // required in C#: cases may not fall out implicitly
+		  }
+		}
+		return len;
+	  }
+
+	  private int removeCase(char[] s, int len)
+	  {
+		if (len > 6 && (StemmerUtil.EndsWith(s, len, "иями") || StemmerUtil.EndsWith(s, len, "оями")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "иям") || StemmerUtil.EndsWith(s, len, "иях") || StemmerUtil.EndsWith(s, len, "оях") || StemmerUtil.EndsWith(s, len, "ями") || StemmerUtil.EndsWith(s, len, "оям") || StemmerUtil.EndsWith(s, len, "оьв") || StemmerUtil.EndsWith(s, len, "ами") || StemmerUtil.EndsWith(s, len, "его") || StemmerUtil.EndsWith(s, len, "ему") || StemmerUtil.EndsWith(s, len, "ери") || StemmerUtil.EndsWith(s, len, "ими") || StemmerUtil.EndsWith(s, len, "ого") || StemmerUtil.EndsWith(s, len, "ому") || StemmerUtil.EndsWith(s, len, "ыми") || StemmerUtil.EndsWith(s, len, "оев")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "ая") || StemmerUtil.EndsWith(s, len, "яя") || StemmerUtil.EndsWith(s, len, "ях") || StemmerUtil.EndsWith(s, len, "юю") || StemmerUtil.EndsWith(s, len, "ах") || StemmerUtil.EndsWith(s, len, "ею") || StemmerUtil.EndsWith(s, len, "их") || StemmerUtil.EndsWith(s, len, "ия") || StemmerUtil.EndsWith(s, len, "ию") || StemmerUtil.EndsWith(s, len, "ьв") || StemmerUtil.EndsWith(s, len, "ою") || StemmerUtil.EndsWith(s, len, "ую") || StemmerUtil.EndsWith(s, len, "ям") || StemmerUtil.EndsWith(s, len, "ых") || StemmerUtil.EndsWith(s, len, "ея") || StemmerUtil.EndsWith(s, len, "ам") || StemmerUtil.EndsWith(s, len, "ем") || StemmerUtil.EndsWith(s, len, "ей") || StemmerUtil.EndsWith(s, len, "ём") || StemmerUtil.EndsWith(s, len, "ев") || StemmerUtil.EndsWith(s, len, "ий") || StemmerUtil.EndsWith(s, len, "им") || StemmerUtil.EndsWith(s, len, "ое") || StemmerUtil.EndsWith(s, len, "ой") || StemmerUtil.EndsWit
 h(s, len, "ом") || StemmerUtil.EndsWith(s, len, "ов") || StemmerUtil.EndsWith(s, len, "ые") || StemmerUtil.EndsWith(s, len, "ый") || StemmerUtil.EndsWith(s, len, "ым") || StemmerUtil.EndsWith(s, len, "ми")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'а':
+			case 'е':
+			case 'и':
+			case 'о':
+			case 'у':
+			case 'й':
+			case 'ы':
+			case 'я':
+			case 'ь':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
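
A worked pass through the two phases above (editor's example): for "книгами" (length 7), removeCase() finds the three-character ending "ами" in the len > 5 group and returns 4, leaving "книг"; normalize() then sees no trailing 'ь'/'и' and no double 'н', so the stem is "книг".

    var stemmer = new RussianLightStemmer();
    char[] s = "книгами".ToCharArray();
    int len = stemmer.stem(s, s.Length);   // 7 -> 4
    string stem = new string(s, 0, len);   // "книг"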


[17/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
new file mode 100644
index 0000000..87574cc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
@@ -0,0 +1,2118 @@
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+	/// <summary>
+	/// This class converts alphabetic, numeric, and symbolic Unicode characters
+	/// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
+	/// block) into their ASCII equivalents, if one exists.
+	/// 
+	/// Characters from the following Unicode blocks are converted; however, only
+	/// those characters with reasonable ASCII alternatives are converted:
+	/// 
+	/// <ul>
+	///   <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
+	///   <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
+	///   <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
+	///   <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
+	///   <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
+	///   <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
+	///   <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
+	///   <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
+	///   <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
+	///   <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
+	///   <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
+	///   <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
+	///   <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
+	///   <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
+	///   <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
+	///   <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
+	/// </ul>
+	///  
+	/// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
+	/// 
+	/// For example, 'à' will be replaced by 'a'.
+	/// </summary>
+	public sealed class ASCIIFoldingFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt;
+	  private readonly PositionIncrementAttribute posIncAttr;
+	  private readonly bool preserveOriginal;
+	  private char[] output = new char[512];
+	  private int outputPos;
+	  private State state;
+
+	  public ASCIIFoldingFilter(TokenStream input) : this(input, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new <seealso cref="ASCIIFoldingFilter"/>.
+	  /// </summary>
+	  /// <param name="input">
+	  ///          TokenStream to filter </param>
+	  /// <param name="preserveOriginal">
+	  ///          should the original tokens be kept on the input stream with a 0 position increment
+	  ///          from the folded tokens?
+	  ///  </param>
+	  public ASCIIFoldingFilter(TokenStream input, bool preserveOriginal) : base(input)
+	  {
+		this.preserveOriginal = preserveOriginal;
+		// registered here because C# field initializers cannot call instance methods
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		posIncAttr = addAttribute(typeof(PositionIncrementAttribute));
+	  }
+
+	  /// <summary>
+	  /// Does the filter preserve the original tokens?
+	  /// </summary>
+	  public bool PreserveOriginal
+	  {
+		  get
+		  {
+			return preserveOriginal;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (state != null)
+		{
+		  Debug.Assert(preserveOriginal, "state should only be captured if preserveOriginal is true");
+		  restoreState(state);
+		  posIncAttr.PositionIncrement = 0;
+		  state = null;
+		  return true;
+		}
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		  char[] buffer = termAtt.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int length = termAtt.length();
+		  int length = termAtt.length();
+
+		  // If no characters actually require rewriting then we
+		  // just return token as-is:
+		  for (int i = 0 ; i < length ; ++i)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char c = buffer[i];
+			char c = buffer[i];
+			if (c >= '\u0080')
+			{
+			  foldToASCII(buffer, length);
+			  termAtt.copyBuffer(output, 0, outputPos);
+			  break;
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		state = null;
+	  }
+
+	  /// <summary>
+	  /// Converts characters above ASCII to their ASCII equivalents.  For example,
+	  /// accents are removed from accented characters. </summary>
+	  /// <param name="input"> The string to fold </param>
+	  /// <param name="length"> The number of characters in the input string </param>
+	  public void foldToASCII(char[] input, int length)
+	  {
+		if (preserveOriginal)
+		{
+		  state = captureState();
+		}
+		// Worst-case length required:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int maxSizeNeeded = 4 * length;
+		int maxSizeNeeded = 4 * length;
+		if (output.Length < maxSizeNeeded)
+		{
+		  output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+
+		outputPos = foldToASCII(input, 0, output, 0, length);
+	  }
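+
+	  // Editor's note (not part of the original commit): a worked example of the
+	  // static foldToASCII overload defined below, assuming the class compiles as
+	  // ported --
+	  //   char[] buf = "crème brûlée".ToCharArray();
+	  //   var output = new char[4 * buf.Length];             // worst case: 4x growth
+	  //   int n = foldToASCII(buf, 0, output, 0, buf.Length);
+	  //   // output[0..n) holds "creme brulee"; n == 12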
+
+	  /// <summary>
+	  /// Converts characters above ASCII to their ASCII equivalents.  For example,
+	  /// accents are removed from accented characters. </summary>
+	  /// <param name="input">     The characters to fold </param>
+	  /// <param name="inputPos">  Index of the first character to fold </param>
+	  /// <param name="output">    The result of the folding. Should be of size >= {@code length * 4}. </param>
+	  /// <param name="outputPos"> Index of output where to put the result of the folding </param>
+	  /// <param name="length">    The number of characters to fold </param>
+	  /// <returns> length of output
+	  /// @lucene.internal </returns>
+	  public static int foldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = inputPos + length;
+		int end = inputPos + length;
+		for (int pos = inputPos; pos < end ; ++pos)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char c = input[pos];
+		  char c = input[pos];
+
+		  // Quick test: if it's not in range then just keep current character
+		  if (c < '\u0080')
+		  {
+			output[outputPos++] = c;
+		  }
+		  else
+		  {
+			switch (c)
+			{
+			  case '\u00C0': // À  [LATIN CAPITAL LETTER A WITH GRAVE]
+			  case '\u00C1': // Á  [LATIN CAPITAL LETTER A WITH ACUTE]
+			  case '\u00C2': // Â  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
+			  case '\u00C3': // Ã  [LATIN CAPITAL LETTER A WITH TILDE]
+			  case '\u00C4': // Ä  [LATIN CAPITAL LETTER A WITH DIAERESIS]
+			  case '\u00C5': // Å  [LATIN CAPITAL LETTER A WITH RING ABOVE]
+			  case '\u0100': // Ā  [LATIN CAPITAL LETTER A WITH MACRON]
+			  case '\u0102': // Ă  [LATIN CAPITAL LETTER A WITH BREVE]
+			  case '\u0104': // Ą  [LATIN CAPITAL LETTER A WITH OGONEK]
+			  case '\u018F': // Ə  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA]
+			  case '\u01CD': // Ǎ  [LATIN CAPITAL LETTER A WITH CARON]
+			  case '\u01DE': // Ǟ  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
+			  case '\u01E0': // Ǡ  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
+			  case '\u01FA': // Ǻ  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
+			  case '\u0200': // Ȁ  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
+			  case '\u0202': // Ȃ  [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
+			  case '\u0226': // Ȧ  [LATIN CAPITAL LETTER A WITH DOT ABOVE]
+			  case '\u023A': // Ⱥ  [LATIN CAPITAL LETTER A WITH STROKE]
+			  case '\u1D00': // ᴀ  [LATIN LETTER SMALL CAPITAL A]
+			  case '\u1E00': // Ḁ  [LATIN CAPITAL LETTER A WITH RING BELOW]
+			  case '\u1EA0': // Ạ  [LATIN CAPITAL LETTER A WITH DOT BELOW]
+			  case '\u1EA2': // Ả  [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
+			  case '\u1EA4': // Ấ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1EA6': // Ầ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1EA8': // Ẩ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1EAA': // Ẫ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
+			  case '\u1EAC': // Ậ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u1EAE': // Ắ  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
+			  case '\u1EB0': // Ằ  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
+			  case '\u1EB2': // Ẳ  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
+			  case '\u1EB4': // Ẵ  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
+			  case '\u1EB6': // Ặ  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
+			  case '\u24B6': // Ⓐ  [CIRCLED LATIN CAPITAL LETTER A]
+			  case '\uFF21': // A  [FULLWIDTH LATIN CAPITAL LETTER A]
+				output[outputPos++] = 'A';
+				break;
+			  case '\u00E0': // à  [LATIN SMALL LETTER A WITH GRAVE]
+			  case '\u00E1': // á  [LATIN SMALL LETTER A WITH ACUTE]
+			  case '\u00E2': // â  [LATIN SMALL LETTER A WITH CIRCUMFLEX]
+			  case '\u00E3': // ã  [LATIN SMALL LETTER A WITH TILDE]
+			  case '\u00E4': // ä  [LATIN SMALL LETTER A WITH DIAERESIS]
+			  case '\u00E5': // å  [LATIN SMALL LETTER A WITH RING ABOVE]
+			  case '\u0101': // ā  [LATIN SMALL LETTER A WITH MACRON]
+			  case '\u0103': // ă  [LATIN SMALL LETTER A WITH BREVE]
+			  case '\u0105': // ą  [LATIN SMALL LETTER A WITH OGONEK]
+			  case '\u01CE': // ǎ  [LATIN SMALL LETTER A WITH CARON]
+			  case '\u01DF': // ǟ  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
+			  case '\u01E1': // ǡ  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
+			  case '\u01FB': // ǻ  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
+			  case '\u0201': // ȁ  [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
+			  case '\u0203': // ȃ  [LATIN SMALL LETTER A WITH INVERTED BREVE]
+			  case '\u0227': // ȧ  [LATIN SMALL LETTER A WITH DOT ABOVE]
+			  case '\u0250': // ɐ  [LATIN SMALL LETTER TURNED A]
+			  case '\u0259': // ə  [LATIN SMALL LETTER SCHWA]
+			  case '\u025A': // ɚ  [LATIN SMALL LETTER SCHWA WITH HOOK]
+			  case '\u1D8F': // ᶏ  [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
+			  case '\u1D95': // ᶕ  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
+			  case '\u1E01': // ạ  [LATIN SMALL LETTER A WITH RING BELOW]
+			  case '\u1E9A': // ả  [LATIN SMALL LETTER A WITH RIGHT HALF RING]
+			  case '\u1EA1': // ạ  [LATIN SMALL LETTER A WITH DOT BELOW]
+			  case '\u1EA3': // ả  [LATIN SMALL LETTER A WITH HOOK ABOVE]
+			  case '\u1EA5': // ấ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1EA7': // ầ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1EA9': // ẩ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1EAB': // ẫ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
+			  case '\u1EAD': // ậ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u1EAF': // ắ  [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
+			  case '\u1EB1': // ằ  [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
+			  case '\u1EB3': // ẳ  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
+			  case '\u1EB5': // ẵ  [LATIN SMALL LETTER A WITH BREVE AND TILDE]
+			  case '\u1EB7': // ặ  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
+			  case '\u2090': // ₐ  [LATIN SUBSCRIPT SMALL LETTER A]
+			  case '\u2094': // ₔ  [LATIN SUBSCRIPT SMALL LETTER SCHWA]
+			  case '\u24D0': // ⓐ  [CIRCLED LATIN SMALL LETTER A]
+			  case '\u2C65': // ⱥ  [LATIN SMALL LETTER A WITH STROKE]
+			  case '\u2C6F': // Ɐ  [LATIN CAPITAL LETTER TURNED A]
+			  case '\uFF41': // a  [FULLWIDTH LATIN SMALL LETTER A]
+				output[outputPos++] = 'a';
+				break;
+			  case '\uA732': // Ꜳ  [LATIN CAPITAL LETTER AA]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'A';
+				break;
+			  case '\u00C6': // Æ  [LATIN CAPITAL LETTER AE]
+			  case '\u01E2': // Ǣ  [LATIN CAPITAL LETTER AE WITH MACRON]
+			  case '\u01FC': // Ǽ  [LATIN CAPITAL LETTER AE WITH ACUTE]
+			  case '\u1D01': // ᴁ  [LATIN LETTER SMALL CAPITAL AE]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'E';
+				break;
+			  case '\uA734': // Ꜵ  [LATIN CAPITAL LETTER AO]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'O';
+				break;
+			  case '\uA736': // Ꜷ  [LATIN CAPITAL LETTER AU]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'U';
+				break;
+			  case '\uA738': // Ꜹ  [LATIN CAPITAL LETTER AV]
+			  case '\uA73A': // Ꜻ  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'V';
+				break;
+			  case '\uA73C': // Ꜽ  [LATIN CAPITAL LETTER AY]
+				output[outputPos++] = 'A';
+				output[outputPos++] = 'Y';
+				break;
+			  case '\u249C': // ⒜  [PARENTHESIZED LATIN SMALL LETTER A]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'a';
+				output[outputPos++] = ')';
+				break;
+			  case '\uA733': // ꜳ  [LATIN SMALL LETTER AA]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'a';
+				break;
+			  case '\u00E6': // æ  [LATIN SMALL LETTER AE]
+			  case '\u01E3': // ǣ  [LATIN SMALL LETTER AE WITH MACRON]
+			  case '\u01FD': // ǽ  [LATIN SMALL LETTER AE WITH ACUTE]
+			  case '\u1D02': // ᴂ  [LATIN SMALL LETTER TURNED AE]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'e';
+				break;
+			  case '\uA735': // ꜵ  [LATIN SMALL LETTER AO]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'o';
+				break;
+			  case '\uA737': // ꜷ  [LATIN SMALL LETTER AU]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'u';
+				break;
+			  case '\uA739': // ꜹ  [LATIN SMALL LETTER AV]
+			  case '\uA73B': // ꜻ  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'v';
+				break;
+			  case '\uA73D': // ꜽ  [LATIN SMALL LETTER AY]
+				output[outputPos++] = 'a';
+				output[outputPos++] = 'y';
+				break;
+			  case '\u0181': // Ɓ  [LATIN CAPITAL LETTER B WITH HOOK]
+			  case '\u0182': // Ƃ  [LATIN CAPITAL LETTER B WITH TOPBAR]
+			  case '\u0243': // Ƀ  [LATIN CAPITAL LETTER B WITH STROKE]
+			  case '\u0299': // ʙ  [LATIN LETTER SMALL CAPITAL B]
+			  case '\u1D03': // ᴃ  [LATIN LETTER SMALL CAPITAL BARRED B]
+			  case '\u1E02': // Ḃ  [LATIN CAPITAL LETTER B WITH DOT ABOVE]
+			  case '\u1E04': // Ḅ  [LATIN CAPITAL LETTER B WITH DOT BELOW]
+			  case '\u1E06': // Ḇ  [LATIN CAPITAL LETTER B WITH LINE BELOW]
+			  case '\u24B7': // Ⓑ  [CIRCLED LATIN CAPITAL LETTER B]
+			  case '\uFF22': // B  [FULLWIDTH LATIN CAPITAL LETTER B]
+				output[outputPos++] = 'B';
+				break;
+			  case '\u0180': // ƀ  [LATIN SMALL LETTER B WITH STROKE]
+			  case '\u0183': // ƃ  [LATIN SMALL LETTER B WITH TOPBAR]
+			  case '\u0253': // ɓ  [LATIN SMALL LETTER B WITH HOOK]
+			  case '\u1D6C': // ᵬ  [LATIN SMALL LETTER B WITH MIDDLE TILDE]
+			  case '\u1D80': // ᶀ  [LATIN SMALL LETTER B WITH PALATAL HOOK]
+			  case '\u1E03': // ḃ  [LATIN SMALL LETTER B WITH DOT ABOVE]
+			  case '\u1E05': // ḅ  [LATIN SMALL LETTER B WITH DOT BELOW]
+			  case '\u1E07': // ḇ  [LATIN SMALL LETTER B WITH LINE BELOW]
+			  case '\u24D1': // ⓑ  [CIRCLED LATIN SMALL LETTER B]
+			  case '\uFF42': // b  [FULLWIDTH LATIN SMALL LETTER B]
+				output[outputPos++] = 'b';
+				break;
+			  case '\u249D': // ⒝  [PARENTHESIZED LATIN SMALL LETTER B]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'b';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00C7': // Ç  [LATIN CAPITAL LETTER C WITH CEDILLA]
+			  case '\u0106': // Ć  [LATIN CAPITAL LETTER C WITH ACUTE]
+			  case '\u0108': // Ĉ  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
+			  case '\u010A': // Ċ  [LATIN CAPITAL LETTER C WITH DOT ABOVE]
+			  case '\u010C': // Č  [LATIN CAPITAL LETTER C WITH CARON]
+			  case '\u0187': // Ƈ  [LATIN CAPITAL LETTER C WITH HOOK]
+			  case '\u023B': // Ȼ  [LATIN CAPITAL LETTER C WITH STROKE]
+			  case '\u0297': // ʗ  [LATIN LETTER STRETCHED C]
+			  case '\u1D04': // ᴄ  [LATIN LETTER SMALL CAPITAL C]
+			  case '\u1E08': // Ḉ  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
+			  case '\u24B8': // Ⓒ  [CIRCLED LATIN CAPITAL LETTER C]
+			  case '\uFF23': // C  [FULLWIDTH LATIN CAPITAL LETTER C]
+				output[outputPos++] = 'C';
+				break;
+			  case '\u00E7': // ç  [LATIN SMALL LETTER C WITH CEDILLA]
+			  case '\u0107': // ć  [LATIN SMALL LETTER C WITH ACUTE]
+			  case '\u0109': // ĉ  [LATIN SMALL LETTER C WITH CIRCUMFLEX]
+			  case '\u010B': // ċ  [LATIN SMALL LETTER C WITH DOT ABOVE]
+			  case '\u010D': // č  [LATIN SMALL LETTER C WITH CARON]
+			  case '\u0188': // ƈ  [LATIN SMALL LETTER C WITH HOOK]
+			  case '\u023C': // ȼ  [LATIN SMALL LETTER C WITH STROKE]
+			  case '\u0255': // ɕ  [LATIN SMALL LETTER C WITH CURL]
+			  case '\u1E09': // ḉ  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
+			  case '\u2184': // ↄ  [LATIN SMALL LETTER REVERSED C]
+			  case '\u24D2': // ⓒ  [CIRCLED LATIN SMALL LETTER C]
+			  case '\uA73E': // Ꜿ  [LATIN CAPITAL LETTER REVERSED C WITH DOT]
+			  case '\uA73F': // ꜿ  [LATIN SMALL LETTER REVERSED C WITH DOT]
+			  case '\uFF43': // c  [FULLWIDTH LATIN SMALL LETTER C]
+				output[outputPos++] = 'c';
+				break;
+			  case '\u249E': // ⒞  [PARENTHESIZED LATIN SMALL LETTER C]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'c';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00D0': // Ð  [LATIN CAPITAL LETTER ETH]
+			  case '\u010E': // Ď  [LATIN CAPITAL LETTER D WITH CARON]
+			  case '\u0110': // Đ  [LATIN CAPITAL LETTER D WITH STROKE]
+			  case '\u0189': // Ɖ  [LATIN CAPITAL LETTER AFRICAN D]
+			  case '\u018A': // Ɗ  [LATIN CAPITAL LETTER D WITH HOOK]
+			  case '\u018B': // Ƌ  [LATIN CAPITAL LETTER D WITH TOPBAR]
+			  case '\u1D05': // ᴅ  [LATIN LETTER SMALL CAPITAL D]
+			  case '\u1D06': // ᴆ  [LATIN LETTER SMALL CAPITAL ETH]
+			  case '\u1E0A': // Ḋ  [LATIN CAPITAL LETTER D WITH DOT ABOVE]
+			  case '\u1E0C': // Ḍ  [LATIN CAPITAL LETTER D WITH DOT BELOW]
+			  case '\u1E0E': // Ḏ  [LATIN CAPITAL LETTER D WITH LINE BELOW]
+			  case '\u1E10': // Ḑ  [LATIN CAPITAL LETTER D WITH CEDILLA]
+			  case '\u1E12': // Ḓ  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
+			  case '\u24B9': // Ⓓ  [CIRCLED LATIN CAPITAL LETTER D]
+			  case '\uA779': // Ꝺ  [LATIN CAPITAL LETTER INSULAR D]
+			  case '\uFF24': // D  [FULLWIDTH LATIN CAPITAL LETTER D]
+				output[outputPos++] = 'D';
+				break;
+			  case '\u00F0': // ð  [LATIN SMALL LETTER ETH]
+			  case '\u010F': // ď  [LATIN SMALL LETTER D WITH CARON]
+			  case '\u0111': // đ  [LATIN SMALL LETTER D WITH STROKE]
+			  case '\u018C': // ƌ  [LATIN SMALL LETTER D WITH TOPBAR]
+			  case '\u0221': // ȡ  [LATIN SMALL LETTER D WITH CURL]
+			  case '\u0256': // ɖ  [LATIN SMALL LETTER D WITH TAIL]
+			  case '\u0257': // ɗ  [LATIN SMALL LETTER D WITH HOOK]
+			  case '\u1D6D': // ᵭ  [LATIN SMALL LETTER D WITH MIDDLE TILDE]
+			  case '\u1D81': // ᶁ  [LATIN SMALL LETTER D WITH PALATAL HOOK]
+			  case '\u1D91': // ᶑ  [LATIN SMALL LETTER D WITH HOOK AND TAIL]
+			  case '\u1E0B': // ḋ  [LATIN SMALL LETTER D WITH DOT ABOVE]
+			  case '\u1E0D': // ḍ  [LATIN SMALL LETTER D WITH DOT BELOW]
+			  case '\u1E0F': // ḏ  [LATIN SMALL LETTER D WITH LINE BELOW]
+			  case '\u1E11': // ḑ  [LATIN SMALL LETTER D WITH CEDILLA]
+			  case '\u1E13': // ḓ  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
+			  case '\u24D3': // ⓓ  [CIRCLED LATIN SMALL LETTER D]
+			  case '\uA77A': // ꝺ  [LATIN SMALL LETTER INSULAR D]
+			  case '\uFF44': // d  [FULLWIDTH LATIN SMALL LETTER D]
+				output[outputPos++] = 'd';
+				break;
+			  case '\u01C4': // DŽ  [LATIN CAPITAL LETTER DZ WITH CARON]
+			  case '\u01F1': // DZ  [LATIN CAPITAL LETTER DZ]
+				output[outputPos++] = 'D';
+				output[outputPos++] = 'Z';
+				break;
+			  case '\u01C5': // Dž  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
+			  case '\u01F2': // Dz  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
+				output[outputPos++] = 'D';
+				output[outputPos++] = 'z';
+				break;
+			  case '\u249F': // ⒟  [PARENTHESIZED LATIN SMALL LETTER D]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'd';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0238': // ȸ  [LATIN SMALL LETTER DB DIGRAPH]
+				output[outputPos++] = 'd';
+				output[outputPos++] = 'b';
+				break;
+			  case '\u01C6': // dž  [LATIN SMALL LETTER DZ WITH CARON]
+			  case '\u01F3': // dz  [LATIN SMALL LETTER DZ]
+			  case '\u02A3': // ʣ  [LATIN SMALL LETTER DZ DIGRAPH]
+			  case '\u02A5': // ʥ  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
+				output[outputPos++] = 'd';
+				output[outputPos++] = 'z';
+				break;
+			  case '\u00C8': // È  [LATIN CAPITAL LETTER E WITH GRAVE]
+			  case '\u00C9': // É  [LATIN CAPITAL LETTER E WITH ACUTE]
+			  case '\u00CA': // Ê  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
+			  case '\u00CB': // Ë  [LATIN CAPITAL LETTER E WITH DIAERESIS]
+			  case '\u0112': // Ē  [LATIN CAPITAL LETTER E WITH MACRON]
+			  case '\u0114': // Ĕ  [LATIN CAPITAL LETTER E WITH BREVE]
+			  case '\u0116': // Ė  [LATIN CAPITAL LETTER E WITH DOT ABOVE]
+			  case '\u0118': // Ę  [LATIN CAPITAL LETTER E WITH OGONEK]
+			  case '\u011A': // Ě  [LATIN CAPITAL LETTER E WITH CARON]
+			  case '\u018E': // Ǝ  [LATIN CAPITAL LETTER REVERSED E]
+			  case '\u0190': // Ɛ  [LATIN CAPITAL LETTER OPEN E]
+			  case '\u0204': // Ȅ  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
+			  case '\u0206': // Ȇ  [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
+			  case '\u0228': // Ȩ  [LATIN CAPITAL LETTER E WITH CEDILLA]
+			  case '\u0246': // Ɇ  [LATIN CAPITAL LETTER E WITH STROKE]
+			  case '\u1D07': // ᴇ  [LATIN LETTER SMALL CAPITAL E]
+			  case '\u1E14': // Ḕ  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
+			  case '\u1E16': // Ḗ  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
+			  case '\u1E18': // Ḙ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
+			  case '\u1E1A': // Ḛ  [LATIN CAPITAL LETTER E WITH TILDE BELOW]
+			  case '\u1E1C': // Ḝ  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
+			  case '\u1EB8': // Ẹ  [LATIN CAPITAL LETTER E WITH DOT BELOW]
+			  case '\u1EBA': // Ẻ  [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
+			  case '\u1EBC': // Ẽ  [LATIN CAPITAL LETTER E WITH TILDE]
+			  case '\u1EBE': // Ế  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1EC0': // Ề  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1EC2': // Ể  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1EC4': // Ễ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
+			  case '\u1EC6': // Ệ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u24BA': // Ⓔ  [CIRCLED LATIN CAPITAL LETTER E]
+			  case '\u2C7B': // ⱻ  [LATIN LETTER SMALL CAPITAL TURNED E]
+			  case '\uFF25': // E  [FULLWIDTH LATIN CAPITAL LETTER E]
+				output[outputPos++] = 'E';
+				break;
+			  case '\u00E8': // è  [LATIN SMALL LETTER E WITH GRAVE]
+			  case '\u00E9': // é  [LATIN SMALL LETTER E WITH ACUTE]
+			  case '\u00EA': // ê  [LATIN SMALL LETTER E WITH CIRCUMFLEX]
+			  case '\u00EB': // ë  [LATIN SMALL LETTER E WITH DIAERESIS]
+			  case '\u0113': // ē  [LATIN SMALL LETTER E WITH MACRON]
+			  case '\u0115': // ĕ  [LATIN SMALL LETTER E WITH BREVE]
+			  case '\u0117': // ė  [LATIN SMALL LETTER E WITH DOT ABOVE]
+			  case '\u0119': // ę  [LATIN SMALL LETTER E WITH OGONEK]
+			  case '\u011B': // ě  [LATIN SMALL LETTER E WITH CARON]
+			  case '\u01DD': // ǝ  [LATIN SMALL LETTER TURNED E]
+			  case '\u0205': // ȅ  [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
+			  case '\u0207': // ȇ  [LATIN SMALL LETTER E WITH INVERTED BREVE]
+			  case '\u0229': // ȩ  [LATIN SMALL LETTER E WITH CEDILLA]
+			  case '\u0247': // ɇ  [LATIN SMALL LETTER E WITH STROKE]
+			  case '\u0258': // ɘ  [LATIN SMALL LETTER REVERSED E]
+			  case '\u025B': // ɛ  [LATIN SMALL LETTER OPEN E]
+			  case '\u025C': // ɜ  [LATIN SMALL LETTER REVERSED OPEN E]
+			  case '\u025D': // ɝ  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
+			  case '\u025E': // ɞ  [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
+			  case '\u029A': // ʚ  [LATIN SMALL LETTER CLOSED OPEN E]
+			  case '\u1D08': // ᴈ  [LATIN SMALL LETTER TURNED OPEN E]
+			  case '\u1D92': // ᶒ  [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
+			  case '\u1D93': // ᶓ  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
+			  case '\u1D94': // ᶔ  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
+			  case '\u1E15': // ḕ  [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
+			  case '\u1E17': // ḗ  [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
+			  case '\u1E19': // ḙ  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
+			  case '\u1E1B': // ḛ  [LATIN SMALL LETTER E WITH TILDE BELOW]
+			  case '\u1E1D': // ḝ  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
+			  case '\u1EB9': // ẹ  [LATIN SMALL LETTER E WITH DOT BELOW]
+			  case '\u1EBB': // ẻ  [LATIN SMALL LETTER E WITH HOOK ABOVE]
+			  case '\u1EBD': // ẽ  [LATIN SMALL LETTER E WITH TILDE]
+			  case '\u1EBF': // ế  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1EC1': // ề  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1EC3': // ể  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1EC5': // ễ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
+			  case '\u1EC7': // ệ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u2091': // ₑ  [LATIN SUBSCRIPT SMALL LETTER E]
+			  case '\u24D4': // ⓔ  [CIRCLED LATIN SMALL LETTER E]
+			  case '\u2C78': // ⱸ  [LATIN SMALL LETTER E WITH NOTCH]
+			  case '\uFF45': // e  [FULLWIDTH LATIN SMALL LETTER E]
+				output[outputPos++] = 'e';
+				break;
+			  case '\u24A0': // ⒠  [PARENTHESIZED LATIN SMALL LETTER E]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'e';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0191': // Ƒ  [LATIN CAPITAL LETTER F WITH HOOK]
+			  case '\u1E1E': // Ḟ  [LATIN CAPITAL LETTER F WITH DOT ABOVE]
+			  case '\u24BB': // Ⓕ  [CIRCLED LATIN CAPITAL LETTER F]
+			  case '\uA730': // ꜰ  [LATIN LETTER SMALL CAPITAL F]
+			  case '\uA77B': // Ꝼ  [LATIN CAPITAL LETTER INSULAR F]
+			  case '\uA7FB': // ꟻ  [LATIN EPIGRAPHIC LETTER REVERSED F]
+			  case '\uFF26': // F  [FULLWIDTH LATIN CAPITAL LETTER F]
+				output[outputPos++] = 'F';
+				break;
+			  case '\u0192': // ƒ  [LATIN SMALL LETTER F WITH HOOK]
+			  case '\u1D6E': // ᵮ  [LATIN SMALL LETTER F WITH MIDDLE TILDE]
+			  case '\u1D82': // ᶂ  [LATIN SMALL LETTER F WITH PALATAL HOOK]
+			  case '\u1E1F': // ḟ  [LATIN SMALL LETTER F WITH DOT ABOVE]
+			  case '\u1E9B': // ẛ  [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
+			  case '\u24D5': // ⓕ  [CIRCLED LATIN SMALL LETTER F]
+			  case '\uA77C': // ꝼ  [LATIN SMALL LETTER INSULAR F]
+			  case '\uFF46': // f  [FULLWIDTH LATIN SMALL LETTER F]
+				output[outputPos++] = 'f';
+				break;
+			  case '\u24A1': // ⒡  [PARENTHESIZED LATIN SMALL LETTER F]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'f';
+				output[outputPos++] = ')';
+				break;
+			  case '\uFB00': // ff  [LATIN SMALL LIGATURE FF]
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'f';
+				break;
+			  case '\uFB03': // ffi  [LATIN SMALL LIGATURE FFI]
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'i';
+				break;
+			  case '\uFB04': // ffl  [LATIN SMALL LIGATURE FFL]
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'l';
+				break;
+			  case '\uFB01': // fi  [LATIN SMALL LIGATURE FI]
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'i';
+				break;
+			  case '\uFB02': // fl  [LATIN SMALL LIGATURE FL]
+				output[outputPos++] = 'f';
+				output[outputPos++] = 'l';
+				break;
+			  case '\u011C': // Ĝ  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
+			  case '\u011E': // Ğ  [LATIN CAPITAL LETTER G WITH BREVE]
+			  case '\u0120': // Ġ  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
+			  case '\u0122': // Ģ  [LATIN CAPITAL LETTER G WITH CEDILLA]
+			  case '\u0193': // Ɠ  [LATIN CAPITAL LETTER G WITH HOOK]
+			  case '\u01E4': // Ǥ  [LATIN CAPITAL LETTER G WITH STROKE]
+			  case '\u01E5': // ǥ  [LATIN SMALL LETTER G WITH STROKE]
+			  case '\u01E6': // Ǧ  [LATIN CAPITAL LETTER G WITH CARON]
+			  case '\u01E7': // ǧ  [LATIN SMALL LETTER G WITH CARON]
+			  case '\u01F4': // Ǵ  [LATIN CAPITAL LETTER G WITH ACUTE]
+			  case '\u0262': // ɢ  [LATIN LETTER SMALL CAPITAL G]
+			  case '\u029B': // ʛ  [LATIN LETTER SMALL CAPITAL G WITH HOOK]
+			  case '\u1E20': // Ḡ  [LATIN CAPITAL LETTER G WITH MACRON]
+			  case '\u24BC': // Ⓖ  [CIRCLED LATIN CAPITAL LETTER G]
+			  case '\uA77D': // Ᵹ  [LATIN CAPITAL LETTER INSULAR G]
+			  case '\uA77E': // Ꝿ  [LATIN CAPITAL LETTER TURNED INSULAR G]
+			  case '\uFF27': // G  [FULLWIDTH LATIN CAPITAL LETTER G]
+				output[outputPos++] = 'G';
+				break;
+			  case '\u011D': // ĝ  [LATIN SMALL LETTER G WITH CIRCUMFLEX]
+			  case '\u011F': // ğ  [LATIN SMALL LETTER G WITH BREVE]
+			  case '\u0121': // ġ  [LATIN SMALL LETTER G WITH DOT ABOVE]
+			  case '\u0123': // ģ  [LATIN SMALL LETTER G WITH CEDILLA]
+			  case '\u01F5': // ǵ  [LATIN SMALL LETTER G WITH ACUTE]
+			  case '\u0260': // ɠ  [LATIN SMALL LETTER G WITH HOOK]
+			  case '\u0261': // ɡ  [LATIN SMALL LETTER SCRIPT G]
+			  case '\u1D77': // ᵷ  [LATIN SMALL LETTER TURNED G]
+			  case '\u1D79': // ᵹ  [LATIN SMALL LETTER INSULAR G]
+			  case '\u1D83': // ᶃ  [LATIN SMALL LETTER G WITH PALATAL HOOK]
+			  case '\u1E21': // ḡ  [LATIN SMALL LETTER G WITH MACRON]
+			  case '\u24D6': // ⓖ  [CIRCLED LATIN SMALL LETTER G]
+			  case '\uA77F': // ꝿ  [LATIN SMALL LETTER TURNED INSULAR G]
+			  case '\uFF47': // g  [FULLWIDTH LATIN SMALL LETTER G]
+				output[outputPos++] = 'g';
+				break;
+			  case '\u24A2': // ⒢  [PARENTHESIZED LATIN SMALL LETTER G]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'g';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0124': // Ĥ  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
+			  case '\u0126': // Ħ  [LATIN CAPITAL LETTER H WITH STROKE]
+			  case '\u021E': // Ȟ  [LATIN CAPITAL LETTER H WITH CARON]
+			  case '\u029C': // ʜ  [LATIN LETTER SMALL CAPITAL H]
+			  case '\u1E22': // Ḣ  [LATIN CAPITAL LETTER H WITH DOT ABOVE]
+			  case '\u1E24': // Ḥ  [LATIN CAPITAL LETTER H WITH DOT BELOW]
+			  case '\u1E26': // Ḧ  [LATIN CAPITAL LETTER H WITH DIAERESIS]
+			  case '\u1E28': // Ḩ  [LATIN CAPITAL LETTER H WITH CEDILLA]
+			  case '\u1E2A': // Ḫ  [LATIN CAPITAL LETTER H WITH BREVE BELOW]
+			  case '\u24BD': // Ⓗ  [CIRCLED LATIN CAPITAL LETTER H]
+			  case '\u2C67': // Ⱨ  [LATIN CAPITAL LETTER H WITH DESCENDER]
+			  case '\u2C75': // Ⱶ  [LATIN CAPITAL LETTER HALF H]
+			  case '\uFF28': // H  [FULLWIDTH LATIN CAPITAL LETTER H]
+				output[outputPos++] = 'H';
+				break;
+			  case '\u0125': // ĥ  [LATIN SMALL LETTER H WITH CIRCUMFLEX]
+			  case '\u0127': // ħ  [LATIN SMALL LETTER H WITH STROKE]
+			  case '\u021F': // ȟ  [LATIN SMALL LETTER H WITH CARON]
+			  case '\u0265': // ɥ  [LATIN SMALL LETTER TURNED H]
+			  case '\u0266': // ɦ  [LATIN SMALL LETTER H WITH HOOK]
+			  case '\u02AE': // ʮ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
+			  case '\u02AF': // ʯ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
+			  case '\u1E23': // ḣ  [LATIN SMALL LETTER H WITH DOT ABOVE]
+			  case '\u1E25': // ḥ  [LATIN SMALL LETTER H WITH DOT BELOW]
+			  case '\u1E27': // ḧ  [LATIN SMALL LETTER H WITH DIAERESIS]
+			  case '\u1E29': // ḩ  [LATIN SMALL LETTER H WITH CEDILLA]
+			  case '\u1E2B': // ḫ  [LATIN SMALL LETTER H WITH BREVE BELOW]
+			  case '\u1E96': // ẖ  [LATIN SMALL LETTER H WITH LINE BELOW]
+			  case '\u24D7': // ⓗ  [CIRCLED LATIN SMALL LETTER H]
+			  case '\u2C68': // ⱨ  [LATIN SMALL LETTER H WITH DESCENDER]
+			  case '\u2C76': // ⱶ  [LATIN SMALL LETTER HALF H]
+			  case '\uFF48': // h  [FULLWIDTH LATIN SMALL LETTER H]
+				output[outputPos++] = 'h';
+				break;
+			  case '\u01F6': // Ƕ  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR]
+				output[outputPos++] = 'H';
+				output[outputPos++] = 'V';
+				break;
+			  case '\u24A3': // ⒣  [PARENTHESIZED LATIN SMALL LETTER H]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'h';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0195': // ƕ  [LATIN SMALL LETTER HV]
+				output[outputPos++] = 'h';
+				output[outputPos++] = 'v';
+				break;
+			  case '\u00CC': // Ì  [LATIN CAPITAL LETTER I WITH GRAVE]
+			  case '\u00CD': // Í  [LATIN CAPITAL LETTER I WITH ACUTE]
+			  case '\u00CE': // Î  [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
+			  case '\u00CF': // Ï  [LATIN CAPITAL LETTER I WITH DIAERESIS]
+			  case '\u0128': // Ĩ  [LATIN CAPITAL LETTER I WITH TILDE]
+			  case '\u012A': // Ī  [LATIN CAPITAL LETTER I WITH MACRON]
+			  case '\u012C': // Ĭ  [LATIN CAPITAL LETTER I WITH BREVE]
+			  case '\u012E': // Į  [LATIN CAPITAL LETTER I WITH OGONEK]
+			  case '\u0130': // İ  [LATIN CAPITAL LETTER I WITH DOT ABOVE]
+			  case '\u0196': // Ɩ  [LATIN CAPITAL LETTER IOTA]
+			  case '\u0197': // Ɨ  [LATIN CAPITAL LETTER I WITH STROKE]
+			  case '\u01CF': // Ǐ  [LATIN CAPITAL LETTER I WITH CARON]
+			  case '\u0208': // Ȉ  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
+			  case '\u020A': // Ȋ  [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
+			  case '\u026A': // ɪ  [LATIN LETTER SMALL CAPITAL I]
+			  case '\u1D7B': // ᵻ  [LATIN SMALL CAPITAL LETTER I WITH STROKE]
+			  case '\u1E2C': // Ḭ  [LATIN CAPITAL LETTER I WITH TILDE BELOW]
+			  case '\u1E2E': // Ḯ  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
+			  case '\u1EC8': // Ỉ  [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
+			  case '\u1ECA': // Ị  [LATIN CAPITAL LETTER I WITH DOT BELOW]
+			  case '\u24BE': // Ⓘ  [CIRCLED LATIN CAPITAL LETTER I]
+			  case '\uA7FE': // ꟾ  [LATIN EPIGRAPHIC LETTER I LONGA]
+			  case '\uFF29': // I  [FULLWIDTH LATIN CAPITAL LETTER I]
+				output[outputPos++] = 'I';
+				break;
+			  case '\u00EC': // ì  [LATIN SMALL LETTER I WITH GRAVE]
+			  case '\u00ED': // í  [LATIN SMALL LETTER I WITH ACUTE]
+			  case '\u00EE': // î  [LATIN SMALL LETTER I WITH CIRCUMFLEX]
+			  case '\u00EF': // ï  [LATIN SMALL LETTER I WITH DIAERESIS]
+			  case '\u0129': // ĩ  [LATIN SMALL LETTER I WITH TILDE]
+			  case '\u012B': // ī  [LATIN SMALL LETTER I WITH MACRON]
+			  case '\u012D': // ĭ  [LATIN SMALL LETTER I WITH BREVE]
+			  case '\u012F': // į  [LATIN SMALL LETTER I WITH OGONEK]
+			  case '\u0131': // ı  [LATIN SMALL LETTER DOTLESS I]
+			  case '\u01D0': // ǐ  [LATIN SMALL LETTER I WITH CARON]
+			  case '\u0209': // ȉ  [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
+			  case '\u020B': // ȋ  [LATIN SMALL LETTER I WITH INVERTED BREVE]
+			  case '\u0268': // ɨ  [LATIN SMALL LETTER I WITH STROKE]
+			  case '\u1D09': // ᴉ  [LATIN SMALL LETTER TURNED I]
+			  case '\u1D62': // ᵢ  [LATIN SUBSCRIPT SMALL LETTER I]
+			  case '\u1D7C': // ᵼ  [LATIN SMALL LETTER IOTA WITH STROKE]
+			  case '\u1D96': // ᶖ  [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
+			  case '\u1E2D': // ḭ  [LATIN SMALL LETTER I WITH TILDE BELOW]
+			  case '\u1E2F': // ḯ  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
+			  case '\u1EC9': // ỉ  [LATIN SMALL LETTER I WITH HOOK ABOVE]
+			  case '\u1ECB': // ị  [LATIN SMALL LETTER I WITH DOT BELOW]
+			  case '\u2071': // ⁱ  [SUPERSCRIPT LATIN SMALL LETTER I]
+			  case '\u24D8': // ⓘ  [CIRCLED LATIN SMALL LETTER I]
+			  case '\uFF49': // i  [FULLWIDTH LATIN SMALL LETTER I]
+				output[outputPos++] = 'i';
+				break;
+			  case '\u0132': // IJ  [LATIN CAPITAL LIGATURE IJ]
+				output[outputPos++] = 'I';
+				output[outputPos++] = 'J';
+				break;
+			  case '\u24A4': // ⒤  [PARENTHESIZED LATIN SMALL LETTER I]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'i';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0133': // ij  [LATIN SMALL LIGATURE IJ]
+				output[outputPos++] = 'i';
+				output[outputPos++] = 'j';
+				break;
+			  case '\u0134': // Ĵ  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
+			  case '\u0248': // Ɉ  [LATIN CAPITAL LETTER J WITH STROKE]
+			  case '\u1D0A': // ᴊ  [LATIN LETTER SMALL CAPITAL J]
+			  case '\u24BF': // Ⓙ  [CIRCLED LATIN CAPITAL LETTER J]
+			  case '\uFF2A': // J  [FULLWIDTH LATIN CAPITAL LETTER J]
+				output[outputPos++] = 'J';
+				break;
+			  case '\u0135': // ĵ  [LATIN SMALL LETTER J WITH CIRCUMFLEX]
+			  case '\u01F0': // ǰ  [LATIN SMALL LETTER J WITH CARON]
+			  case '\u0237': // ȷ  [LATIN SMALL LETTER DOTLESS J]
+			  case '\u0249': // ɉ  [LATIN SMALL LETTER J WITH STROKE]
+			  case '\u025F': // ɟ  [LATIN SMALL LETTER DOTLESS J WITH STROKE]
+			  case '\u0284': // ʄ  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
+			  case '\u029D': // ʝ  [LATIN SMALL LETTER J WITH CROSSED-TAIL]
+			  case '\u24D9': // ⓙ  [CIRCLED LATIN SMALL LETTER J]
+			  case '\u2C7C': // ⱼ  [LATIN SUBSCRIPT SMALL LETTER J]
+			  case '\uFF4A': // j  [FULLWIDTH LATIN SMALL LETTER J]
+				output[outputPos++] = 'j';
+				break;
+			  case '\u24A5': // ⒥  [PARENTHESIZED LATIN SMALL LETTER J]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'j';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0136': // Ķ  [LATIN CAPITAL LETTER K WITH CEDILLA]
+			  case '\u0198': // Ƙ  [LATIN CAPITAL LETTER K WITH HOOK]
+			  case '\u01E8': // Ǩ  [LATIN CAPITAL LETTER K WITH CARON]
+			  case '\u1D0B': // ᴋ  [LATIN LETTER SMALL CAPITAL K]
+			  case '\u1E30': // Ḱ  [LATIN CAPITAL LETTER K WITH ACUTE]
+			  case '\u1E32': // Ḳ  [LATIN CAPITAL LETTER K WITH DOT BELOW]
+			  case '\u1E34': // Ḵ  [LATIN CAPITAL LETTER K WITH LINE BELOW]
+			  case '\u24C0': // Ⓚ  [CIRCLED LATIN CAPITAL LETTER K]
+			  case '\u2C69': // Ⱪ  [LATIN CAPITAL LETTER K WITH DESCENDER]
+			  case '\uA740': // Ꝁ  [LATIN CAPITAL LETTER K WITH STROKE]
+			  case '\uA742': // Ꝃ  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
+			  case '\uA744': // Ꝅ  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
+			  case '\uFF2B': // K  [FULLWIDTH LATIN CAPITAL LETTER K]
+				output[outputPos++] = 'K';
+				break;
+			  case '\u0137': // ķ  [LATIN SMALL LETTER K WITH CEDILLA]
+			  case '\u0199': // ƙ  [LATIN SMALL LETTER K WITH HOOK]
+			  case '\u01E9': // ǩ  [LATIN SMALL LETTER K WITH CARON]
+			  case '\u029E': // ʞ  [LATIN SMALL LETTER TURNED K]
+			  case '\u1D84': // ᶄ  [LATIN SMALL LETTER K WITH PALATAL HOOK]
+			  case '\u1E31': // ḱ  [LATIN SMALL LETTER K WITH ACUTE]
+			  case '\u1E33': // ḳ  [LATIN SMALL LETTER K WITH DOT BELOW]
+			  case '\u1E35': // ḵ  [LATIN SMALL LETTER K WITH LINE BELOW]
+			  case '\u24DA': // ⓚ  [CIRCLED LATIN SMALL LETTER K]
+			  case '\u2C6A': // ⱪ  [LATIN SMALL LETTER K WITH DESCENDER]
+			  case '\uA741': // ꝁ  [LATIN SMALL LETTER K WITH STROKE]
+			  case '\uA743': // ꝃ  [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
+			  case '\uA745': // ꝅ  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
+			  case '\uFF4B': // k  [FULLWIDTH LATIN SMALL LETTER K]
+				output[outputPos++] = 'k';
+				break;
+			  case '\u24A6': // ⒦  [PARENTHESIZED LATIN SMALL LETTER K]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'k';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0139': // Ĺ  [LATIN CAPITAL LETTER L WITH ACUTE]
+			  case '\u013B': // Ļ  [LATIN CAPITAL LETTER L WITH CEDILLA]
+			  case '\u013D': // Ľ  [LATIN CAPITAL LETTER L WITH CARON]
+			  case '\u013F': // Ŀ  [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
+			  case '\u0141': // Ł  [LATIN CAPITAL LETTER L WITH STROKE]
+			  case '\u023D': // Ƚ  [LATIN CAPITAL LETTER L WITH BAR]
+			  case '\u029F': // ʟ  [LATIN LETTER SMALL CAPITAL L]
+			  case '\u1D0C': // ᴌ  [LATIN LETTER SMALL CAPITAL L WITH STROKE]
+			  case '\u1E36': // Ḷ  [LATIN CAPITAL LETTER L WITH DOT BELOW]
+			  case '\u1E38': // Ḹ  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
+			  case '\u1E3A': // Ḻ  [LATIN CAPITAL LETTER L WITH LINE BELOW]
+			  case '\u1E3C': // Ḽ  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
+			  case '\u24C1': // Ⓛ  [CIRCLED LATIN CAPITAL LETTER L]
+			  case '\u2C60': // Ⱡ  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
+			  case '\u2C62': // Ɫ  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
+			  case '\uA746': // Ꝇ  [LATIN CAPITAL LETTER BROKEN L]
+			  case '\uA748': // Ꝉ  [LATIN CAPITAL LETTER L WITH HIGH STROKE]
+			  case '\uA780': // Ꞁ  [LATIN CAPITAL LETTER TURNED L]
+			  case '\uFF2C': // L  [FULLWIDTH LATIN CAPITAL LETTER L]
+				output[outputPos++] = 'L';
+				break;
+			  case '\u013A': // ĺ  [LATIN SMALL LETTER L WITH ACUTE]
+			  case '\u013C': // ļ  [LATIN SMALL LETTER L WITH CEDILLA]
+			  case '\u013E': // ľ  [LATIN SMALL LETTER L WITH CARON]
+			  case '\u0140': // ŀ  [LATIN SMALL LETTER L WITH MIDDLE DOT]
+			  case '\u0142': // ł  [LATIN SMALL LETTER L WITH STROKE]
+			  case '\u019A': // ƚ  [LATIN SMALL LETTER L WITH BAR]
+			  case '\u0234': // ȴ  [LATIN SMALL LETTER L WITH CURL]
+			  case '\u026B': // ɫ  [LATIN SMALL LETTER L WITH MIDDLE TILDE]
+			  case '\u026C': // ɬ  [LATIN SMALL LETTER L WITH BELT]
+			  case '\u026D': // ɭ  [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
+			  case '\u1D85': // ᶅ  [LATIN SMALL LETTER L WITH PALATAL HOOK]
+			  case '\u1E37': // ḷ  [LATIN SMALL LETTER L WITH DOT BELOW]
+			  case '\u1E39': // ḹ  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
+			  case '\u1E3B': // ḻ  [LATIN SMALL LETTER L WITH LINE BELOW]
+			  case '\u1E3D': // ḽ  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
+			  case '\u24DB': // ⓛ  [CIRCLED LATIN SMALL LETTER L]
+			  case '\u2C61': // ⱡ  [LATIN SMALL LETTER L WITH DOUBLE BAR]
+			  case '\uA747': // ꝇ  [LATIN SMALL LETTER BROKEN L]
+			  case '\uA749': // ꝉ  [LATIN SMALL LETTER L WITH HIGH STROKE]
+			  case '\uA781': // ꞁ  [LATIN SMALL LETTER TURNED L]
+			  case '\uFF4C': // l  [FULLWIDTH LATIN SMALL LETTER L]
+				output[outputPos++] = 'l';
+				break;
+			  case '\u01C7': // LJ  [LATIN CAPITAL LETTER LJ]
+				output[outputPos++] = 'L';
+				output[outputPos++] = 'J';
+				break;
+			  case '\u1EFA': // Ỻ  [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
+				output[outputPos++] = 'L';
+				output[outputPos++] = 'L';
+				break;
+			  case '\u01C8': // Lj  [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
+				output[outputPos++] = 'L';
+				output[outputPos++] = 'j';
+				break;
+			  case '\u24A7': // ⒧  [PARENTHESIZED LATIN SMALL LETTER L]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'l';
+				output[outputPos++] = ')';
+				break;
+			  case '\u01C9': // lj  [LATIN SMALL LETTER LJ]
+				output[outputPos++] = 'l';
+				output[outputPos++] = 'j';
+				break;
+			  case '\u1EFB': // ỻ  [LATIN SMALL LETTER MIDDLE-WELSH LL]
+				output[outputPos++] = 'l';
+				output[outputPos++] = 'l';
+				break;
+			  case '\u02AA': // ʪ  [LATIN SMALL LETTER LS DIGRAPH]
+				output[outputPos++] = 'l';
+				output[outputPos++] = 's';
+				break;
+			  case '\u02AB': // ʫ  [LATIN SMALL LETTER LZ DIGRAPH]
+				output[outputPos++] = 'l';
+				output[outputPos++] = 'z';
+				break;
+			  case '\u019C': // Ɯ  [LATIN CAPITAL LETTER TURNED M]
+			  case '\u1D0D': // ᴍ  [LATIN LETTER SMALL CAPITAL M]
+			  case '\u1E3E': // Ḿ  [LATIN CAPITAL LETTER M WITH ACUTE]
+			  case '\u1E40': // Ṁ  [LATIN CAPITAL LETTER M WITH DOT ABOVE]
+			  case '\u1E42': // Ṃ  [LATIN CAPITAL LETTER M WITH DOT BELOW]
+			  case '\u24C2': // Ⓜ  [CIRCLED LATIN CAPITAL LETTER M]
+			  case '\u2C6E': // Ɱ  [LATIN CAPITAL LETTER M WITH HOOK]
+			  case '\uA7FD': // ꟽ  [LATIN EPIGRAPHIC LETTER INVERTED M]
+			  case '\uA7FF': // ꟿ  [LATIN EPIGRAPHIC LETTER ARCHAIC M]
+			  case '\uFF2D': // M  [FULLWIDTH LATIN CAPITAL LETTER M]
+				output[outputPos++] = 'M';
+				break;
+			  case '\u026F': // ɯ  [LATIN SMALL LETTER TURNED M]
+			  case '\u0270': // ɰ  [LATIN SMALL LETTER TURNED M WITH LONG LEG]
+			  case '\u0271': // ɱ  [LATIN SMALL LETTER M WITH HOOK]
+			  case '\u1D6F': // ᵯ  [LATIN SMALL LETTER M WITH MIDDLE TILDE]
+			  case '\u1D86': // ᶆ  [LATIN SMALL LETTER M WITH PALATAL HOOK]
+			  case '\u1E3F': // ḿ  [LATIN SMALL LETTER M WITH ACUTE]
+			  case '\u1E41': // ṁ  [LATIN SMALL LETTER M WITH DOT ABOVE]
+			  case '\u1E43': // ṃ  [LATIN SMALL LETTER M WITH DOT BELOW]
+			  case '\u24DC': // ⓜ  [CIRCLED LATIN SMALL LETTER M]
+			  case '\uFF4D': // m  [FULLWIDTH LATIN SMALL LETTER M]
+				output[outputPos++] = 'm';
+				break;
+			  case '\u24A8': // ⒨  [PARENTHESIZED LATIN SMALL LETTER M]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'm';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00D1': // Ñ  [LATIN CAPITAL LETTER N WITH TILDE]
+			  case '\u0143': // Ń  [LATIN CAPITAL LETTER N WITH ACUTE]
+			  case '\u0145': // Ņ  [LATIN CAPITAL LETTER N WITH CEDILLA]
+			  case '\u0147': // Ň  [LATIN CAPITAL LETTER N WITH CARON]
+			  case '\u014A': // Ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG]
+			  case '\u019D': // Ɲ  [LATIN CAPITAL LETTER N WITH LEFT HOOK]
+			  case '\u01F8': // Ǹ  [LATIN CAPITAL LETTER N WITH GRAVE]
+			  case '\u0220': // Ƞ  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
+			  case '\u0274': // ɴ  [LATIN LETTER SMALL CAPITAL N]
+			  case '\u1D0E': // ᴎ  [LATIN LETTER SMALL CAPITAL REVERSED N]
+			  case '\u1E44': // Ṅ  [LATIN CAPITAL LETTER N WITH DOT ABOVE]
+			  case '\u1E46': // Ṇ  [LATIN CAPITAL LETTER N WITH DOT BELOW]
+			  case '\u1E48': // Ṉ  [LATIN CAPITAL LETTER N WITH LINE BELOW]
+			  case '\u1E4A': // Ṋ  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
+			  case '\u24C3': // Ⓝ  [CIRCLED LATIN CAPITAL LETTER N]
+			  case '\uFF2E': // N  [FULLWIDTH LATIN CAPITAL LETTER N]
+				output[outputPos++] = 'N';
+				break;
+			  case '\u00F1': // ñ  [LATIN SMALL LETTER N WITH TILDE]
+			  case '\u0144': // ń  [LATIN SMALL LETTER N WITH ACUTE]
+			  case '\u0146': // ņ  [LATIN SMALL LETTER N WITH CEDILLA]
+			  case '\u0148': // ň  [LATIN SMALL LETTER N WITH CARON]
+			  case '\u0149': // ʼn  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
+			  case '\u014B': // ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG]
+			  case '\u019E': // ƞ  [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
+			  case '\u01F9': // ǹ  [LATIN SMALL LETTER N WITH GRAVE]
+			  case '\u0235': // ȵ  [LATIN SMALL LETTER N WITH CURL]
+			  case '\u0272': // ɲ  [LATIN SMALL LETTER N WITH LEFT HOOK]
+			  case '\u0273': // ɳ  [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
+			  case '\u1D70': // ᵰ  [LATIN SMALL LETTER N WITH MIDDLE TILDE]
+			  case '\u1D87': // ᶇ  [LATIN SMALL LETTER N WITH PALATAL HOOK]
+			  case '\u1E45': // ṅ  [LATIN SMALL LETTER N WITH DOT ABOVE]
+			  case '\u1E47': // ṇ  [LATIN SMALL LETTER N WITH DOT BELOW]
+			  case '\u1E49': // ṉ  [LATIN SMALL LETTER N WITH LINE BELOW]
+			  case '\u1E4B': // ṋ  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
+			  case '\u207F': // ⁿ  [SUPERSCRIPT LATIN SMALL LETTER N]
+			  case '\u24DD': // ⓝ  [CIRCLED LATIN SMALL LETTER N]
+			  case '\uFF4E': // n  [FULLWIDTH LATIN SMALL LETTER N]
+				output[outputPos++] = 'n';
+				break;
+			  case '\u01CA': // NJ  [LATIN CAPITAL LETTER NJ]
+				output[outputPos++] = 'N';
+				output[outputPos++] = 'J';
+				break;
+			  case '\u01CB': // Nj  [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
+				output[outputPos++] = 'N';
+				output[outputPos++] = 'j';
+				break;
+			  case '\u24A9': // ⒩  [PARENTHESIZED LATIN SMALL LETTER N]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'n';
+				output[outputPos++] = ')';
+				break;
+			  case '\u01CC': // nj  [LATIN SMALL LETTER NJ]
+				output[outputPos++] = 'n';
+				output[outputPos++] = 'j';
+				break;
+			  case '\u00D2': // Ò  [LATIN CAPITAL LETTER O WITH GRAVE]
+			  case '\u00D3': // Ó  [LATIN CAPITAL LETTER O WITH ACUTE]
+			  case '\u00D4': // Ô  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
+			  case '\u00D5': // Õ  [LATIN CAPITAL LETTER O WITH TILDE]
+			  case '\u00D6': // Ö  [LATIN CAPITAL LETTER O WITH DIAERESIS]
+			  case '\u00D8': // Ø  [LATIN CAPITAL LETTER O WITH STROKE]
+			  case '\u014C': // Ō  [LATIN CAPITAL LETTER O WITH MACRON]
+			  case '\u014E': // Ŏ  [LATIN CAPITAL LETTER O WITH BREVE]
+			  case '\u0150': // Ő  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
+			  case '\u0186': // Ɔ  [LATIN CAPITAL LETTER OPEN O]
+			  case '\u019F': // Ɵ  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
+			  case '\u01A0': // Ơ  [LATIN CAPITAL LETTER O WITH HORN]
+			  case '\u01D1': // Ǒ  [LATIN CAPITAL LETTER O WITH CARON]
+			  case '\u01EA': // Ǫ  [LATIN CAPITAL LETTER O WITH OGONEK]
+			  case '\u01EC': // Ǭ  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
+			  case '\u01FE': // Ǿ  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
+			  case '\u020C': // Ȍ  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
+			  case '\u020E': // Ȏ  [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
+			  case '\u022A': // Ȫ  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
+			  case '\u022C': // Ȭ  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
+			  case '\u022E': // Ȯ  [LATIN CAPITAL LETTER O WITH DOT ABOVE]
+			  case '\u0230': // Ȱ  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
+			  case '\u1D0F': // ᴏ  [LATIN LETTER SMALL CAPITAL O]
+			  case '\u1D10': // ᴐ  [LATIN LETTER SMALL CAPITAL OPEN O]
+			  case '\u1E4C': // Ṍ  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
+			  case '\u1E4E': // Ṏ  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
+			  case '\u1E50': // Ṑ  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
+			  case '\u1E52': // Ṓ  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
+			  case '\u1ECC': // Ọ  [LATIN CAPITAL LETTER O WITH DOT BELOW]
+			  case '\u1ECE': // Ỏ  [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
+			  case '\u1ED0': // Ố  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1ED2': // Ồ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1ED4': // Ổ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1ED6': // Ỗ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
+			  case '\u1ED8': // Ộ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u1EDA': // Ớ  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
+			  case '\u1EDC': // Ờ  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
+			  case '\u1EDE': // Ở  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
+			  case '\u1EE0': // Ỡ  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
+			  case '\u1EE2': // Ợ  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
+			  case '\u24C4': // Ⓞ  [CIRCLED LATIN CAPITAL LETTER O]
+			  case '\uA74A': // Ꝋ  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
+			  case '\uA74C': // Ꝍ  [LATIN CAPITAL LETTER O WITH LOOP]
+			  case '\uFF2F': // O  [FULLWIDTH LATIN CAPITAL LETTER O]
+				output[outputPos++] = 'O';
+				break;
+			  case '\u00F2': // ò  [LATIN SMALL LETTER O WITH GRAVE]
+			  case '\u00F3': // ó  [LATIN SMALL LETTER O WITH ACUTE]
+			  case '\u00F4': // ô  [LATIN SMALL LETTER O WITH CIRCUMFLEX]
+			  case '\u00F5': // õ  [LATIN SMALL LETTER O WITH TILDE]
+			  case '\u00F6': // ö  [LATIN SMALL LETTER O WITH DIAERESIS]
+			  case '\u00F8': // ø  [LATIN SMALL LETTER O WITH STROKE]
+			  case '\u014D': // ō  [LATIN SMALL LETTER O WITH MACRON]
+			  case '\u014F': // ŏ  [LATIN SMALL LETTER O WITH BREVE]
+			  case '\u0151': // ő  [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
+			  case '\u01A1': // ơ  [LATIN SMALL LETTER O WITH HORN]
+			  case '\u01D2': // ǒ  [LATIN SMALL LETTER O WITH CARON]
+			  case '\u01EB': // ǫ  [LATIN SMALL LETTER O WITH OGONEK]
+			  case '\u01ED': // ǭ  [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
+			  case '\u01FF': // ǿ  [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
+			  case '\u020D': // ȍ  [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
+			  case '\u020F': // ȏ  [LATIN SMALL LETTER O WITH INVERTED BREVE]
+			  case '\u022B': // ȫ  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
+			  case '\u022D': // ȭ  [LATIN SMALL LETTER O WITH TILDE AND MACRON]
+			  case '\u022F': // ȯ  [LATIN SMALL LETTER O WITH DOT ABOVE]
+			  case '\u0231': // ȱ  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
+			  case '\u0254': // ɔ  [LATIN SMALL LETTER OPEN O]
+			  case '\u0275': // ɵ  [LATIN SMALL LETTER BARRED O]
+			  case '\u1D16': // ᴖ  [LATIN SMALL LETTER TOP HALF O]
+			  case '\u1D17': // ᴗ  [LATIN SMALL LETTER BOTTOM HALF O]
+			  case '\u1D97': // ᶗ  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
+			  case '\u1E4D': // ṍ  [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
+			  case '\u1E4F': // ṏ  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
+			  case '\u1E51': // ṑ  [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
+			  case '\u1E53': // ṓ  [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
+			  case '\u1ECD': // ọ  [LATIN SMALL LETTER O WITH DOT BELOW]
+			  case '\u1ECF': // ỏ  [LATIN SMALL LETTER O WITH HOOK ABOVE]
+			  case '\u1ED1': // ố  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
+			  case '\u1ED3': // ồ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
+			  case '\u1ED5': // ổ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
+			  case '\u1ED7': // ỗ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
+			  case '\u1ED9': // ộ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
+			  case '\u1EDB': // ớ  [LATIN SMALL LETTER O WITH HORN AND ACUTE]
+			  case '\u1EDD': // ờ  [LATIN SMALL LETTER O WITH HORN AND GRAVE]
+			  case '\u1EDF': // ở  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
+			  case '\u1EE1': // ỡ  [LATIN SMALL LETTER O WITH HORN AND TILDE]
+			  case '\u1EE3': // ợ  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
+			  case '\u2092': // ₒ  [LATIN SUBSCRIPT SMALL LETTER O]
+			  case '\u24DE': // ⓞ  [CIRCLED LATIN SMALL LETTER O]
+			  case '\u2C7A': // ⱺ  [LATIN SMALL LETTER O WITH LOW RING INSIDE]
+			  case '\uA74B': // ꝋ  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
+			  case '\uA74D': // ꝍ  [LATIN SMALL LETTER O WITH LOOP]
+			  case '\uFF4F': // o  [FULLWIDTH LATIN SMALL LETTER O]
+				output[outputPos++] = 'o';
+				break;
+			  case '\u0152': // Œ  [LATIN CAPITAL LIGATURE OE]
+			  case '\u0276': // ɶ  [LATIN LETTER SMALL CAPITAL OE]
+				output[outputPos++] = 'O';
+				output[outputPos++] = 'E';
+				break;
+			  case '\uA74E': // Ꝏ  [LATIN CAPITAL LETTER OO]
+				output[outputPos++] = 'O';
+				output[outputPos++] = 'O';
+				break;
+			  case '\u0222': // Ȣ  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU]
+			  case '\u1D15': // ᴕ  [LATIN LETTER SMALL CAPITAL OU]
+				output[outputPos++] = 'O';
+				output[outputPos++] = 'U';
+				break;
+			  case '\u24AA': // ⒪  [PARENTHESIZED LATIN SMALL LETTER O]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'o';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0153': // œ  [LATIN SMALL LIGATURE OE]
+			  case '\u1D14': // ᴔ  [LATIN SMALL LETTER TURNED OE]
+				output[outputPos++] = 'o';
+				output[outputPos++] = 'e';
+				break;
+			  case '\uA74F': // ꝏ  [LATIN SMALL LETTER OO]
+				output[outputPos++] = 'o';
+				output[outputPos++] = 'o';
+				break;
+			  case '\u0223': // ȣ  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU]
+				output[outputPos++] = 'o';
+				output[outputPos++] = 'u';
+				break;
+			  case '\u01A4': // Ƥ  [LATIN CAPITAL LETTER P WITH HOOK]
+			  case '\u1D18': // ᴘ  [LATIN LETTER SMALL CAPITAL P]
+			  case '\u1E54': // Ṕ  [LATIN CAPITAL LETTER P WITH ACUTE]
+			  case '\u1E56': // Ṗ  [LATIN CAPITAL LETTER P WITH DOT ABOVE]
+			  case '\u24C5': // Ⓟ  [CIRCLED LATIN CAPITAL LETTER P]
+			  case '\u2C63': // Ᵽ  [LATIN CAPITAL LETTER P WITH STROKE]
+			  case '\uA750': // Ꝑ  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
+			  case '\uA752': // Ꝓ  [LATIN CAPITAL LETTER P WITH FLOURISH]
+			  case '\uA754': // Ꝕ  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
+			  case '\uFF30': // P  [FULLWIDTH LATIN CAPITAL LETTER P]
+				output[outputPos++] = 'P';
+				break;
+			  case '\u01A5': // ƥ  [LATIN SMALL LETTER P WITH HOOK]
+			  case '\u1D71': // ᵱ  [LATIN SMALL LETTER P WITH MIDDLE TILDE]
+			  case '\u1D7D': // ᵽ  [LATIN SMALL LETTER P WITH STROKE]
+			  case '\u1D88': // ᶈ  [LATIN SMALL LETTER P WITH PALATAL HOOK]
+			  case '\u1E55': // ṕ  [LATIN SMALL LETTER P WITH ACUTE]
+			  case '\u1E57': // ṗ  [LATIN SMALL LETTER P WITH DOT ABOVE]
+			  case '\u24DF': // ⓟ  [CIRCLED LATIN SMALL LETTER P]
+			  case '\uA751': // ꝑ  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
+			  case '\uA753': // ꝓ  [LATIN SMALL LETTER P WITH FLOURISH]
+			  case '\uA755': // ꝕ  [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
+			  case '\uA7FC': // ꟼ  [LATIN EPIGRAPHIC LETTER REVERSED P]
+			  case '\uFF50': // p  [FULLWIDTH LATIN SMALL LETTER P]
+				output[outputPos++] = 'p';
+				break;
+			  case '\u24AB': // ⒫  [PARENTHESIZED LATIN SMALL LETTER P]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'p';
+				output[outputPos++] = ')';
+				break;
+			  case '\u024A': // Ɋ  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
+			  case '\u24C6': // Ⓠ  [CIRCLED LATIN CAPITAL LETTER Q]
+			  case '\uA756': // Ꝗ  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
+			  case '\uA758': // Ꝙ  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
+			  case '\uFF31': // Q  [FULLWIDTH LATIN CAPITAL LETTER Q]
+				output[outputPos++] = 'Q';
+				break;
+			  case '\u0138': // ĸ  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA]
+			  case '\u024B': // ɋ  [LATIN SMALL LETTER Q WITH HOOK TAIL]
+			  case '\u02A0': // ʠ  [LATIN SMALL LETTER Q WITH HOOK]
+			  case '\u24E0': // ⓠ  [CIRCLED LATIN SMALL LETTER Q]
+			  case '\uA757': // ꝗ  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
+			  case '\uA759': // ꝙ  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
+			  case '\uFF51': // q  [FULLWIDTH LATIN SMALL LETTER Q]
+				output[outputPos++] = 'q';
+				break;
+			  case '\u24AC': // ⒬  [PARENTHESIZED LATIN SMALL LETTER Q]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'q';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0239': // ȹ  [LATIN SMALL LETTER QP DIGRAPH]
+				output[outputPos++] = 'q';
+				output[outputPos++] = 'p';
+				break;
+			  case '\u0154': // Ŕ  [LATIN CAPITAL LETTER R WITH ACUTE]
+			  case '\u0156': // Ŗ  [LATIN CAPITAL LETTER R WITH CEDILLA]
+			  case '\u0158': // Ř  [LATIN CAPITAL LETTER R WITH CARON]
+			  case '\u0210': // Ȑ  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
+			  case '\u0212': // Ȓ  [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
+			  case '\u024C': // Ɍ  [LATIN CAPITAL LETTER R WITH STROKE]
+			  case '\u0280': // ʀ  [LATIN LETTER SMALL CAPITAL R]
+			  case '\u0281': // ʁ  [LATIN LETTER SMALL CAPITAL INVERTED R]
+			  case '\u1D19': // ᴙ  [LATIN LETTER SMALL CAPITAL REVERSED R]
+			  case '\u1D1A': // ᴚ  [LATIN LETTER SMALL CAPITAL TURNED R]
+			  case '\u1E58': // Ṙ  [LATIN CAPITAL LETTER R WITH DOT ABOVE]
+			  case '\u1E5A': // Ṛ  [LATIN CAPITAL LETTER R WITH DOT BELOW]
+			  case '\u1E5C': // Ṝ  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
+			  case '\u1E5E': // Ṟ  [LATIN CAPITAL LETTER R WITH LINE BELOW]
+			  case '\u24C7': // Ⓡ  [CIRCLED LATIN CAPITAL LETTER R]
+			  case '\u2C64': // Ɽ  [LATIN CAPITAL LETTER R WITH TAIL]
+			  case '\uA75A': // Ꝛ  [LATIN CAPITAL LETTER R ROTUNDA]
+			  case '\uA782': // Ꞃ  [LATIN CAPITAL LETTER INSULAR R]
+			  case '\uFF32': // R  [FULLWIDTH LATIN CAPITAL LETTER R]
+				output[outputPos++] = 'R';
+				break;
+			  case '\u0155': // ŕ  [LATIN SMALL LETTER R WITH ACUTE]
+			  case '\u0157': // ŗ  [LATIN SMALL LETTER R WITH CEDILLA]
+			  case '\u0159': // ř  [LATIN SMALL LETTER R WITH CARON]
+			  case '\u0211': // ȑ  [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
+			  case '\u0213': // ȓ  [LATIN SMALL LETTER R WITH INVERTED BREVE]
+			  case '\u024D': // ɍ  [LATIN SMALL LETTER R WITH STROKE]
+			  case '\u027C': // ɼ  [LATIN SMALL LETTER R WITH LONG LEG]
+			  case '\u027D': // ɽ  [LATIN SMALL LETTER R WITH TAIL]
+			  case '\u027E': // ɾ  [LATIN SMALL LETTER R WITH FISHHOOK]
+			  case '\u027F': // ɿ  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
+			  case '\u1D63': // ᵣ  [LATIN SUBSCRIPT SMALL LETTER R]
+			  case '\u1D72': // ᵲ  [LATIN SMALL LETTER R WITH MIDDLE TILDE]
+			  case '\u1D73': // ᵳ  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
+			  case '\u1D89': // ᶉ  [LATIN SMALL LETTER R WITH PALATAL HOOK]
+			  case '\u1E59': // ṙ  [LATIN SMALL LETTER R WITH DOT ABOVE]
+			  case '\u1E5B': // ṛ  [LATIN SMALL LETTER R WITH DOT BELOW]
+			  case '\u1E5D': // ṝ  [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
+			  case '\u1E5F': // ṟ  [LATIN SMALL LETTER R WITH LINE BELOW]
+			  case '\u24E1': // ⓡ  [CIRCLED LATIN SMALL LETTER R]
+			  case '\uA75B': // ꝛ  [LATIN SMALL LETTER R ROTUNDA]
+			  case '\uA783': // ꞃ  [LATIN SMALL LETTER INSULAR R]
+			  case '\uFF52': // r  [FULLWIDTH LATIN SMALL LETTER R]
+				output[outputPos++] = 'r';
+				break;
+			  case '\u24AD': // ⒭  [PARENTHESIZED LATIN SMALL LETTER R]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'r';
+				output[outputPos++] = ')';
+				break;
+			  case '\u015A': // Ś  [LATIN CAPITAL LETTER S WITH ACUTE]
+			  case '\u015C': // Ŝ  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
+			  case '\u015E': // Ş  [LATIN CAPITAL LETTER S WITH CEDILLA]
+			  case '\u0160': // Š  [LATIN CAPITAL LETTER S WITH CARON]
+			  case '\u0218': // Ș  [LATIN CAPITAL LETTER S WITH COMMA BELOW]
+			  case '\u1E60': // Ṡ  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
+			  case '\u1E62': // Ṣ  [LATIN CAPITAL LETTER S WITH DOT BELOW]
+			  case '\u1E64': // Ṥ  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
+			  case '\u1E66': // Ṧ  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
+			  case '\u1E68': // Ṩ  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
+			  case '\u24C8': // Ⓢ  [CIRCLED LATIN CAPITAL LETTER S]
+			  case '\uA731': // ꜱ  [LATIN LETTER SMALL CAPITAL S]
+			  case '\uA785': // ꞅ  [LATIN SMALL LETTER INSULAR S]
+			  case '\uFF33': // S  [FULLWIDTH LATIN CAPITAL LETTER S]
+				output[outputPos++] = 'S';
+				break;
+			  case '\u015B': // ś  [LATIN SMALL LETTER S WITH ACUTE]
+			  case '\u015D': // ŝ  [LATIN SMALL LETTER S WITH CIRCUMFLEX]
+			  case '\u015F': // ş  [LATIN SMALL LETTER S WITH CEDILLA]
+			  case '\u0161': // š  [LATIN SMALL LETTER S WITH CARON]
+			  case '\u017F': // ſ  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S]
+			  case '\u0219': // ș  [LATIN SMALL LETTER S WITH COMMA BELOW]
+			  case '\u023F': // ȿ  [LATIN SMALL LETTER S WITH SWASH TAIL]
+			  case '\u0282': // ʂ  [LATIN SMALL LETTER S WITH HOOK]
+			  case '\u1D74': // ᵴ  [LATIN SMALL LETTER S WITH MIDDLE TILDE]
+			  case '\u1D8A': // ᶊ  [LATIN SMALL LETTER S WITH PALATAL HOOK]
+			  case '\u1E61': // ṡ  [LATIN SMALL LETTER S WITH DOT ABOVE]
+			  case '\u1E63': // ṣ  [LATIN SMALL LETTER S WITH DOT BELOW]
+			  case '\u1E65': // ṥ  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
+			  case '\u1E67': // ṧ  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
+			  case '\u1E69': // ṩ  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
+			  case '\u1E9C': // ẜ  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
+			  case '\u1E9D': // ẝ  [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
+			  case '\u24E2': // ⓢ  [CIRCLED LATIN SMALL LETTER S]
+			  case '\uA784': // Ꞅ  [LATIN CAPITAL LETTER INSULAR S]
+			  case '\uFF53': // s  [FULLWIDTH LATIN SMALL LETTER S]
+				output[outputPos++] = 's';
+				break;
+			  case '\u1E9E': // ẞ  [LATIN CAPITAL LETTER SHARP S]
+				output[outputPos++] = 'S';
+				output[outputPos++] = 'S';
+				break;
+			  case '\u24AE': // ⒮  [PARENTHESIZED LATIN SMALL LETTER S]
+				output[outputPos++] = '(';
+				output[outputPos++] = 's';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00DF': // ß  [LATIN SMALL LETTER SHARP S]
+				output[outputPos++] = 's';
+				output[outputPos++] = 's';
+				break;
+			  case '\uFB06': // st  [LATIN SMALL LIGATURE ST]
+				output[outputPos++] = 's';
+				output[outputPos++] = 't';
+				break;
+			  case '\u0162': // Ţ  [LATIN CAPITAL LETTER T WITH CEDILLA]
+			  case '\u0164': // Ť  [LATIN CAPITAL LETTER T WITH CARON]
+			  case '\u0166': // Ŧ  [LATIN CAPITAL LETTER T WITH STROKE]
+			  case '\u01AC': // Ƭ  [LATIN CAPITAL LETTER T WITH HOOK]
+			  case '\u01AE': // Ʈ  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
+			  case '\u021A': // Ț  [LATIN CAPITAL LETTER T WITH COMMA BELOW]
+			  case '\u023E': // Ⱦ  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
+			  case '\u1D1B': // ᴛ  [LATIN LETTER SMALL CAPITAL T]
+			  case '\u1E6A': // Ṫ  [LATIN CAPITAL LETTER T WITH DOT ABOVE]
+			  case '\u1E6C': // Ṭ  [LATIN CAPITAL LETTER T WITH DOT BELOW]
+			  case '\u1E6E': // Ṯ  [LATIN CAPITAL LETTER T WITH LINE BELOW]
+			  case '\u1E70': // Ṱ  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
+			  case '\u24C9': // Ⓣ  [CIRCLED LATIN CAPITAL LETTER T]
+			  case '\uA786': // Ꞇ  [LATIN CAPITAL LETTER INSULAR T]
+			  case '\uFF34': // T  [FULLWIDTH LATIN CAPITAL LETTER T]
+				output[outputPos++] = 'T';
+				break;
+			  case '\u0163': // ţ  [LATIN SMALL LETTER T WITH CEDILLA]
+			  case '\u0165': // ť  [LATIN SMALL LETTER T WITH CARON]
+			  case '\u0167': // ŧ  [LATIN SMALL LETTER T WITH STROKE]
+			  case '\u01AB': // ƫ  [LATIN SMALL LETTER T WITH PALATAL HOOK]
+			  case '\u01AD': // ƭ  [LATIN SMALL LETTER T WITH HOOK]
+			  case '\u021B': // ț  [LATIN SMALL LETTER T WITH COMMA BELOW]
+			  case '\u0236': // ȶ  [LATIN SMALL LETTER T WITH CURL]
+			  case '\u0287': // ʇ  [LATIN SMALL LETTER TURNED T]
+			  case '\u0288': // ʈ  [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
+			  case '\u1D75': // ᵵ  [LATIN SMALL LETTER T WITH MIDDLE TILDE]
+			  case '\u1E6B': // ṫ  [LATIN SMALL LETTER T WITH DOT ABOVE]
+			  case '\u1E6D': // ṭ  [LATIN SMALL LETTER T WITH DOT BELOW]
+			  case '\u1E6F': // ṯ  [LATIN SMALL LETTER T WITH LINE BELOW]
+			  case '\u1E71': // ṱ  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
+			  case '\u1E97': // ẗ  [LATIN SMALL LETTER T WITH DIAERESIS]
+			  case '\u24E3': // ⓣ  [CIRCLED LATIN SMALL LETTER T]
+			  case '\u2C66': // ⱦ  [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
+			  case '\uFF54': // t  [FULLWIDTH LATIN SMALL LETTER T]
+				output[outputPos++] = 't';
+				break;
+			  case '\u00DE': // Þ  [LATIN CAPITAL LETTER THORN]
+			  case '\uA766': // Ꝧ  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
+				output[outputPos++] = 'T';
+				output[outputPos++] = 'H';
+				break;
+			  case '\uA728': // Ꜩ  [LATIN CAPITAL LETTER TZ]
+				output[outputPos++] = 'T';
+				output[outputPos++] = 'Z';
+				break;
+			  case '\u24AF': // ⒯  [PARENTHESIZED LATIN SMALL LETTER T]
+				output[outputPos++] = '(';
+				output[outputPos++] = 't';
+				output[outputPos++] = ')';
+				break;
+			  case '\u02A8': // ʨ  [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
+				output[outputPos++] = 't';
+				output[outputPos++] = 'c';
+				break;
+			  case '\u00FE': // þ  [LATIN SMALL LETTER THORN]
+			  case '\u1D7A': // ᵺ  [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
+			  case '\uA767': // ꝧ  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
+				output[outputPos++] = 't';
+				output[outputPos++] = 'h';
+				break;
+			  case '\u02A6': // ʦ  [LATIN SMALL LETTER TS DIGRAPH]
+				output[outputPos++] = 't';
+				output[outputPos++] = 's';
+				break;
+			  case '\uA729': // ꜩ  [LATIN SMALL LETTER TZ]
+				output[outputPos++] = 't';
+				output[outputPos++] = 'z';
+				break;
+			  case '\u00D9': // Ù  [LATIN CAPITAL LETTER U WITH GRAVE]
+			  case '\u00DA': // Ú  [LATIN CAPITAL LETTER U WITH ACUTE]
+			  case '\u00DB': // Û  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
+			  case '\u00DC': // Ü  [LATIN CAPITAL LETTER U WITH DIAERESIS]
+			  case '\u0168': // Ũ  [LATIN CAPITAL LETTER U WITH TILDE]
+			  case '\u016A': // Ū  [LATIN CAPITAL LETTER U WITH MACRON]
+			  case '\u016C': // Ŭ  [LATIN CAPITAL LETTER U WITH BREVE]
+			  case '\u016E': // Ů  [LATIN CAPITAL LETTER U WITH RING ABOVE]
+			  case '\u0170': // Ű  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
+			  case '\u0172': // Ų  [LATIN CAPITAL LETTER U WITH OGONEK]
+			  case '\u01AF': // Ư  [LATIN CAPITAL LETTER U WITH HORN]
+			  case '\u01D3': // Ǔ  [LATIN CAPITAL LETTER U WITH CARON]
+			  case '\u01D5': // Ǖ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
+			  case '\u01D7': // Ǘ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
+			  case '\u01D9': // Ǚ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
+			  case '\u01DB': // Ǜ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
+			  case '\u0214': // Ȕ  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
+			  case '\u0216': // Ȗ  [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
+			  case '\u0244': // Ʉ  [LATIN CAPITAL LETTER U BAR]
+			  case '\u1D1C': // ᴜ  [LATIN LETTER SMALL CAPITAL U]
+			  case '\u1D7E': // ᵾ  [LATIN SMALL CAPITAL LETTER U WITH STROKE]
+			  case '\u1E72': // Ṳ  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
+			  case '\u1E74': // Ṵ  [LATIN CAPITAL LETTER U WITH TILDE BELOW]
+			  case '\u1E76': // Ṷ  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
+			  case '\u1E78': // Ṹ  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
+			  case '\u1E7A': // Ṻ  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
+			  case '\u1EE4': // Ụ  [LATIN CAPITAL LETTER U WITH DOT BELOW]
+			  case '\u1EE6': // Ủ  [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
+			  case '\u1EE8': // Ứ  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
+			  case '\u1EEA': // Ừ  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
+			  case '\u1EEC': // Ử  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
+			  case '\u1EEE': // Ữ  [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
+			  case '\u1EF0': // Ự  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
+			  case '\u24CA': // Ⓤ  [CIRCLED LATIN CAPITAL LETTER U]
+			  case '\uFF35': // U  [FULLWIDTH LATIN CAPITAL LETTER U]
+				output[outputPos++] = 'U';
+				break;
+			  case '\u00F9': // ù  [LATIN SMALL LETTER U WITH GRAVE]
+			  case '\u00FA': // ú  [LATIN SMALL LETTER U WITH ACUTE]
+			  case '\u00FB': // û  [LATIN SMALL LETTER U WITH CIRCUMFLEX]
+			  case '\u00FC': // ü  [LATIN SMALL LETTER U WITH DIAERESIS]
+			  case '\u0169': // ũ  [LATIN SMALL LETTER U WITH TILDE]
+			  case '\u016B': // ū  [LATIN SMALL LETTER U WITH MACRON]
+			  case '\u016D': // ŭ  [LATIN SMALL LETTER U WITH BREVE]
+			  case '\u016F': // ů  [LATIN SMALL LETTER U WITH RING ABOVE]
+			  case '\u0171': // ű  [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
+			  case '\u0173': // ų  [LATIN SMALL LETTER U WITH OGONEK]
+			  case '\u01B0': // ư  [LATIN SMALL LETTER U WITH HORN]
+			  case '\u01D4': // ǔ  [LATIN SMALL LETTER U WITH CARON]
+			  case '\u01D6': // ǖ  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
+			  case '\u01D8': // ǘ  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
+			  case '\u01DA': // ǚ  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
+			  case '\u01DC': // ǜ  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
+			  case '\u0215': // ȕ  [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
+			  case '\u0217': // ȗ  [LATIN SMALL LETTER U WITH INVERTED BREVE]
+			  case '\u0289': // ʉ  [LATIN SMALL LETTER U BAR]
+			  case '\u1D64': // ᵤ  [LATIN SUBSCRIPT SMALL LETTER U]
+			  case '\u1D99': // ᶙ  [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
+			  case '\u1E73': // ṳ  [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
+			  case '\u1E75': // ṵ  [LATIN SMALL LETTER U WITH TILDE BELOW]
+			  case '\u1E77': // ṷ  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
+			  case '\u1E79': // ṹ  [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
+			  case '\u1E7B': // ṻ  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
+			  case '\u1EE5': // ụ  [LATIN SMALL LETTER U WITH DOT BELOW]
+			  case '\u1EE7': // ủ  [LATIN SMALL LETTER U WITH HOOK ABOVE]
+			  case '\u1EE9': // ứ  [LATIN SMALL LETTER U WITH HORN AND ACUTE]
+			  case '\u1EEB': // ừ  [LATIN SMALL LETTER U WITH HORN AND GRAVE]
+			  case '\u1EED': // ử  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
+			  case '\u1EEF': // ữ  [LATIN SMALL LETTER U WITH HORN AND TILDE]
+			  case '\u1EF1': // ự  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
+			  case '\u24E4': // ⓤ  [CIRCLED LATIN SMALL LETTER U]
+			  case '\uFF55': // u  [FULLWIDTH LATIN SMALL LETTER U]
+				output[outputPos++] = 'u';
+				break;
+			  case '\u24B0': // ⒰  [PARENTHESIZED LATIN SMALL LETTER U]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'u';
+				output[outputPos++] = ')';
+				break;
+			  case '\u1D6B': // ᵫ  [LATIN SMALL LETTER UE]
+				output[outputPos++] = 'u';
+				output[outputPos++] = 'e';
+				break;
+			  case '\u01B2': // Ʋ  [LATIN CAPITAL LETTER V WITH HOOK]
+			  case '\u0245': // Ʌ  [LATIN CAPITAL LETTER TURNED V]
+			  case '\u1D20': // ᴠ  [LATIN LETTER SMALL CAPITAL V]
+			  case '\u1E7C': // Ṽ  [LATIN CAPITAL LETTER V WITH TILDE]
+			  case '\u1E7E': // Ṿ  [LATIN CAPITAL LETTER V WITH DOT BELOW]
+			  case '\u1EFC': // Ỽ  [LATIN CAPITAL LETTER MIDDLE-WELSH V]
+			  case '\u24CB': // Ⓥ  [CIRCLED LATIN CAPITAL LETTER V]
+			  case '\uA75E': // Ꝟ  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
+			  case '\uA768': // Ꝩ  [LATIN CAPITAL LETTER VEND]
+			  case '\uFF36': // V  [FULLWIDTH LATIN CAPITAL LETTER V]
+				output[outputPos++] = 'V';
+				break;
+			  case '\u028B': // ʋ  [LATIN SMALL LETTER V WITH HOOK]
+			  case '\u028C': // ʌ  [LATIN SMALL LETTER TURNED V]
+			  case '\u1D65': // ᵥ  [LATIN SUBSCRIPT SMALL LETTER V]
+			  case '\u1D8C': // ᶌ  [LATIN SMALL LETTER V WITH PALATAL HOOK]
+			  case '\u1E7D': // ṽ  [LATIN SMALL LETTER V WITH TILDE]
+			  case '\u1E7F': // ṿ  [LATIN SMALL LETTER V WITH DOT BELOW]
+			  case '\u24E5': // ⓥ  [CIRCLED LATIN SMALL LETTER V]
+			  case '\u2C71': // ⱱ  [LATIN SMALL LETTER V WITH RIGHT HOOK]
+			  case '\u2C74': // ⱴ  [LATIN SMALL LETTER V WITH CURL]
+			  case '\uA75F': // ꝟ  [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
+			  case '\uFF56': // v  [FULLWIDTH LATIN SMALL LETTER V]
+				output[outputPos++] = 'v';
+				break;
+			  case '\uA760': // Ꝡ  [LATIN CAPITAL LETTER VY]
+				output[outputPos++] = 'V';
+				output[outputPos++] = 'Y';
+				break;
+			  case '\u24B1': // ⒱  [PARENTHESIZED LATIN SMALL LETTER V]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'v';
+				output[outputPos++] = ')';
+				break;
+			  case '\uA761': // ꝡ  [LATIN SMALL LETTER VY]
+				output[outputPos++] = 'v';
+				output[outputPos++] = 'y';
+				break;
+			  case '\u0174': // Ŵ  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
+			  case '\u01F7': // Ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN]
+			  case '\u1D21': // ᴡ  [LATIN LETTER SMALL CAPITAL W]
+			  case '\u1E80': // Ẁ  [LATIN CAPITAL LETTER W WITH GRAVE]
+			  case '\u1E82': // Ẃ  [LATIN CAPITAL LETTER W WITH ACUTE]
+			  case '\u1E84': // Ẅ  [LATIN CAPITAL LETTER W WITH DIAERESIS]
+			  case '\u1E86': // Ẇ  [LATIN CAPITAL LETTER W WITH DOT ABOVE]
+			  case '\u1E88': // Ẉ  [LATIN CAPITAL LETTER W WITH DOT BELOW]
+			  case '\u24CC': // Ⓦ  [CIRCLED LATIN CAPITAL LETTER W]
+			  case '\u2C72': // Ⱳ  [LATIN CAPITAL LETTER W WITH HOOK]
+			  case '\uFF37': // W  [FULLWIDTH LATIN CAPITAL LETTER W]
+				output[outputPos++] = 'W';
+				break;
+			  case '\u0175': // ŵ  [LATIN SMALL LETTER W WITH CIRCUMFLEX]
+			  case '\u01BF': // ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN]
+			  case '\u028D': // ʍ  [LATIN SMALL LETTER TURNED W]
+			  case '\u1E81': // ẁ  [LATIN SMALL LETTER W WITH GRAVE]
+			  case '\u1E83': // ẃ  [LATIN SMALL LETTER W WITH ACUTE]
+			  case '\u1E85': // ẅ  [LATIN SMALL LETTER W WITH DIAERESIS]
+			  case '\u1E87': // ẇ  [LATIN SMALL LETTER W WITH DOT ABOVE]
+			  case '\u1E89': // ẉ  [LATIN SMALL LETTER W WITH DOT BELOW]
+			  case '\u1E98': // ẘ  [LATIN SMALL LETTER W WITH RING ABOVE]
+			  case '\u24E6': // ⓦ  [CIRCLED LATIN SMALL LETTER W]
+			  case '\u2C73': // ⱳ  [LATIN SMALL LETTER W WITH HOOK]
+			  case '\uFF57': // w  [FULLWIDTH LATIN SMALL LETTER W]
+				output[outputPos++] = 'w';
+				break;
+			  case '\u24B2': // ⒲  [PARENTHESIZED LATIN SMALL LETTER W]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'w';
+				output[outputPos++] = ')';
+				break;
+			  case '\u1E8A': // Ẋ  [LATIN CAPITAL LETTER X WITH DOT ABOVE]
+			  case '\u1E8C': // Ẍ  [LATIN CAPITAL LETTER X WITH DIAERESIS]
+			  case '\u24CD': // Ⓧ  [CIRCLED LATIN CAPITAL LETTER X]
+			  case '\uFF38': // X  [FULLWIDTH LATIN CAPITAL LETTER X]
+				output[outputPos++] = 'X';
+				break;
+			  case '\u1D8D': // ᶍ  [LATIN SMALL LETTER X WITH PALATAL HOOK]
+			  case '\u1E8B': // ẋ  [LATIN SMALL LETTER X WITH DOT ABOVE]
+			  case '\u1E8D': // ẍ  [LATIN SMALL LETTER X WITH DIAERESIS]
+			  case '\u2093': // ₓ  [LATIN SUBSCRIPT SMALL LETTER X]
+			  case '\u24E7': // ⓧ  [CIRCLED LATIN SMALL LETTER X]
+			  case '\uFF58': // x  [FULLWIDTH LATIN SMALL LETTER X]
+				output[outputPos++] = 'x';
+				break;
+			  case '\u24B3': // ⒳  [PARENTHESIZED LATIN SMALL LETTER X]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'x';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00DD': // Ý  [LATIN CAPITAL LETTER Y WITH ACUTE]
+			  case '\u0176': // Ŷ  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
+			  case '\u0178': // Ÿ  [LATIN CAPITAL LETTER Y WITH DIAERESIS]
+			  case '\u01B3': // Ƴ  [LATIN CAPITAL LETTER Y WITH HOOK]
+			  case '\u0232': // Ȳ  [LATIN CAPITAL LETTER Y WITH MACRON]
+			  case '\u024E': // Ɏ  [LATIN CAPITAL LETTER Y WITH STROKE]
+			  case '\u028F': // ʏ  [LATIN LETTER SMALL CAPITAL Y]
+			  case '\u1E8E': // Ẏ  [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
+			  case '\u1EF2': // Ỳ  [LATIN CAPITAL LETTER Y WITH GRAVE]
+			  case '\u1EF4': // Ỵ  [LATIN CAPITAL LETTER Y WITH DOT BELOW]
+			  case '\u1EF6': // Ỷ  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
+			  case '\u1EF8': // Ỹ  [LATIN CAPITAL LETTER Y WITH TILDE]
+			  case '\u1EFE': // Ỿ  [LATIN CAPITAL LETTER Y WITH LOOP]
+			  case '\u24CE': // Ⓨ  [CIRCLED LATIN CAPITAL LETTER Y]
+			  case '\uFF39': // Y  [FULLWIDTH LATIN CAPITAL LETTER Y]
+				output[outputPos++] = 'Y';
+				break;
+			  case '\u00FD': // ý  [LATIN SMALL LETTER Y WITH ACUTE]
+			  case '\u00FF': // ÿ  [LATIN SMALL LETTER Y WITH DIAERESIS]
+			  case '\u0177': // ŷ  [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
+			  case '\u01B4': // ƴ  [LATIN SMALL LETTER Y WITH HOOK]
+			  case '\u0233': // ȳ  [LATIN SMALL LETTER Y WITH MACRON]
+			  case '\u024F': // ɏ  [LATIN SMALL LETTER Y WITH STROKE]
+			  case '\u028E': // ʎ  [LATIN SMALL LETTER TURNED Y]
+			  case '\u1E8F': // ẏ  [LATIN SMALL LETTER Y WITH DOT ABOVE]
+			  case '\u1E99': // ẙ  [LATIN SMALL LETTER Y WITH RING ABOVE]
+			  case '\u1EF3': // ỳ  [LATIN SMALL LETTER Y WITH GRAVE]
+			  case '\u1EF5': // ỵ  [LATIN SMALL LETTER Y WITH DOT BELOW]
+			  case '\u1EF7': // ỷ  [LATIN SMALL LETTER Y WITH HOOK ABOVE]
+			  case '\u1EF9': // ỹ  [LATIN SMALL LETTER Y WITH TILDE]
+			  case '\u1EFF': // ỿ  [LATIN SMALL LETTER Y WITH LOOP]
+			  case '\u24E8': // ⓨ  [CIRCLED LATIN SMALL LETTER Y]
+			  case '\uFF59': // y  [FULLWIDTH LATIN SMALL LETTER Y]
+				output[outputPos++] = 'y';
+				break;
+			  case '\u24B4': // ⒴  [PARENTHESIZED LATIN SMALL LETTER Y]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'y';
+				output[outputPos++] = ')';
+				break;
+			  case '\u0179': // Ź  [LATIN CAPITAL LETTER Z WITH ACUTE]
+			  case '\u017B': // Ż  [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
+			  case '\u017D': // Ž  [LATIN CAPITAL LETTER Z WITH CARON]
+			  case '\u01B5': // Ƶ  [LATIN CAPITAL LETTER Z WITH STROKE]
+			  case '\u021C': // Ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH]
+			  case '\u0224': // Ȥ  [LATIN CAPITAL LETTER Z WITH HOOK]
+			  case '\u1D22': // ᴢ  [LATIN LETTER SMALL CAPITAL Z]
+			  case '\u1E90': // Ẑ  [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
+			  case '\u1E92': // Ẓ  [LATIN CAPITAL LETTER Z WITH DOT BELOW]
+			  case '\u1E94': // Ẕ  [LATIN CAPITAL LETTER Z WITH LINE BELOW]
+			  case '\u24CF': // Ⓩ  [CIRCLED LATIN CAPITAL LETTER Z]
+			  case '\u2C6B': // Ⱬ  [LATIN CAPITAL LETTER Z WITH DESCENDER]
+			  case '\uA762': // Ꝣ  [LATIN CAPITAL LETTER VISIGOTHIC Z]
+			  case '\uFF3A': // Z  [FULLWIDTH LATIN CAPITAL LETTER Z]
+				output[outputPos++] = 'Z';
+				break;
+			  case '\u017A': // ź  [LATIN SMALL LETTER Z WITH ACUTE]
+			  case '\u017C': // ż  [LATIN SMALL LETTER Z WITH DOT ABOVE]
+			  case '\u017E': // ž  [LATIN SMALL LETTER Z WITH CARON]
+			  case '\u01B6': // ƶ  [LATIN SMALL LETTER Z WITH STROKE]
+			  case '\u021D': // ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN SMALL LETTER YOGH]
+			  case '\u0225': // ȥ  [LATIN SMALL LETTER Z WITH HOOK]
+			  case '\u0240': // ɀ  [LATIN SMALL LETTER Z WITH SWASH TAIL]
+			  case '\u0290': // ʐ  [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
+			  case '\u0291': // ʑ  [LATIN SMALL LETTER Z WITH CURL]
+			  case '\u1D76': // ᵶ  [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
+			  case '\u1D8E': // ᶎ  [LATIN SMALL LETTER Z WITH PALATAL HOOK]
+			  case '\u1E91': // ẑ  [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
+			  case '\u1E93': // ẓ  [LATIN SMALL LETTER Z WITH DOT BELOW]
+			  case '\u1E95': // ẕ  [LATIN SMALL LETTER Z WITH LINE BELOW]
+			  case '\u24E9': // ⓩ  [CIRCLED LATIN SMALL LETTER Z]
+			  case '\u2C6C': // ⱬ  [LATIN SMALL LETTER Z WITH DESCENDER]
+			  case '\uA763': // ꝣ  [LATIN SMALL LETTER VISIGOTHIC Z]
+			  case '\uFF5A': // z  [FULLWIDTH LATIN SMALL LETTER Z]
+				output[outputPos++] = 'z';
+				break;
+			  case '\u24B5': // ⒵  [PARENTHESIZED LATIN SMALL LETTER Z]
+				output[outputPos++] = '(';
+				output[outputPos++] = 'z';
+				output[outputPos++] = ')';
+				break;
+			  case '\u2070': // ⁰  [SUPERSCRIPT ZERO]
+			  case '\u2080': // ₀  [SUBSCRIPT ZERO]
+			  case '\u24EA': // ⓪  [CIRCLED DIGIT ZERO]
+			  case '\u24FF': // ⓿  [NEGATIVE CIRCLED DIGIT ZERO]
+			  case '\uFF10': // 0  [FULLWIDTH DIGIT ZERO]
+				output[outputPos++] = '0';
+				break;
+			  case '\u00B9': // ¹  [SUPERSCRIPT ONE]
+			  case '\u2081': // ₁  [SUBSCRIPT ONE]
+			  case '\u2460': // ①  [CIRCLED DIGIT ONE]
+			  case '\u24F5': // ⓵  [DOUBLE CIRCLED DIGIT ONE]
+			  case '\u2776': // ❶  [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
+			  case '\u2780': // ➀  [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
+			  case '\u278A': // ➊  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
+			  case '\uFF11': // 1  [FULLWIDTH DIGIT ONE]
+				output[outputPos++] = '1';
+				break;
+			  case '\u2488': // ⒈  [DIGIT ONE FULL STOP]
+				output[outputPos++] = '1';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2474': // ⑴  [PARENTHESIZED DIGIT ONE]
+				output[outputPos++] = '(';
+				output[outputPos++] = '1';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00B2': // ²  [SUPERSCRIPT TWO]
+			  case '\u2082': // ₂  [SUBSCRIPT TWO]
+			  case '\u2461': // ②  [CIRCLED DIGIT TWO]
+			  case '\u24F6': // ⓶  [DOUBLE CIRCLED DIGIT TWO]
+			  case '\u2777': // ❷  [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
+			  case '\u2781': // ➁  [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
+			  case '\u278B': // ➋  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
+			  case '\uFF12': // 2  [FULLWIDTH DIGIT TWO]
+				output[outputPos++] = '2';
+				break;
+			  case '\u2489': // ⒉  [DIGIT TWO FULL STOP]
+				output[outputPos++] = '2';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2475': // ⑵  [PARENTHESIZED DIGIT TWO]
+				output[outputPos++] = '(';
+				output[outputPos++] = '2';
+				output[outputPos++] = ')';
+				break;
+			  case '\u00B3': // ³  [SUPERSCRIPT THREE]
+			  case '\u2083': // ₃  [SUBSCRIPT THREE]
+			  case '\u2462': // ③  [CIRCLED DIGIT THREE]
+			  case '\u24F7': // ⓷  [DOUBLE CIRCLED DIGIT THREE]
+			  case '\u2778': // ❸  [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
+			  case '\u2782': // ➂  [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
+			  case '\u278C': // ➌  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
+			  case '\uFF13': // 3  [FULLWIDTH DIGIT THREE]
+				output[outputPos++] = '3';
+				break;
+			  case '\u248A': // ⒊  [DIGIT THREE FULL STOP]
+				output[outputPos++] = '3';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2476': // ⑶  [PARENTHESIZED DIGIT THREE]
+				output[outputPos++] = '(';
+				output[outputPos++] = '3';
+				output[outputPos++] = ')';
+				break;
+			  case '\u2074': // ⁴  [SUPERSCRIPT FOUR]
+			  case '\u2084': // ₄  [SUBSCRIPT FOUR]
+			  case '\u2463': // ④  [CIRCLED DIGIT FOUR]
+			  case '\u24F8': // ⓸  [DOUBLE CIRCLED DIGIT FOUR]
+			  case '\u2779': // ❹  [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
+			  case '\u2783': // ➃  [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
+			  case '\u278D': // ➍  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
+			  case '\uFF14': // 4  [FULLWIDTH DIGIT FOUR]
+				output[outputPos++] = '4';
+				break;
+			  case '\u248B': // ⒋  [DIGIT FOUR FULL STOP]
+				output[outputPos++] = '4';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2477': // ⑷  [PARENTHESIZED DIGIT FOUR]
+				output[outputPos++] = '(';
+				output[outputPos++] = '4';
+				output[outputPos++] = ')';
+				break;
+			  case '\u2075': // ⁵  [SUPERSCRIPT FIVE]
+			  case '\u2085': // ₅  [SUBSCRIPT FIVE]
+			  case '\u2464': // ⑤  [CIRCLED DIGIT FIVE]
+			  case '\u24F9': // ⓹  [DOUBLE CIRCLED DIGIT FIVE]
+			  case '\u277A': // ❺  [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
+			  case '\u2784': // ➄  [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
+			  case '\u278E': // ➎  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
+			  case '\uFF15': // 5  [FULLWIDTH DIGIT FIVE]
+				output[outputPos++] = '5';
+				break;
+			  case '\u248C': // ⒌  [DIGIT FIVE FULL STOP]
+				output[outputPos++] = '5';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2478': // ⑸  [PARENTHESIZED DIGIT FIVE]
+				output[outputPos++] = '(';
+				output[outputPos++] = '5';
+				output[outputPos++] = ')';
+				break;
+			  case '\u2076': // ⁶  [SUPERSCRIPT SIX]
+			  case '\u2086': // ₆  [SUBSCRIPT SIX]
+			  case '\u2465': // ⑥  [CIRCLED DIGIT SIX]
+			  case '\u24FA': // ⓺  [DOUBLE CIRCLED DIGIT SIX]
+			  case '\u277B': // ❻  [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
+			  case '\u2785': // ➅  [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
+			  case '\u278F': // ➏  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
+			  case '\uFF16': // 6  [FULLWIDTH DIGIT SIX]
+				output[outputPos++] = '6';
+				break;
+			  case '\u248D': // ⒍  [DIGIT SIX FULL STOP]
+				output[outputPos++] = '6';
+				output[outputPos++] = '.';
+				break;
+			  case '\u2479': // ⑹  [PARENTHESIZED DIGIT SIX]
+				output[outputPos++] = '('

<TRUNCATED>
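
The cases above all follow one pattern: each non-ASCII variant is folded by
appending one or more plain ASCII chars to an output buffer. A minimal,
self-contained sketch of the surrounding driver loop (hypothetical FoldToAscii
name; only two sample cases shown, and the caller is assumed to size the
output buffer generously, e.g. 4x the input length):

    public static int FoldToAscii(char[] input, int length, char[] output)
    {
        int outputPos = 0;
        for (int i = 0; i < length; i++)
        {
            char c = input[i];
            if (c < '\u0080')
            {
                output[outputPos++] = c; // plain ASCII passes through unchanged
                continue;
            }
            switch (c)
            {
                case '\u00FD': // ý -> y
                    output[outputPos++] = 'y';
                    break;
                case '\u24B4': // ⒴ -> (y)
                    output[outputPos++] = '(';
                    output[outputPos++] = 'y';
                    output[outputPos++] = ')';
                    break;
                default:
                    output[outputPos++] = c; // characters without a folding pass through
                    break;
            }
        }
        return outputPos; // the folded length
    }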

[14/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
new file mode 100644
index 0000000..435247c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
@@ -0,0 +1,79 @@
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenStream"/> containing a single token.
+	/// </summary>
+	public sealed class SingleTokenTokenStream : TokenStream
+	{
+
+	  private bool exhausted = false;
+
+	  // The token needs to be immutable, so work with clones!
+	  private Token singleToken;
+	  private readonly AttributeImpl tokenAtt;
+
+	  public SingleTokenTokenStream(Token token) : base(Token.TOKEN_ATTRIBUTE_FACTORY)
+	  {
+
+		Debug.Assert(token != null);
+		this.singleToken = token.clone();
+
+		tokenAtt = (AttributeImpl) addAttribute(typeof(CharTermAttribute));
+		Debug.Assert(tokenAtt is Token);
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (exhausted)
+		{
+		  return false;
+		}
+		else
+		{
+		  clearAttributes();
+		  singleToken.copyTo(tokenAtt);
+		  exhausted = true;
+		  return true;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		exhausted = false;
+	  }
+
+	  public Token getToken()
+	  {
+		return singleToken.clone();
+	  }
+
+	  public void setToken(Token token)
+	  {
+		this.singleToken = token.clone();
+	  }
+	}
+
+}
\ No newline at end of file
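
A hedged usage sketch of the class above (member casing follows this raw port;
the Token(string, int, int) constructor is assumed from Java Lucene):

    // Emits exactly one token, then reports exhaustion until reset() is called.
    var stream = new SingleTokenTokenStream(new Token("example", 0, 7));
    stream.reset();
    while (stream.incrementToken())
    {
        // the single token "example" is exposed through the stream's attributes
    }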

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
new file mode 100644
index 0000000..078ff66
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
@@ -0,0 +1,265 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using BytesRefHash = org.apache.lucene.util.BytesRefHash;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+	using ByteSequenceOutputs = org.apache.lucene.util.fst.ByteSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using Arc = org.apache.lucene.util.fst.FST.Arc;
+	using BytesReader = org.apache.lucene.util.fst.FST.BytesReader;
+
+
+	/// <summary>
+	/// Provides the ability to override any <seealso cref="KeywordAttribute"/> aware stemmer
+	/// with custom dictionary-based stemming.
+	/// </summary>
+	public sealed class StemmerOverrideFilter : TokenFilter
+	{
+	  private readonly StemmerOverrideMap stemmerOverrideMap;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly FST.BytesReader fstReader;
+	  private readonly FST.Arc<BytesRef> scratchArc = new FST.Arc<BytesRef>();
+	  private readonly CharsRef spare = new CharsRef();
+
+	  /// <summary>
+	  /// Create a new StemmerOverrideFilter, performing dictionary-based stemming
+	  /// with the provided <code>dictionary</code>.
+	  /// <para>
+	  /// Any dictionary-stemmed terms will be marked with <seealso cref="KeywordAttribute"/>
+	  /// so that they will not be stemmed with stemmers down the chain.
+	  /// </para>
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public StemmerOverrideFilter(final org.apache.lucene.analysis.TokenStream input, final StemmerOverrideMap stemmerOverrideMap)
+	  public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap) : base(input)
+	  {
+		this.stemmerOverrideMap = stemmerOverrideMap;
+		fstReader = stemmerOverrideMap.BytesReader;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (fstReader == null)
+		  {
+			// No overrides
+			return true;
+		  }
+		  if (!keywordAtt.Keyword) // don't muck with already-keyworded terms
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
+			BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
+			if (stem != null)
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = spare.chars = termAtt.buffer();
+			  char[] buffer = spare.chars = termAtt.buffer();
+			  UnicodeUtil.UTF8toUTF16(stem.bytes, stem.offset, stem.length, spare);
+			  if (spare.chars != buffer)
+			  {
+				termAtt.copyBuffer(spare.chars, spare.offset, spare.length);
+			  }
+			  termAtt.Length = spare.length;
+			  keywordAtt.Keyword = true;
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// A read-only 4-byte FST backed map that allows fast case-insensitive key
+	  /// value lookups for <seealso cref="StemmerOverrideFilter"/>
+	  /// </summary>
+	  // TODO maybe we can generalize this and reuse this map somehow?
+	  public sealed class StemmerOverrideMap
+	  {
+		internal readonly FST<BytesRef> fst;
+		internal readonly bool ignoreCase;
+
+		/// <summary>
+		/// Creates a new <seealso cref="StemmerOverrideMap"/> </summary>
+		/// <param name="fst"> the fst to lookup the overrides </param>
+		/// <param name="ignoreCase"> if the key's case should be ignored </param>
+		public StemmerOverrideMap(FST<BytesRef> fst, bool ignoreCase)
+		{
+		  this.fst = fst;
+		  this.ignoreCase = ignoreCase;
+		}
+
+		/// <summary>
+		/// Returns a <seealso cref="BytesReader"/> to pass to the <seealso cref="#get(char[], int, FST.Arc, FST.BytesReader)"/> method.
+		/// </summary>
+		public FST.BytesReader BytesReader
+		{
+			get
+			{
+			  if (fst == null)
+			  {
+				return null;
+			  }
+			  else
+			  {
+				return fst.BytesReader;
+			  }
+			}
+		}
+
+		/// <summary>
+		/// Returns the value mapped to the given key or <code>null</code> if the key is not in the FST dictionary.
+		/// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public org.apache.lucene.util.BytesRef get(char[] buffer, int bufferLen, org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.BytesRef> scratchArc, org.apache.lucene.util.fst.FST.BytesReader fstReader) throws java.io.IOException
+		public BytesRef get(char[] buffer, int bufferLen, FST.Arc<BytesRef> scratchArc, FST.BytesReader fstReader)
+		{
+		  BytesRef pendingOutput = fst.outputs.NoOutput;
+		  BytesRef matchOutput = null;
+		  int bufUpto = 0;
+		  fst.getFirstArc(scratchArc);
+		  while (bufUpto < bufferLen)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
+			int codePoint = char.codePointAt(buffer, bufUpto, bufferLen);
+			if (fst.findTargetArc(ignoreCase ? char.ToLower(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null)
+			{
+			  return null;
+			}
+			pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+			bufUpto += char.charCount(codePoint);
+		  }
+		  if (scratchArc.Final)
+		  {
+			matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
+		  }
+		  return matchOutput;
+		}
+
+	  }
+	  /// <summary>
+	  /// This builder builds an <seealso cref="FST"/> for the <seealso cref="StemmerOverrideFilter"/>
+	  /// </summary>
+	  public class Builder
+	  {
+		internal readonly BytesRefHash hash = new BytesRefHash();
+		internal readonly BytesRef spare = new BytesRef();
+		internal readonly List<CharSequence> outputValues = new List<CharSequence>();
+		internal readonly bool ignoreCase;
+		internal readonly CharsRef charsSpare = new CharsRef();
+
+		/// <summary>
+		/// Creates a new <seealso cref="Builder"/> with ignoreCase set to <code>false</code> 
+		/// </summary>
+		public Builder() : this(false)
+		{
+		}
+
+		/// <summary>
+		/// Creates a new <seealso cref="Builder"/> </summary>
+		/// <param name="ignoreCase"> if the input case should be ignored. </param>
+		public Builder(bool ignoreCase)
+		{
+		  this.ignoreCase = ignoreCase;
+		}
+
+		/// <summary>
+		/// Adds an input string and its stemmer override output to this builder.
+		/// </summary>
+		/// <param name="input"> the input char sequence </param>
+		/// <param name="output"> the stemmer override output char sequence </param>
+		/// <returns> <code>false</code> iff the input has already been added to this builder, otherwise <code>true</code>. </returns>
+		public virtual bool add(CharSequence input, CharSequence output)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int length = input.length();
+		  int length = input.length();
+		  if (ignoreCase)
+		  {
+			// convert on the fly to lowercase
+			charsSpare.grow(length);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = charsSpare.chars;
+			char[] buffer = charsSpare.chars;
+			for (int i = 0; i < length;)
+			{
+				i += char.toChars(char.ToLower(char.codePointAt(input, i)), buffer, i);
+			}
+			UnicodeUtil.UTF16toUTF8(buffer, 0, length, spare);
+		  }
+		  else
+		  {
+			UnicodeUtil.UTF16toUTF8(input, 0, length, spare);
+		  }
+		  if (hash.add(spare) >= 0)
+		  {
+			outputValues.Add(output);
+			return true;
+		  }
+		  return false;
+		}
+
+		/// <summary>
+		/// Returns a <seealso cref="StemmerOverrideMap"/> to be used with the <seealso cref="StemmerOverrideFilter"/> </summary>
+		/// <returns> a <seealso cref="StemmerOverrideMap"/> to be used with the <seealso cref="StemmerOverrideFilter"/> </returns>
+		/// <exception cref="IOException"> if an <seealso cref="IOException"/> occurs. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public StemmerOverrideMap build() throws java.io.IOException
+		public virtual StemmerOverrideMap build()
+		{
+		  ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
+		  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int[] sort = hash.sort(org.apache.lucene.util.BytesRef.getUTF8SortedAsUnicodeComparator());
+		  int[] sort = hash.sort(BytesRef.UTF8SortedAsUnicodeComparator);
+		  IntsRef intsSpare = new IntsRef();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int size = hash.size();
+		  int size = hash.size();
+		  for (int i = 0; i < size; i++)
+		  {
+			int id = sort[i];
+			BytesRef bytesRef = hash.get(id, spare);
+			UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
+			builder.add(intsSpare, new BytesRef(outputValues[id]));
+		  }
+		  return new StemmerOverrideMap(builder.finish(), ignoreCase);
+		}
+
+	  }
+	}
+
+}
\ No newline at end of file
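
A hedged sketch of how the Builder and filter fit together (member casing
follows this raw port; the source TokenStream and the string-to-CharSequence
conversion are assumed):

    // Map "running" -> "run" regardless of case, then wrap an existing stream.
    var builder = new StemmerOverrideFilter.Builder(true); // ignoreCase
    builder.add("running", "run");   // returns false if the key was already added
    var map = builder.build();       // compiles the entries into an FST-backed map
    var filtered = new StemmerOverrideFilter(source, map);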

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
new file mode 100644
index 0000000..f755c1f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
@@ -0,0 +1,97 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="StemmerOverrideFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class StemmerOverrideFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private StemmerOverrideMap dictionary;
+	  private readonly string dictionaryFiles;
+	  private readonly bool ignoreCase;
+
+	  /// <summary>
+	  /// Creates a new StemmerOverrideFilterFactory </summary>
+	  public StemmerOverrideFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		dictionaryFiles = get(args, "dictionary");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (dictionaryFiles != null)
+		{
+		  assureMatchVersion();
+		  IList<string> files = splitFileNames(dictionaryFiles);
+		  if (files.Count > 0)
+		  {
+			StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
+			foreach (string file in files)
+			{
+			  IList<string> list = getLines(loader, file.Trim());
+			  foreach (string line in list)
+			  {
+				string[] mapping = line.Split("\t", 2);
+				builder.add(mapping[0], mapping[1]);
+			  }
+			}
+			dictionary = builder.build();
+		  }
+		}
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return dictionary == null ? input : new StemmerOverrideFilter(input, dictionary);
+	  }
+	}
+
+}
\ No newline at end of file
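
Factory construction consumes its args dictionary and rejects leftovers; a
hedged sketch (the ResourceLoader needed by inform(...) is assumed and not
shown):

    var args = new Dictionary<string, string>
    {
        { "dictionary", "dictionary.txt" }, // tab-separated input/output pairs
        { "ignoreCase", "true" }
    };
    var factory = new StemmerOverrideFilterFactory(args); // args is emptied here
    // factory.inform(loader) must run before create(...) so the map gets built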

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
new file mode 100644
index 0000000..5a511bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
@@ -0,0 +1,114 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Trims leading and trailing whitespace from Tokens in the stream.
+	/// <para>As of Lucene 4.4, this filter does not support updateOffsets=true anymore
+	/// as it can lead to broken token streams.
+	/// </para>
+	/// </summary>
+	public sealed class TrimFilter : TokenFilter
+	{
+
+	  internal readonly bool updateOffsets;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  /// <summary>
+	  /// Create a new <seealso cref="TrimFilter"/>. </summary>
+	  /// <param name="version">       the Lucene match version </param>
+	  /// <param name="in">            the stream to consume </param>
+	  /// <param name="updateOffsets"> whether to update offsets </param>
+	  /// @deprecated Offset updates are not supported anymore as of Lucene 4.4. 
+	  [Obsolete("Offset updates are not supported anymore as of Lucene 4.4.")]
+	  public TrimFilter(Version version, TokenStream @in, bool updateOffsets) : base(@in)
+	  {
+		if (updateOffsets && version.onOrAfter(Version.LUCENE_44))
+		{
+		  throw new System.ArgumentException("updateOffsets=true is not supported anymore as of Lucene 4.4");
+		}
+		this.updateOffsets = updateOffsets;
+	  }
+
+	  /// <summary>
+	  /// Create a new <seealso cref="TrimFilter"/> on top of <code>in</code>. </summary>
+	  public TrimFilter(Version version, TokenStream @in) : this(version, @in, false)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+			return false;
+		}
+
+		char[] termBuffer = termAtt.buffer();
+		int len = termAtt.length();
+		// TODO: Is this the right behavior, or should we return false? Currently "  " returns true,
+		// so I think this should also return true.
+		if (len == 0)
+		{
+		  return true;
+		}
+		int start = 0;
+		int end = 0;
+		int endOff = 0;
+
+		// eat the first characters
+		for (start = 0; start < len && char.IsWhiteSpace(termBuffer[start]); start++)
+		{
+		}
+		// eat the end characters
+		for (end = len; end >= start && char.IsWhiteSpace(termBuffer[end - 1]); end--)
+		{
+		  endOff++;
+		}
+		if (start > 0 || end < len)
+		{
+		  if (start < end)
+		  {
+			termAtt.copyBuffer(termBuffer, start, (end - start));
+		  }
+		  else
+		  {
+			termAtt.setEmpty();
+		  }
+		  if (updateOffsets && len == offsetAtt.endOffset() - offsetAtt.startOffset())
+		  {
+			int newStart = offsetAtt.startOffset() + start;
+			int newEnd = offsetAtt.endOffset() - (start < end ? endOff:0);
+			offsetAtt.setOffset(newStart, newEnd);
+		  }
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
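
The two scan loops above find the first and last non-whitespace positions; the
same trimming logic, sketched standalone on a plain term buffer:

    // Mirrors TrimFilter's start/end scans; returns "" if the term is all whitespace.
    public static string TrimTerm(char[] termBuffer, int len)
    {
        int start = 0;
        while (start < len && char.IsWhiteSpace(termBuffer[start])) start++;
        int end = len;
        while (end > start && char.IsWhiteSpace(termBuffer[end - 1])) end--;
        return new string(termBuffer, start, end - start);
    }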

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
new file mode 100644
index 0000000..212d555
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
@@ -0,0 +1,63 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="TrimFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_trm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.NGramTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.TrimFilterFactory" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	/// <seealso cref= TrimFilter </seealso>
+	public class TrimFilterFactory : TokenFilterFactory
+	{
+
+	  protected internal readonly bool updateOffsets;
+
+	  /// <summary>
+	  /// Creates a new TrimFilterFactory </summary>
+	  public TrimFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		updateOffsets = getBoolean(args, "updateOffsets", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TrimFilter create(TokenStream input)
+	  {
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") final org.apache.lucene.analysis.miscellaneous.TrimFilter filter = new org.apache.lucene.analysis.miscellaneous.TrimFilter(luceneMatchVersion, input, updateOffsets);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+		  TrimFilter filter = new TrimFilter(luceneMatchVersion, input, updateOffsets);
+		return filter;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
new file mode 100644
index 0000000..df01d3a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A token filter for truncating terms to a specific length.
+	/// Fixed prefix truncation, as a stemming method, produces good results for the Turkish language.
+	/// It is reported that F5, using the first 5 characters, produced the best results in
+	/// <a href="http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf">
+	/// Information Retrieval on Turkish Texts</a>
+	/// </summary>
+	public sealed class TruncateTokenFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  private readonly int length;
+
+	  public TruncateTokenFilter(TokenStream input, int length) : base(input)
+	  {
+		if (length < 1)
+		{
+		  throw new System.ArgumentException("length parameter must be a positive number: " + length);
+		}
+		this.length = length;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword && termAttribute.length() > length)
+		  {
+			termAttribute.Length = length;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+}
\ No newline at end of file
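
For non-keyword terms longer than the limit, the filter simply clamps the term
length; the core operation, sketched standalone:

    // F5-style prefix truncation: keep at most the first `length` chars.
    public static string Truncate(string term, int length)
    {
        return term.Length > length ? term.Substring(0, length) : term;
    }

    // Truncate("information", 5) -> "infor"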

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
new file mode 100644
index 0000000..06fd1d0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter"/>. The following type is recommended for "<i>diacritics-insensitive search</i>" for Turkish.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_tr_ascii_f5" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ApostropheFilterFactory"/&gt;
+	///     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/&gt;
+	///     &lt;filter class="solr.KeywordRepeatFilterFactory"/&gt;
+	///     &lt;filter class="solr.TruncateTokenFilterFactory" prefixLength="5"/&gt;
+	///     &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class TruncateTokenFilterFactory : TokenFilterFactory
+	{
+
+	  public const string PREFIX_LENGTH_KEY = "prefixLength";
+	  private readonly sbyte prefixLength;
+
+	  public TruncateTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		prefixLength = sbyte.Parse(get(args, PREFIX_LENGTH_KEY, "5"));
+		if (prefixLength < 1)
+		{
+		  throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength);
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameter(s): " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new TruncateTokenFilter(input, prefixLength);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
new file mode 100644
index 0000000..16575e6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -0,0 +1,761 @@
+using System;
+using System.Text;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using WhitespaceTokenizer = WhitespaceTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+	using InPlaceMergeSorter = org.apache.lucene.util.InPlaceMergeSorter;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Splits words into subwords and performs optional transformations on subword
+	/// groups. Words are split into subwords with the following rules:
+	/// <ul>
+	/// <li>split on intra-word delimiters (by default, all non-alphanumeric
+	/// characters): <code>"Wi-Fi"</code> &#8594; <code>"Wi", "Fi"</code></li>
+	/// <li>split on case transitions: <code>"PowerShot"</code> &#8594;
+	/// <code>"Power", "Shot"</code></li>
+	/// <li>split on letter-number transitions: <code>"SD500"</code> &#8594;
+	/// <code>"SD", "500"</code></li>
+	/// <li>leading and trailing intra-word delimiters on each subword are ignored:
+	/// <code>"//hello---there, 'dude'"</code> &#8594;
+	/// <code>"hello", "there", "dude"</code></li>
+	/// <li>trailing "'s" are removed for each subword: <code>"O'Neil's"</code>
+	/// &#8594; <code>"O", "Neil"</code>
+	/// <ul>
+	/// <li>Note: this step isn't performed in a separate filter because of possible
+	/// subword combinations.</li>
+	/// </ul>
+	/// </li>
+	/// </ul>
+	/// 
+	/// The <b>combinations</b> parameter affects how subwords are combined:
+	/// <ul>
+	/// <li>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
+	/// &#8594; <code>0:"Power", 1:"Shot"</code> (0 and 1 are the token positions)</li>
+	/// <li>combinations="1" means that in addition to the subwords, maximum runs of
+	/// non-numeric subwords are catenated and produced at the same position of the
+	/// last subword in the run:
+	/// <ul>
+	/// <li><code>"PowerShot"</code> &#8594;
+	/// <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
+	/// <li><code>"A's+B's&C's"</code> &#8594; <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
+	/// </li>
+	/// <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> &#8594;
+	/// <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
+	/// </li>
+	/// </ul>
+	/// </li>
+	/// </ul>
+	/// One use for <seealso cref="WordDelimiterFilter"/> is to help match words with different
+	/// subword delimiters. For example, if the source text contained "wi-fi" one may
+	/// want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
+	/// is to specify combinations="1" in the analyzer used for indexing, and
+	/// combinations="0" (the default) in the analyzer used for querying. Given that
+	/// the current <seealso cref="StandardTokenizer"/> immediately removes many intra-word
+	/// delimiters, it is recommended that this filter be used after a tokenizer that
+	/// does not do this (such as <seealso cref="WhitespaceTokenizer"/>).
+	/// </summary>
+	public sealed class WordDelimiterFilter : TokenFilter
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		private void InitializeInstanceFields()
+		{
+			concat = new WordDelimiterConcatenation(this);
+			concatAll = new WordDelimiterConcatenation(this);
+			sorter = new OffsetSorter(this);
+		}
+
+
+	  public const int LOWER = 0x01;
+	  public const int UPPER = 0x02;
+	  public const int DIGIT = 0x04;
+	  public const int SUBWORD_DELIM = 0x08;
+
+	  // combinations: for testing, not for setting bits
+	  public const int ALPHA = 0x03;
+	  public const int ALPHANUM = 0x07;
+
+	  /// <summary>
+	  /// Causes parts of words to be generated:
+	  /// <p/>
+	  /// "PowerShot" => "Power" "Shot"
+	  /// </summary>
+	  public const int GENERATE_WORD_PARTS = 1;
+
+	  /// <summary>
+	  /// Causes number subwords to be generated:
+	  /// <p/>
+	  /// "500-42" => "500" "42"
+	  /// </summary>
+	  public const int GENERATE_NUMBER_PARTS = 2;
+
+	  /// <summary>
+	  /// Causes maximum runs of word parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi" => "wifi"
+	  /// </summary>
+	  public const int CATENATE_WORDS = 4;
+
+	  /// <summary>
+	  /// Causes maximum runs of number parts to be catenated:
+	  /// <p/>
+	  /// "500-42" => "50042"
+	  /// </summary>
+	  public const int CATENATE_NUMBERS = 8;
+
+	  /// <summary>
+	  /// Causes all subword parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi-4000" => "wifi4000"
+	  /// </summary>
+	  public const int CATENATE_ALL = 16;
+
+	  /// <summary>
+	  /// Causes original words to be preserved and added to the subword list (defaults to false)
+	  /// <p/>
+	  /// "500-42" => "500" "42" "500-42"
+	  /// </summary>
+	  public const int PRESERVE_ORIGINAL = 32;
+
+	  /// <summary>
+	  /// If not set, causes case changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens)
+	  /// </summary>
+	  public const int SPLIT_ON_CASE_CHANGE = 64;
+
+	  /// <summary>
+	  /// If not set, causes numeric changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens).
+	  /// </summary>
+	  public const int SPLIT_ON_NUMERICS = 128;
+
+	  /// <summary>
+	  /// Causes trailing "'s" to be removed for each subword
+	  /// <p/>
+	  /// "O'Neil's" => "O", "Neil"
+	  /// </summary>
+	  public const int STEM_ENGLISH_POSSESSIVE = 256;
+
+	  /// <summary>
+	  /// If not null, the set of tokens to protect from being delimited.
+	  /// </summary>
+	  internal readonly CharArraySet protWords;
+
+	  private readonly int flags;
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
+
+	  // used for iterating word delimiter breaks
+	  private readonly WordDelimiterIterator iterator;
+
+	  // used for concatenating runs of similar typed subwords (word,number)
+	  private WordDelimiterConcatenation concat;
+	  // number of subwords last output by concat.
+	  private int lastConcatCount = 0;
+
+	  // used for catenate all
+	  private WordDelimiterConcatenation concatAll;
+
+	  // used for accumulating position increment gaps
+	  private int accumPosInc = 0;
+
+	  private char[] savedBuffer = new char[1024];
+	  private int savedStartOffset;
+	  private int savedEndOffset;
+	  private string savedType;
+	  private bool hasSavedState = false;
+	  // if length by start + end offsets doesn't match the term text then assume
+	  // this is a synonym and don't adjust the offsets.
+	  private bool hasIllegalOffsets = false;
+
+	  // for a run of the same subword type within a word, have we output anything?
+	  private bool hasOutputToken = false;
+	  // when preserve original is on, have we output any token following it?
+	  // this token must have posInc=0!
+	  private bool hasOutputFollowingOriginal = false;
+
+	  /// <summary>
+	  /// Creates a new WordDelimiterFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="charTypeTable"> table containing character types </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public WordDelimiterFilter(Version matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords) : base(@in)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+		if (!matchVersion.onOrAfter(Version.LUCENE_48))
+		{
+		  throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
+		}
+		this.flags = configurationFlags;
+		this.protWords = protWords;
+		this.iterator = new WordDelimiterIterator(charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
+	  }
+
+	  /// <summary>
+	  /// Creates a new WordDelimiterFilter using <seealso cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+	  /// as its charTypeTable
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public WordDelimiterFilter(Version matchVersion, TokenStream @in, int configurationFlags, CharArraySet protWords) : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (!hasSavedState)
+		  {
+			// process a new input word
+			if (!input.incrementToken())
+			{
+			  return false;
+			}
+
+			int termLength = termAttribute.length();
+			char[] termBuffer = termAttribute.buffer();
+
+			accumPosInc += posIncAttribute.PositionIncrement;
+
+			iterator.setText(termBuffer, termLength);
+			iterator.next();
+
+			// word with no delimiters, or protected word: just return it
+			if ((iterator.current == 0 && iterator.end == termLength) || (protWords != null && protWords.contains(termBuffer, 0, termLength)))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  first = false;
+			  return true;
+			}
+
+			// word consisting only of delimiters
+			if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL))
+			{
+			  // if the posInc is 1, simply ignore it in the accumulation
+			  // TODO: proper hole adjustment (FilteringTokenFilter-like) instead of this previous logic!
+			  if (posIncAttribute.PositionIncrement == 1 && !first)
+			  {
+				accumPosInc--;
+			  }
+			  continue;
+			}
+
+			saveState();
+
+			hasOutputToken = false;
+			hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
+			lastConcatCount = 0;
+
+			if (has(PRESERVE_ORIGINAL))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  first = false;
+			  return true;
+			}
+		  }
+
+		  // at the end of the string, output any concatenations
+		  if (iterator.end == WordDelimiterIterator.DONE)
+		  {
+			if (!concat.Empty)
+			{
+			  if (flushConcatenation(concat))
+			  {
+				buffer();
+				continue;
+			  }
+			}
+
+			if (!concatAll.Empty)
+			{
+			  // only if we haven't output this same combo above!
+			  if (concatAll.subwordCount > lastConcatCount)
+			  {
+				concatAll.writeAndClear();
+				buffer();
+				continue;
+			  }
+			  concatAll.clear();
+			}
+
+			if (bufferedPos < bufferedLen)
+			{
+			  if (bufferedPos == 0)
+			  {
+				sorter.sort(0, bufferedLen);
+			  }
+			  clearAttributes();
+			  restoreState(buffered[bufferedPos++]);
+			  if (first && posIncAttribute.PositionIncrement == 0)
+			  {
+				// can easily happen with strange combinations (e.g. not outputting numbers, but concat-all)
+				posIncAttribute.PositionIncrement = 1;
+			  }
+			  first = false;
+			  return true;
+			}
+
+			// no saved concatenations, on to the next input word
+			bufferedPos = bufferedLen = 0;
+			hasSavedState = false;
+			continue;
+		  }
+
+		  // word surrounded by delimiters: always output
+		  if (iterator.SingleWord)
+		  {
+			generatePart(true);
+			iterator.next();
+			first = false;
+			return true;
+		  }
+
+		  int wordType = iterator.type();
+
+		  // do we already have queued up incompatible concatenations?
+		  if (!concat.Empty && (concat.type & wordType) == 0)
+		  {
+			if (flushConcatenation(concat))
+			{
+			  hasOutputToken = false;
+			  buffer();
+			  continue;
+			}
+			hasOutputToken = false;
+		  }
+
+		  // add subwords depending upon options
+		  if (shouldConcatenate(wordType))
+		  {
+			if (concat.Empty)
+			{
+			  concat.type = wordType;
+			}
+			concatenate(concat);
+		  }
+
+		  // add all subwords (catenateAll)
+		  if (has(CATENATE_ALL))
+		  {
+			concatenate(concatAll);
+		  }
+
+		  // if we should output the word or number part
+		  if (shouldGenerateParts(wordType))
+		  {
+			generatePart(false);
+			buffer();
+		  }
+
+		  iterator.next();
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		hasSavedState = false;
+		concat.clear();
+		concatAll.clear();
+		accumPosInc = bufferedPos = bufferedLen = 0;
+		first = true;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+
+	  private AttributeSource.State[] buffered = new AttributeSource.State[8];
+	  private int[] startOff = new int[8];
+	  private int[] posInc = new int[8];
+	  private int bufferedLen = 0;
+	  private int bufferedPos = 0;
+	  private bool first;
+
+	  private class OffsetSorter : InPlaceMergeSorter
+	  {
+		  private readonly WordDelimiterFilter outerInstance;
+
+		  public OffsetSorter(WordDelimiterFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		protected internal override int compare(int i, int j)
+		{
+		  int cmp = outerInstance.startOff[i].CompareTo(outerInstance.startOff[j]);
+		  if (cmp == 0)
+		  {
+			cmp = outerInstance.posInc[j].CompareTo(outerInstance.posInc[i]);
+		  }
+		  return cmp;
+		}
+
+		protected internal override void swap(int i, int j)
+		{
+		  AttributeSource.State tmp = outerInstance.buffered[i];
+		  outerInstance.buffered[i] = outerInstance.buffered[j];
+		  outerInstance.buffered[j] = tmp;
+
+		  int tmp2 = outerInstance.startOff[i];
+		  outerInstance.startOff[i] = outerInstance.startOff[j];
+		  outerInstance.startOff[j] = tmp2;
+
+		  tmp2 = outerInstance.posInc[i];
+		  outerInstance.posInc[i] = outerInstance.posInc[j];
+		  outerInstance.posInc[j] = tmp2;
+		}
+	  }
+
+	  internal OffsetSorter sorter;
+
+	  private void buffer()
+	  {
+		if (bufferedLen == buffered.Length)
+		{
+		  int newSize = ArrayUtil.oversize(bufferedLen + 1, 8);
+		  Array.Resize(ref buffered, newSize);
+		  Array.Resize(ref startOff, newSize);
+		  Array.Resize(ref posInc, newSize);
+		}
+		startOff[bufferedLen] = offsetAttribute.startOffset();
+		posInc[bufferedLen] = posIncAttribute.PositionIncrement;
+		buffered[bufferedLen] = captureState();
+		bufferedLen++;
+	  }
+
+	  /// <summary>
+	  /// Saves the existing attribute states
+	  /// </summary>
+	  private void saveState()
+	  {
+		// otherwise, we have delimiters, save state
+		savedStartOffset = offsetAttribute.startOffset();
+		savedEndOffset = offsetAttribute.endOffset();
+		// if the length implied by the start/end offsets doesn't match the term text, assume this is a synonym and don't adjust the offsets.
+		hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
+		savedType = typeAttribute.type();
+
+		if (savedBuffer.Length < termAttribute.length())
+		{
+		  savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+
+		Array.Copy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
+		iterator.text = savedBuffer;
+
+		hasSavedState = true;
+	  }
+
+	  /// <summary>
+	  /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
+	  /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+	  private bool flushConcatenation(WordDelimiterConcatenation concatenation)
+	  {
+		lastConcatCount = concatenation.subwordCount;
+		if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type))
+		{
+		  concatenation.writeAndClear();
+		  return true;
+		}
+		concatenation.clear();
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Determines whether to concatenate a word or number if the current word is the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
+	  /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+	  private bool shouldConcatenate(int wordType)
+	  {
+		return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Determines whether a word/number part should be generated for a word of the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
+	  /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+	  private bool shouldGenerateParts(int wordType)
+	  {
+		return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+	  private void concatenate(WordDelimiterConcatenation concatenation)
+	  {
+		if (concatenation.Empty)
+		{
+		  concatenation.startOffset = savedStartOffset + iterator.current;
+		}
+		concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
+		concatenation.endOffset = savedStartOffset + iterator.end;
+	  }
+
+	  /// <summary>
+	  /// Generates a word/number part, updating the appropriate attributes
+	  /// </summary>
+	  /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+	  private void generatePart(bool isSingleWord)
+	  {
+		clearAttributes();
+		termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+
+		int startOffset = savedStartOffset + iterator.current;
+		int endOffset = savedStartOffset + iterator.end;
+
+		if (hasIllegalOffsets)
+		{
+		  // historically this filter did this regardless of 'isSingleWord',
+		  // but we must sanity-check the offsets:
+		  if (isSingleWord && startOffset <= savedEndOffset)
+		  {
+			offsetAttribute.setOffset(startOffset, savedEndOffset);
+		  }
+		  else
+		  {
+			offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+		  }
+		}
+		else
+		{
+		  offsetAttribute.setOffset(startOffset, endOffset);
+		}
+		posIncAttribute.PositionIncrement = position(false);
+		typeAttribute.Type = savedType;
+	  }
+
+	  /// <summary>
+	  /// Get the position increment gap for a subword or concatenation
+	  /// </summary>
+	  /// <param name="inject"> true if this token wants to be injected </param>
+	  /// <returns> position increment gap </returns>
+	  private int position(bool inject)
+	  {
+		int posInc = accumPosInc;
+
+		if (hasOutputToken)
+		{
+		  accumPosInc = 0;
+		  return inject ? 0 : Math.Max(1, posInc);
+		}
+
+		hasOutputToken = true;
+
+		if (!hasOutputFollowingOriginal)
+		{
+		  // the first token following the original is 0 regardless
+		  hasOutputFollowingOriginal = true;
+		  return 0;
+		}
+		// clear the accumulated position increment
+		accumPosInc = 0;
+		return Math.Max(1, posInc);
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#ALPHA"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+	  internal static bool isAlpha(int type)
+	  {
+		return (type & ALPHA) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#DIGIT"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+	  internal static bool isDigit(int type)
+	  {
+		return (type & DIGIT) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#SUBWORD_DELIM"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+	  internal static bool isSubwordDelim(int type)
+	  {
+		return (type & SUBWORD_DELIM) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#UPPER"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+	  internal static bool isUpper(int type)
+	  {
+		return (type & UPPER) != 0;
+	  }
+
+	  /// <summary>
+	  /// Determines whether the given flag is set
+	  /// </summary>
+	  /// <param name="flag"> Flag to see if set </param>
+	  /// <returns> {@code true} if flag is set </returns>
+	  private bool has(int flag)
+	  {
+		return (flags & flag) != 0;
+	  }
+
+	  // ================================================= Inner Classes =================================================
+
+	  /// <summary>
+	  /// A WDF concatenated 'run'
+	  /// </summary>
+	  internal sealed class WordDelimiterConcatenation
+	  {
+		  private readonly WordDelimiterFilter outerInstance;
+
+		  public WordDelimiterConcatenation(WordDelimiterFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		internal readonly StringBuilder buffer = new StringBuilder();
+		internal int startOffset;
+		internal int endOffset;
+		internal int type;
+		internal int subwordCount;
+
+		/// <summary>
+		/// Appends the given text of the given length, to the concatenation at the given offset
+		/// </summary>
+		/// <param name="text"> Text to append </param>
+		/// <param name="offset"> Offset in the concatenation to add the text </param>
+		/// <param name="length"> Length of the text to append </param>
+		internal void append(char[] text, int offset, int length)
+		{
+		  buffer.Append(text, offset, length);
+		  subwordCount++;
+		}
+
+		/// <summary>
+		/// Writes the concatenation to the attributes
+		/// </summary>
+		internal void write()
+		{
+		  outerInstance.clearAttributes();
+		  if (outerInstance.termAttribute.length() < buffer.Length)
+		  {
+			outerInstance.termAttribute.resizeBuffer(buffer.Length);
+		  }
+		  char[] termbuffer = outerInstance.termAttribute.buffer();
+
+		  buffer.CopyTo(0, termbuffer, 0, buffer.Length);
+		  outerInstance.termAttribute.Length = buffer.Length;
+
+		  if (outerInstance.hasIllegalOffsets)
+		  {
+			outerInstance.offsetAttribute.setOffset(outerInstance.savedStartOffset, outerInstance.savedEndOffset);
+		  }
+		  else
+		  {
+			outerInstance.offsetAttribute.setOffset(startOffset, endOffset);
+		  }
+		  outerInstance.posIncAttribute.PositionIncrement = outerInstance.position(true);
+		  outerInstance.typeAttribute.Type = outerInstance.savedType;
+		  outerInstance.accumPosInc = 0;
+		}
+
+		/// <summary>
+		/// Determines if the concatenation is empty
+		/// </summary>
+		/// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
+		internal bool Empty
+		{
+			get
+			{
+			  return buffer.Length == 0;
+			}
+		}
+
+		/// <summary>
+		/// Clears the concatenation and resets its state
+		/// </summary>
+		internal void clear()
+		{
+		  buffer.Length = 0;
+		  startOffset = endOffset = type = subwordCount = 0;
+		}
+
+		/// <summary>
+		/// Convenience method for the common scenario of having to write the concatenation and then clearing its state
+		/// </summary>
+		internal void writeAndClear()
+		{
+		  write();
+		  clear();
+		}
+	  }
+	  // questions:
+	  // negative numbers?  -42 indexed as just 42?
+	  // dollar sign?  $42
+	  // percent sign?  33%
+	  // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
+	}
+
+}
\ No newline at end of file

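A minimal usage sketch of the filter above, driven entirely by its flag bitset. This
is a hedged illustration, not part of the commit: it assumes the raw-ported names and
casing survive as shown, that WhitespaceTokenizer and Version.LUCENE_48 exist in the
port as in Java Lucene, and that reader is a placeholder TextReader:

    int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
              | WordDelimiterFilter.GENERATE_NUMBER_PARTS
              | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48, reader);
    ts = new WordDelimiterFilter(Version.LUCENE_48, ts, flags, null /* no protected words */);
    // "PowerShot500" is then emitted as "Power", "Shot", "500"
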
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
new file mode 100644
index 0000000..747ed48
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -0,0 +1,270 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+	using Version = org.apache.lucene.util.Version;
+
+
+	using org.apache.lucene.analysis.miscellaneous;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
+
+	/// <summary>
+	/// Factory for <seealso cref="WordDelimiterFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_wd" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.WordDelimiterFilterFactory" protected="protectedword.txt"
+	///             preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
+	///             catenateWords="0" catenateNumbers="0" catenateAll="0"
+	///             generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
+	///             types="wdfftypes.txt" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class WordDelimiterFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string PROTECTED_TOKENS = "protected";
+	  public const string TYPES = "types";
+
+	  private readonly string wordFiles;
+	  private readonly string types;
+	  private readonly int flags;
+	  internal sbyte[] typeTable = null;
+	  private CharArraySet protectedWords = null;
+
+	  /// <summary>
+	  /// Creates a new WordDelimiterFilterFactory </summary>
+	  public WordDelimiterFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		int flags = 0;
+		if (getInt(args, "generateWordParts", 1) != 0)
+		{
+		  flags |= WordDelimiterFilter.GENERATE_WORD_PARTS;
+		}
+		if (getInt(args, "generateNumberParts", 1) != 0)
+		{
+		  flags |= WordDelimiterFilter.GENERATE_NUMBER_PARTS;
+		}
+		if (getInt(args, "catenateWords", 0) != 0)
+		{
+		  flags |= WordDelimiterFilter.CATENATE_WORDS;
+		}
+		if (getInt(args, "catenateNumbers", 0) != 0)
+		{
+		  flags |= WordDelimiterFilter.CATENATE_NUMBERS;
+		}
+		if (getInt(args, "catenateAll", 0) != 0)
+		{
+		  flags |= WordDelimiterFilter.CATENATE_ALL;
+		}
+		if (getInt(args, "splitOnCaseChange", 1) != 0)
+		{
+		  flags |= WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
+		}
+		if (getInt(args, "splitOnNumerics", 1) != 0)
+		{
+		  flags |= WordDelimiterFilter.SPLIT_ON_NUMERICS;
+		}
+		if (getInt(args, "preserveOriginal", 0) != 0)
+		{
+		  flags |= WordDelimiterFilter.PRESERVE_ORIGINAL;
+		}
+		if (getInt(args, "stemEnglishPossessive", 1) != 0)
+		{
+		  flags |= WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
+		}
+		wordFiles = get(args, PROTECTED_TOKENS);
+		types = get(args, TYPES);
+		this.flags = flags;
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (wordFiles != null)
+		{
+		  protectedWords = getWordSet(loader, wordFiles, false);
+		}
+		if (types != null)
+		{
+		  IList<string> files = splitFileNames(types);
+		  IList<string> wlist = new List<string>();
+		  foreach (string file in files)
+		  {
+			IList<string> lines = getLines(loader, file.Trim());
+			wlist.AddRange(lines);
+		  }
+		  typeTable = parseTypes(wlist);
+		}
+	  }
+
+	  public override TokenFilter create(TokenStream input)
+	  {
+		if (luceneMatchVersion.onOrAfter(Version.LUCENE_48))
+		{
+		  return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, flags, protectedWords);
+		}
+		else
+		{
+		  return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, flags, protectedWords);
+		}
+	  }
+
+	  // source => type
+	  private static readonly Regex typePattern = new Regex("(.*)\\s*=>\\s*(.*)\\s*$", RegexOptions.Compiled);
+
+	  // parses a list of MappingCharFilter style rules into a custom byte[] type table
+	  private sbyte[] parseTypes(IList<string> rules)
+	  {
+		SortedDictionary<char, sbyte> typeMap = new SortedDictionary<char, sbyte>();
+		foreach (string rule in rules)
+		{
+		  Match m = typePattern.Match(rule);
+		  if (!m.Success)
+		  {
+			throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]");
+		  }
+		  string lhs = parseString(m.Groups[1].Value.Trim());
+		  sbyte? rhs = parseType(m.Groups[2].Value.Trim());
+		  if (lhs.Length != 1)
+		  {
+			throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
+		  }
+		  if (rhs == null)
+		  {
+			throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]. Illegal type.");
+		  }
+		  typeMap[lhs[0]] = rhs.Value;
+		}
+
+		// ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
+		sbyte[] types = new sbyte[Math.Max(typeMap.Keys.Last() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
+		for (int i = 0; i < types.Length; i++)
+		{
+		  types[i] = WordDelimiterIterator.getType(i);
+		}
+		foreach (KeyValuePair<char, sbyte> mapping in typeMap)
+		{
+		  types[mapping.Key] = mapping.Value;
+		}
+		return types;
+	  }
+
+	  private sbyte? parseType(string s)
+	  {
+		if (s.Equals("LOWER"))
+		{
+		  return WordDelimiterFilter.LOWER;
+		}
+		else if (s.Equals("UPPER"))
+		{
+		  return WordDelimiterFilter.UPPER;
+		}
+		else if (s.Equals("ALPHA"))
+		{
+		  return WordDelimiterFilter.ALPHA;
+		}
+		else if (s.Equals("DIGIT"))
+		{
+		  return WordDelimiterFilter.DIGIT;
+		}
+		else if (s.Equals("ALPHANUM"))
+		{
+		  return WordDelimiterFilter.ALPHANUM;
+		}
+		else if (s.Equals("SUBWORD_DELIM"))
+		{
+		  return WordDelimiterFilter.SUBWORD_DELIM;
+		}
+		else
+		{
+		  return null;
+		}
+	  }
+
+	  internal char[] @out = new char[256];
+
+	  private string parseString(string s)
+	  {
+		int readPos = 0;
+		int len = s.Length;
+		int writePos = 0;
+		while (readPos < len)
+		{
+		  char c = s[readPos++];
+		  if (c == '\\')
+		  {
+			if (readPos >= len)
+			{
+			  throw new System.ArgumentException("Invalid escaped char in [" + s + "]");
+			}
+			c = s[readPos++];
+			switch (c)
+			{
+			  case '\\' :
+				  c = '\\';
+				  break;
+			  case 'n' :
+				  c = '\n';
+				  break;
+			  case 't' :
+				  c = '\t';
+				  break;
+			  case 'r' :
+				  c = '\r';
+				  break;
+			  case 'b' :
+				  c = '\b';
+				  break;
+			  case 'f' :
+				  c = '\f';
+				  break;
+			  case 'u' :
+				if (readPos + 3 >= len)
+				{
+				  throw new System.ArgumentException("Invalid escaped char in [" + s + "]");
+				}
+				c = (char)Convert.ToInt32(s.Substring(readPos, 4), 16);
+				readPos += 4;
+				break;
+			}
+		  }
+		  @out[writePos++] = c;
+		}
+		return new string(@out, 0, writePos);
+	  }
+	}
+
+}
\ No newline at end of file

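Factory configuration arrives as the string map consumed in the constructor above;
each recognized key is removed, leftovers raise ArgumentException, and file-backed
options are resolved later through inform(). A hedged sketch (the loader variable and
the luceneMatchVersion key are assumptions about the surrounding infrastructure):

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_48" }, // assumed to be consumed by the base factory
        { "generateWordParts", "1" },
        { "catenateWords", "1" },
        { "protected", "protectedword.txt" }   // PROTECTED_TOKENS, loaded in inform()
    };
    var factory = new WordDelimiterFilterFactory(args);
    factory.inform(loader);                    // required when "protected" or "types" is set
    TokenFilter filter = factory.create(upstreamTokenStream);
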
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
new file mode 100644
index 0000000..154176b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -0,0 +1,367 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.miscellaneous;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
+
+	/// <summary>
+	/// A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules.
+	/// @lucene.internal
+	/// </summary>
+	public sealed class WordDelimiterIterator
+	{
+
+	  /// <summary>
+	  /// Indicates the end of iteration </summary>
+	  public const int DONE = -1;
+
+	  public static readonly sbyte[] DEFAULT_WORD_DELIM_TABLE;
+
+	  internal char[] text;
+	  internal int length;
+
+	  /// <summary>
+	  /// start position of text, excluding leading delimiters </summary>
+	  internal int startBounds;
+	  /// <summary>
+	  /// end position of text, excluding trailing delimiters </summary>
+	  internal int endBounds;
+
+	  /// <summary>
+	  /// Beginning of subword </summary>
+	  internal int current;
+	  /// <summary>
+	  /// End of subword </summary>
+	  internal int end;
+
+	  /* does this string end with a possessive such as 's */
+	  private bool hasFinalPossessive = false;
+
+	  /// <summary>
+	  /// If false, causes case changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens). (Defaults to true)
+	  /// </summary>
+	  internal readonly bool splitOnCaseChange;
+
+	  /// <summary>
+	  /// If false, causes numeric changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens). (Defaults to true)
+	  /// </summary>
+	  internal readonly bool splitOnNumerics;
+
+	  /// <summary>
+	  /// If true, causes trailing "'s" to be removed for each subword. (Defaults to true)
+	  /// <p/>
+	  /// "O'Neil's" => "O", "Neil"
+	  /// </summary>
+	  internal readonly bool stemEnglishPossessive;
+
+	  private readonly sbyte[] charTypeTable;
+
+	  /// <summary>
+	  /// if true, need to skip over a possessive found in the last call to next() </summary>
+	  private bool skipPossessive = false;
+
+	  // TODO: should there be a WORD_DELIM category for chars that only separate words (no catenation of subwords will be
+	  // done if separated by these chars?) "," would be an obvious candidate...
+	  static WordDelimiterIterator()
+	  {
+		sbyte[] tab = new sbyte[256];
+		for (int i = 0; i < 256; i++)
+		{
+		  sbyte code = 0;
+		  if (char.IsLower((char)i))
+		  {
+			code |= (sbyte)WordDelimiterFilter.LOWER;
+		  }
+		  else if (char.IsUpper((char)i))
+		  {
+			code |= (sbyte)WordDelimiterFilter.UPPER;
+		  }
+		  else if (char.IsDigit((char)i))
+		  {
+			code |= (sbyte)WordDelimiterFilter.DIGIT;
+		  }
+		  if (code == 0)
+		  {
+			code = WordDelimiterFilter.SUBWORD_DELIM;
+		  }
+		  tab[i] = code;
+		}
+		DEFAULT_WORD_DELIM_TABLE = tab;
+	  }
+
+	  /// <summary>
+	  /// Create a new WordDelimiterIterator operating with the supplied rules.
+	  /// </summary>
+	  /// <param name="charTypeTable"> table containing character types </param>
+	  /// <param name="splitOnCaseChange"> if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regardless) </param>
+	  /// <param name="splitOnNumerics"> if true, causes "j2se" to be three tokens; "j" "2" "se" </param>
+	  /// <param name="stemEnglishPossessive"> if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" </param>
+	  internal WordDelimiterIterator(sbyte[] charTypeTable, bool splitOnCaseChange, bool splitOnNumerics, bool stemEnglishPossessive)
+	  {
+		this.charTypeTable = charTypeTable;
+		this.splitOnCaseChange = splitOnCaseChange;
+		this.splitOnNumerics = splitOnNumerics;
+		this.stemEnglishPossessive = stemEnglishPossessive;
+	  }
+
+	  /// <summary>
+	  /// Advance to the next subword in the string.
+	  /// </summary>
+	  /// <returns> index of the next subword, or <seealso cref="#DONE"/> if all subwords have been returned </returns>
+	  internal int next()
+	  {
+		current = end;
+		if (current == DONE)
+		{
+		  return DONE;
+		}
+
+		if (skipPossessive)
+		{
+		  current += 2;
+		  skipPossessive = false;
+		}
+
+		int lastType = 0;
+
+		while (current < endBounds && (WordDelimiterFilter.isSubwordDelim(lastType = charType(text[current]))))
+		{
+		  current++;
+		}
+
+		if (current >= endBounds)
+		{
+		  return end = DONE;
+		}
+
+		for (end = current + 1; end < endBounds; end++)
+		{
+		  int type_Renamed = charType(text[end]);
+		  if (isBreak(lastType, type_Renamed))
+		  {
+			break;
+		  }
+		  lastType = type_Renamed;
+		}
+
+		if (end < endBounds - 1 && endsWithPossessive(end + 2))
+		{
+		  skipPossessive = true;
+		}
+
+		return end;
+	  }
+
+
+	  /// <summary>
+	  /// Return the type of the current subword.
+	  /// This currently uses the type of the first character in the subword.
+	  /// </summary>
+	  /// <returns> type of the current word </returns>
+	  internal int type()
+	  {
+		if (end == DONE)
+		{
+		  return 0;
+		}
+
+		int type_Renamed = charType(text[current]);
+		switch (type_Renamed)
+		{
+		  // return ALPHA word type for both lower and upper
+		  case WordDelimiterFilter.LOWER:
+		  case WordDelimiterFilter.UPPER:
+			return WordDelimiterFilter.ALPHA;
+		  default:
+			return type_Renamed;
+		}
+	  }
+
+	  /// <summary>
+	  /// Reset the text to a new value, and reset all state
+	  /// </summary>
+	  /// <param name="text"> New text </param>
+	  /// <param name="length"> length of the text </param>
+	  internal void setText(char[] text, int length)
+	  {
+		this.text = text;
+		this.length = this.endBounds = length;
+		current = startBounds = end = 0;
+		skipPossessive = hasFinalPossessive = false;
+		setBounds();
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Determines whether the transition from lastType to type indicates a break
+	  /// </summary>
+	  /// <param name="lastType"> Last subword type </param>
+	  /// <param name="type"> Current subword type </param>
+	  /// <returns> {@code true} if the transition indicates a break, {@code false} otherwise </returns>
+	  private bool isBreak(int lastType, int type)
+	  {
+		if ((type & lastType) != 0)
+		{
+		  return false;
+		}
+
+		if (!splitOnCaseChange && WordDelimiterFilter.isAlpha(lastType) && WordDelimiterFilter.isAlpha(type))
+		{
+		  // ALPHA->ALPHA: always ignore if case isn't considered.
+		  return false;
+		}
+		else if (WordDelimiterFilter.isUpper(lastType) && WordDelimiterFilter.isAlpha(type))
+		{
+		  // UPPER->letter: Don't split
+		  return false;
+		}
+		else if (!splitOnNumerics && ((WordDelimiterFilter.isAlpha(lastType) && WordDelimiterFilter.isDigit(type)) || (WordDelimiterFilter.isDigit(lastType) && WordDelimiterFilter.isAlpha(type))))
+		{
+		  // ALPHA->NUMERIC, NUMERIC->ALPHA :Don't split
+		  return false;
+		}
+
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Determines if the current word contains only one subword.  Note that it may still be surrounded by delimiters
+	  /// </summary>
+	  /// <returns> {@code true} if the current word contains only one subword, {@code false} otherwise </returns>
+	  internal bool SingleWord
+	  {
+		  get
+		  {
+			if (hasFinalPossessive)
+			{
+			  return current == startBounds && end == endBounds - 2;
+			}
+			else
+			{
+			  return current == startBounds && end == endBounds;
+			}
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set the internal word bounds (remove leading and trailing delimiters). Note, if a possessive is found, don't remove
+	  /// it yet, simply note it.
+	  /// </summary>
+	  private void setBounds()
+	  {
+		while (startBounds < length && (WordDelimiterFilter.isSubwordDelim(charType(text[startBounds]))))
+		{
+		  startBounds++;
+		}
+
+		while (endBounds > startBounds && (WordDelimiterFilter.isSubwordDelim(charType(text[endBounds - 1]))))
+		{
+		  endBounds--;
+		}
+		if (endsWithPossessive(endBounds))
+		{
+		  hasFinalPossessive = true;
+		}
+		current = startBounds;
+	  }
+
+	  /// <summary>
+	  /// Determines if the text at the given position indicates an English possessive which should be removed
+	  /// </summary>
+	  /// <param name="pos"> Position in the text to check if it indicates an English possessive </param>
+	  /// <returns> {@code true} if the text at the position indicates an English possessive, {@code false} otherwise </returns>
+	  private bool endsWithPossessive(int pos)
+	  {
+		return (stemEnglishPossessive && pos > 2 && text[pos - 2] == '\'' && (text[pos - 1] == 's' || text[pos - 1] == 'S') && WordDelimiterFilter.isAlpha(charType(text[pos - 3])) && (pos == endBounds || WordDelimiterFilter.isSubwordDelim(charType(text[pos]))));
+	  }
+
+	  /// <summary>
+	  /// Determines the type of the given character
+	  /// </summary>
+	  /// <param name="ch"> Character whose type is to be determined </param>
+	  /// <returns> Type of the character </returns>
+	  private int charType(int ch)
+	  {
+		if (ch < charTypeTable.Length)
+		{
+		  return charTypeTable[ch];
+		}
+		return getType(ch);
+	  }
+
+	  /// <summary>
+	  /// Computes the type of the given character
+	  /// </summary>
+	  /// <param name="ch"> Character whose type is to be determined </param>
+	  /// <returns> Type of the character </returns>
+	  public static sbyte getType(int ch)
+	  {
+		// NOTE: ch is an int code point; code points above the BMP would need surrogate-aware handling
+		switch (char.GetUnicodeCategory((char)ch))
+		{
+		  case System.Globalization.UnicodeCategory.UppercaseLetter:
+			  return WordDelimiterFilter.UPPER;
+		  case System.Globalization.UnicodeCategory.LowercaseLetter:
+			  return WordDelimiterFilter.LOWER;
+
+		  case System.Globalization.UnicodeCategory.TitlecaseLetter:
+		  case System.Globalization.UnicodeCategory.ModifierLetter:
+		  case System.Globalization.UnicodeCategory.OtherLetter:
+		  case System.Globalization.UnicodeCategory.NonSpacingMark:
+		  case System.Globalization.UnicodeCategory.EnclosingMark: // depends what it encloses?
+		  case System.Globalization.UnicodeCategory.SpacingCombiningMark:
+			return WordDelimiterFilter.ALPHA;
+
+		  case System.Globalization.UnicodeCategory.DecimalDigitNumber:
+		  case System.Globalization.UnicodeCategory.LetterNumber:
+		  case System.Globalization.UnicodeCategory.OtherNumber:
+			return WordDelimiterFilter.DIGIT;
+
+		  // UnicodeCategory.SpaceSeparator, LineSeparator, ParagraphSeparator,
+		  // Control, Format, PrivateUse: fall through to SUBWORD_DELIM
+
+		  case System.Globalization.UnicodeCategory.Surrogate: // prevent splitting
+			return WordDelimiterFilter.ALPHA | WordDelimiterFilter.DIGIT;
+
+		  // UnicodeCategory.DashPunctuation, OpenPunctuation, ClosePunctuation,
+		  // ConnectorPunctuation, OtherPunctuation, MathSymbol, CurrencySymbol,
+		  // ModifierSymbol, OtherSymbol, InitialQuotePunctuation,
+		  // FinalQuotePunctuation: fall through to SUBWORD_DELIM
+
+		  default:
+			  return WordDelimiterFilter.SUBWORD_DELIM;
+		}
+	  }
+	}
+}
\ No newline at end of file

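The iterator is a cursor over [current, end) slices of the buffer handed to setText().
A sketch of the protocol (this is an internal, @lucene.internal API; shown only to
illustrate the loop):

    var iter = new WordDelimiterIterator(
        WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
        splitOnCaseChange: true, splitOnNumerics: true, stemEnglishPossessive: true);
    char[] text = "Wi-Fi2go".ToCharArray();
    iter.setText(text, text.Length);
    while (iter.next() != WordDelimiterIterator.DONE)
    {
        // each slice is one subword: "Wi", "Fi", "2", "go"
        string subword = new string(text, iter.current, iter.end - iter.current);
    }
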
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
new file mode 100644
index 0000000..5b3d94b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Creates new instances of <seealso cref="EdgeNGramTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="1"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class EdgeNGramFilterFactory : TokenFilterFactory
+	{
+	  private readonly int maxGramSize;
+	  private readonly int minGramSize;
+	  private readonly string side;
+
+	  /// <summary>
+	  /// Creates a new EdgeNGramFilterFactory </summary>
+	  public EdgeNGramFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
+		maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
+		side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.Label);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override EdgeNGramTokenFilter create(TokenStream input)
+	  {
+		return new EdgeNGramTokenFilter(luceneMatchVersion, input, side, minGramSize, maxGramSize);
+	  }
+	}
+
+}
\ No newline at end of file

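A sketch of the factory's argument handling, mirroring the Solr attributes in the
class comment above (assumes EdgeNGramTokenFilter keeps the Java defaults, including
the FRONT side):

    var args = new Dictionary<string, string>
    {
        { "minGramSize", "1" },
        { "maxGramSize", "3" }  // "side" is left at its FRONT default
    };
    var factory = new EdgeNGramFilterFactory(args);
    TokenFilter filter = factory.create(input); // "apple" -> "a", "ap", "app"
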

[11/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs
new file mode 100644
index 0000000..1876a21
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+
+	/// <summary>
+	/// A TokenFilter which applies a Pattern to each token in the stream,
+	/// replacing match occurrences with the specified replacement string.
+	/// 
+	/// <para>
+	/// <b>Note:</b> Depending on the input and the pattern used and the input
+	/// TokenStream, this TokenFilter may produce Tokens whose text is the empty
+	/// string.
+	/// </para>
+	/// </summary>
+	/// <seealso cref= Pattern </seealso>
+	public sealed class PatternReplaceFilter : TokenFilter
+	{
+	  private readonly string replacement;
+	  private readonly bool all;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly Matcher m;
+
+	  /// <summary>
+	  /// Constructs an instance to replace either the first or all occurrences
+	  /// </summary>
+	  /// <param name="in"> the TokenStream to process </param>
+	  /// <param name="p"> the pattern to apply to each Token </param>
+	  /// <param name="replacement"> the "replacement string" to substitute, if null a
+	  ///        blank string will be used. Note that this is not the literal
+	  ///        string that will be used, '$' and '\' have special meaning. </param>
+	  /// <param name="all"> if true, all matches will be replaced otherwise just the first match. </param>
+	  /// <seealso cref= Matcher#quoteReplacement </seealso>
+	  public PatternReplaceFilter(TokenStream @in, Pattern p, string replacement, bool all) : base(@in)
+	  {
+		this.replacement = (null == replacement) ? "" : replacement;
+		this.all = all;
+		this.m = p.matcher(termAtt);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+			return false;
+		}
+
+		m.reset();
+		if (m.find())
+		{
+		  // replaceAll/replaceFirst will reset() this previous find.
+		  string transformed = all ? m.replaceAll(replacement) : m.replaceFirst(replacement);
+		  termAtt.setEmpty().append(transformed);
+		}
+
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file

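As the constructor shows, one Matcher is kept and re-targeted at each term. A sketch
of typical use (this raw port still carries Java's Pattern/Matcher types, so the calls
below mirror the Java API; KeywordTokenizer is assumed available as in Java Lucene):

    TokenStream ts = new KeywordTokenizer(reader);
    ts = new PatternReplaceFilter(ts, Pattern.compile("[^a-z0-9]"), "", true);
    // "foo-bar_baz" -> "foobarbaz"; with all = false only the first match is replaced
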
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilterFactory.cs
new file mode 100644
index 0000000..d030789
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternReplaceFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement=""
+	///             replace="all"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	/// <seealso cref= PatternReplaceFilter </seealso>
+	public class PatternReplaceFilterFactory : TokenFilterFactory
+	{
+	  internal readonly Pattern pattern;
+	  internal readonly string replacement;
+	  internal readonly bool replaceAll;
+
+	  /// <summary>
+	  /// Creates a new PatternReplaceFilterFactory </summary>
+	  public PatternReplaceFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, "pattern");
+		replacement = get(args, "replacement");
+		replaceAll = "all".Equals(get(args, "replace", Arrays.asList("all", "first"), "all"));
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override PatternReplaceFilter create(TokenStream input)
+	  {
+		return new PatternReplaceFilter(input, pattern, replacement, replaceAll);
+	  }
+	}
+
+}
\ No newline at end of file

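The factory form of the same filter; "replace" accepts only "all" or "first". A hedged
sketch of the argument map (input is an assumed upstream TokenStream):

    var args = new Dictionary<string, string>
    {
        { "pattern", "([^a-z])" },
        { "replacement", "" },
        { "replace", "all" }
    };
    var factory = new PatternReplaceFilterFactory(args);
    PatternReplaceFilter filter = factory.create(input);
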
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs
new file mode 100644
index 0000000..d403494
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs
@@ -0,0 +1,185 @@
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+	/// <summary>
+	/// This tokenizer uses regex pattern matching to construct distinct tokens
+	/// for the input stream.  It takes two arguments:  "pattern" and "group".
+	/// <p/>
+	/// <ul>
+	/// <li>"pattern" is the regular expression.</li>
+	/// <li>"group" says which group to extract into tokens.</li>
+	///  </ul>
+	/// <para>
+	/// group=-1 (the default) is equivalent to "split".  In this case, the tokens will
+	/// be equivalent to the output from (without empty tokens):
+	/// <seealso cref="String#split(java.lang.String)"/>
+	/// </para>
+	/// <para>
+	/// Using group >= 0 selects the matching group as the token.  For example, if you have:<br/>
+	/// <pre>
+	///  pattern = \'([^\']+)\'
+	///  group = 0
+	///  input = aaa 'bbb' 'ccc'
+	/// </pre>
+	/// the output will be two tokens: 'bbb' and 'ccc' (including the ' marks).  With the same input
+	/// but using group=1, the output would be: bbb and ccc (no ' marks)
+	/// </para>
+	/// <para>NOTE: This Tokenizer does not output tokens that are of zero length.</para>
+	/// </summary>
+	/// <seealso cref= Pattern </seealso>
+	public sealed class PatternTokenizer : Tokenizer
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  private readonly StringBuilder str = new StringBuilder();
+	  private int index;
+
+	  private readonly int group;
+	  private readonly Matcher matcher;
+
+	  /// <summary>
+	  /// creates a new PatternTokenizer returning tokens from group (-1 for split functionality) </summary>
+	  public PatternTokenizer(Reader input, Pattern pattern, int group) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, pattern, group)
+	  {
+	  }
+
+	  /// <summary>
+	  /// creates a new PatternTokenizer returning tokens from group (-1 for split functionality) </summary>
+	  public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group) : base(factory, input)
+	  {
+		this.group = group;
+
+		// Use "" instead of str so we don't consume chars
+		// (fillBuffer) from the input on throwing IAE below:
+		matcher = pattern.matcher("");
+
+		// confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher
+		if (group >= 0 && group > matcher.groupCount())
+		{
+		  throw new System.ArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
+		}
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (index >= str.Length)
+		{
+			return false;
+		}
+		clearAttributes();
+		if (group >= 0)
+		{
+
+		  // match a specific group
+		  while (matcher.find())
+		  {
+			index = matcher.start(group);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int endIndex = matcher.end(group);
+			int endIndex = matcher.end(group);
+			if (index == endIndex)
+			{
+				continue;
+			}
+			termAtt.setEmpty().append(str, index, endIndex);
+			offsetAtt.setOffset(correctOffset(index), correctOffset(endIndex));
+			return true;
+		  }
+
+		  index = int.MaxValue; // mark exhausted
+		  return false;
+
+		}
+		else
+		{
+
+		  // String.split() functionality
+		  while (matcher.find())
+		  {
+			if (matcher.start() - index > 0)
+			{
+			  // found a non-zero-length token
+			  termAtt.setEmpty().append(str, index, matcher.start());
+			  offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
+			  index = matcher.end();
+			  return true;
+			}
+
+			index = matcher.end();
+		  }
+
+		  if (str.Length - index == 0)
+		  {
+			index = int.MaxValue; // mark exhausted
+			return false;
+		  }
+
+		  termAtt.setEmpty().append(str, index, str.Length);
+		  offsetAtt.setOffset(correctOffset(index), correctOffset(str.Length));
+		  index = int.MaxValue; // mark exhausted
+		  return true;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ofs = correctOffset(str.length());
+		int ofs = correctOffset(str.Length);
+		offsetAtt.setOffset(ofs, ofs);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		fillBuffer(str, input);
+		matcher.reset(str);
+		index = 0;
+	  }
+
+	  // TODO: we should see if we can make this tokenizer work without reading
+	  // the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
+	  internal readonly char[] buffer = new char[8192];
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void fillBuffer(StringBuilder sb, java.io.Reader input) throws java.io.IOException
+	  private void fillBuffer(StringBuilder sb, Reader input)
+	  {
+		int len;
+		sb.Length = 0;
+		while ((len = input.read(buffer)) > 0)
+		{
+		  sb.Append(buffer, 0, len);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

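The group semantics documented above can be exercised directly; a sketch using the
raw-ported Java Pattern/Reader types retained by this commit:

    // group = 1 extracts capture 1; group = -1 behaves like String.split
    var tok = new PatternTokenizer(reader, Pattern.compile("'([^']+)'"), 1);
    // input: aaa 'bbb' 'ccc'  ->  tokens: bbb, ccc (group = 0 would keep the quote marks)
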
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizerFactory.cs
new file mode 100644
index 0000000..c0b15c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizerFactory.cs
@@ -0,0 +1,94 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternTokenizer"/>.
+	/// This tokenizer uses regex pattern matching to construct distinct tokens
+	/// for the input stream.  It takes two arguments:  "pattern" and "group".
+	/// <p/>
+	/// <ul>
+	/// <li>"pattern" is the regular expression.</li>
+	/// <li>"group" says which group to extract into tokens.</li>
+	///  </ul>
+	/// <para>
+	/// group=-1 (the default) is equivalent to "split".  In this case, the tokens will
+	/// be equivalent to the output from (without empty tokens):
+	/// <seealso cref="String#split(java.lang.String)"/>
+	/// </para>
+	/// <para>
+	/// Using group >= 0 selects the matching group as the token.  For example, if you have:<br/>
+	/// <pre>
+	///  pattern = \'([^\']+)\'
+	///  group = 0
+	///  input = aaa 'bbb' 'ccc'
+	/// </pre>
+	/// the output will be two tokens: 'bbb' and 'ccc' (including the ' marks).  With the same input
+	/// but using group=1, the output would be: bbb and ccc (no ' marks)
+	/// </para>
+	/// <para>NOTE: This Tokenizer does not output tokens that are of zero length.</para>
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	/// <seealso cref= PatternTokenizer
+	/// @since solr1.2 </seealso>
+	public class PatternTokenizerFactory : TokenizerFactory
+	{
+	  public const string PATTERN = "pattern";
+	  public const string GROUP = "group";
+
+	  protected internal readonly Pattern pattern;
+	  protected internal readonly int group;
+
+	  /// <summary>
+	  /// Creates a new PatternTokenizerFactory </summary>
+	  public PatternTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, PATTERN);
+		group = getInt(args, GROUP, -1);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Split the input using configured pattern
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public PatternTokenizer create(final org.apache.lucene.util.AttributeSource.AttributeFactory factory, final java.io.Reader in)
+	  public override PatternTokenizer create(AttributeFactory factory, Reader @in)
+	  {
+		return new PatternTokenizer(factory, @in, pattern, group);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/AbstractEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/AbstractEncoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/AbstractEncoder.cs
new file mode 100644
index 0000000..a336f2b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/AbstractEncoder.cs
@@ -0,0 +1,39 @@
+namespace org.apache.lucene.analysis.payloads
+{
+
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+
+	/// <summary>
+	/// Base class for payload encoders.
+	/// </summary>
+	public abstract class AbstractEncoder : PayloadEncoder
+	{
+	  public abstract BytesRef encode(char[] buffer, int offset, int length);
+
+	  public virtual BytesRef encode(char[] buffer)
+	  {
+		return encode(buffer, 0, buffer.Length);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilter.cs
new file mode 100644
index 0000000..1c03b4e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilter.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+
+	/// <summary>
+	/// Characters before the delimiter are the "token", those after are the payload.
+	/// <p/>
+	/// For example, if the delimiter is '|', then for the string "foo|bar", "foo" is the token
+	/// and "bar" is the payload.
+	/// <p/>
+	/// Note that you can also supply a <seealso cref="org.apache.lucene.analysis.payloads.PayloadEncoder"/> to convert the payload in an appropriate way (from characters to bytes).
+	/// <p/>
+	/// Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
+	/// </summary>
+	/// <seealso cref= PayloadEncoder </seealso>
+	public sealed class DelimitedPayloadTokenFilter : TokenFilter
+	{
+	  public const char DEFAULT_DELIMITER = '|';
+	  private readonly char delimiter;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PayloadAttribute payAtt = addAttribute(typeof(PayloadAttribute));
+	  private readonly PayloadEncoder encoder;
+
+
+	  public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) : base(input)
+	  {
+		this.delimiter = delimiter;
+		this.encoder = encoder;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  char[] buffer = termAtt.buffer();
+		  int length = termAtt.length();
+		  for (int i = 0; i < length; i++)
+		  {
+			if (buffer[i] == delimiter)
+			{
+			  payAtt.Payload = encoder.encode(buffer, i + 1, (length - (i + 1)));
+			  termAtt.Length = i; // simply set a new length
+			  return true;
+			}
+		  }
+		  // we have not seen the delimiter
+		  payAtt.Payload = null;
+		  return true;
+		}
+		else
+		{
+			return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
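
A usage sketch for the filter above, assuming a whitespace tokenizer from this
port (so the '|' delimiter survives tokenization) and the default delimiter:

    // For the input "foo|bar" the filter emits the term "foo" and attaches
    // the UTF-8 bytes of "bar" (via IdentityEncoder) as the token's payload.
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("foo|bar"));
    ts = new DelimitedPayloadTokenFilter(ts, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());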

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
new file mode 100644
index 0000000..4d5dd75
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
@@ -0,0 +1,85 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.payloads
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="DelimitedPayloadTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class DelimitedPayloadTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string ENCODER_ATTR = "encoder";
+	  public const string DELIMITER_ATTR = "delimiter";
+
+	  private readonly string encoderClass;
+	  private readonly char delimiter;
+
+	  private PayloadEncoder encoder;
+
+	  /// <summary>
+	  /// Creates a new DelimitedPayloadTokenFilterFactory </summary>
+	  public DelimitedPayloadTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		encoderClass = require(args, ENCODER_ATTR);
+		delimiter = getChar(args, DELIMITER_ATTR, '|');
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override DelimitedPayloadTokenFilter create(TokenStream input)
+	  {
+		return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
+	  }
+
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (encoderClass.Equals("float"))
+		{
+		  encoder = new FloatEncoder();
+		}
+		else if (encoderClass.Equals("integer"))
+		{
+		  encoder = new IntegerEncoder();
+		}
+		else if (encoderClass.Equals("identity"))
+		{
+		  encoder = new IdentityEncoder();
+		}
+		else
+		{
+		  encoder = loader.newInstance(encoderClass, typeof(PayloadEncoder));
+		}
+	  }
+	}
+}
\ No newline at end of file
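
The factory side of the same setup, as a sketch; `loader` stands in for any
ResourceLoader instance available to the caller:

    var args = new Dictionary<string, string>
    {
        { DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float" }, // -> FloatEncoder
        { DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "|" }
    };
    var factory = new DelimitedPayloadTokenFilterFactory(args);
    factory.inform(loader); // the encoder is resolved here, so inform() must run before create()

Deferring encoder resolution to inform() is what lets a custom PayloadEncoder
class name be loaded through the ResourceLoader instead of only the three
built-in aliases.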

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/FloatEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/FloatEncoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/FloatEncoder.cs
new file mode 100644
index 0000000..05cb90d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/FloatEncoder.cs
@@ -0,0 +1,41 @@
+using System.Globalization;
+
+namespace org.apache.lucene.analysis.payloads
+{
+
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Encode a character array representation of a float as a <seealso cref="BytesRef"/>. </summary>
+	/// <seealso cref= org.apache.lucene.analysis.payloads.PayloadHelper#encodeFloat(float, byte[], int) </seealso>
+	public class FloatEncoder : AbstractEncoder, PayloadEncoder
+	{
+
+	  public override BytesRef encode(char[] buffer, int offset, int length)
+	  {
+		// Parse with the invariant culture to match Java's Float.parseFloat semantics
+		float payload = float.Parse(new string(buffer, offset, length), CultureInfo.InvariantCulture); //TODO: improve this so that we don't have to new Strings
+		sbyte[] bytes = PayloadHelper.encodeFloat(payload);
+		return new BytesRef(bytes);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IdentityEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IdentityEncoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IdentityEncoder.cs
new file mode 100644
index 0000000..f204244
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IdentityEncoder.cs
@@ -0,0 +1,63 @@
+using System;
+using System.Text;
+
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Does nothing other than convert the char array to a byte array using the specified encoding.
+	/// </summary>
+	public class IdentityEncoder : AbstractEncoder, PayloadEncoder
+	{
+	  protected internal Encoding charset = Encoding.UTF8;
+
+	  public IdentityEncoder()
+	  {
+	  }
+
+	  public IdentityEncoder(Encoding charset)
+	  {
+		this.charset = charset;
+	  }
+
+	  public override BytesRef encode(char[] buffer, int offset, int length)
+	  {
+		// .NET has no direct Charset/ByteBuffer equivalent of the Java code,
+		// so encode the chars to a byte[] and copy into the sbyte[] this port uses.
+		byte[] encoded = charset.GetBytes(buffer, offset, length);
+		sbyte[] b = new sbyte[encoded.Length];
+		Buffer.BlockCopy(encoded, 0, b, 0, encoded.Length);
+		return new BytesRef(b);
+	  }
+	}
+
+}
\ No newline at end of file
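
In isolation the encoder is a straight charset conversion; a short sketch:

    var enc = new IdentityEncoder(Encoding.UTF8);
    char[] chars = "bar".ToCharArray();
    BytesRef payload = enc.encode(chars, 0, chars.Length); // the UTF-8 bytes of "bar"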

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IntegerEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IntegerEncoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IntegerEncoder.cs
new file mode 100644
index 0000000..1533f9e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/IntegerEncoder.cs
@@ -0,0 +1,42 @@
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Encode a character array representation of an integer as a <seealso cref="BytesRef"/>.
+	/// <p/>
+	/// See <seealso cref="org.apache.lucene.analysis.payloads.PayloadHelper#encodeInt(int, byte[], int)"/>.
+	/// </summary>
+	public class IntegerEncoder : AbstractEncoder, PayloadEncoder
+	{
+
+	  public override BytesRef encode(char[] buffer, int offset, int length)
+	  {
+		int payload = ArrayUtil.parseInt(buffer, offset, length); //TODO: improve this so that we don't have to new Strings
+		sbyte[] bytes = PayloadHelper.encodeInt(payload);
+		BytesRef result = new BytesRef(bytes);
+		return result;
+	  }
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilter.cs
new file mode 100644
index 0000000..629fef0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilter.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Assigns a payload to a token based on the <seealso cref="org.apache.lucene.analysis.Token#type()"/>.
+	/// </summary>
+	public class NumericPayloadTokenFilter : TokenFilter
+	{
+
+	  private string typeMatch;
+	  private BytesRef thePayload;
+
+	  private readonly PayloadAttribute payloadAtt = addAttribute(typeof(PayloadAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+	  public NumericPayloadTokenFilter(TokenStream input, float payload, string typeMatch) : base(input)
+	  {
+		if (typeMatch == null)
+		{
+		  throw new System.ArgumentException("typeMatch cannot be null");
+		}
+		//Need to encode the payload
+		thePayload = new BytesRef(PayloadHelper.encodeFloat(payload));
+		this.typeMatch = typeMatch;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (typeAtt.type().Equals(typeMatch))
+		  {
+			payloadAtt.Payload = thePayload;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
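
A short sketch of the filter's effect; `someInput` is a placeholder for any
upstream TokenStream:

    // Every token whose type() equals "word" (the default type most
    // tokenizers emit) gets 24.0f, encoded via PayloadHelper, as its payload;
    // other tokens pass through unchanged.
    TokenStream ts = new NumericPayloadTokenFilter(someInput, 24f, "word");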

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilterFactory.cs
new file mode 100644
index 0000000..c6e21d6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/NumericPayloadTokenFilterFactory.cs
@@ -0,0 +1,60 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.payloads
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="NumericPayloadTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_numpayload" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.NumericPayloadTokenFilterFactory" payload="24" typeMatch="word"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NumericPayloadTokenFilterFactory : TokenFilterFactory
+	{
+	  private readonly float payload;
+	  private readonly string typeMatch;
+
+	  /// <summary>
+	  /// Creates a new NumericPayloadTokenFilterFactory </summary>
+	  public NumericPayloadTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		payload = requireFloat(args, "payload");
+		typeMatch = require(args, "typeMatch");
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override NumericPayloadTokenFilter create(TokenStream input)
+	  {
+		return new NumericPayloadTokenFilter(input, payload, typeMatch);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadEncoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadEncoder.cs
new file mode 100644
index 0000000..312f335
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadEncoder.cs
@@ -0,0 +1,43 @@
+namespace org.apache.lucene.analysis.payloads
+{
+
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to
+	/// <seealso cref="BytesRef"/>.
+	/// <p/>
+	/// NOTE: This interface is subject to change.
+	/// </summary>
+	public interface PayloadEncoder
+	{
+
+	  /// <summary>
+	  /// Convert an entire char array to a <seealso cref="BytesRef"/> </summary>
+	  BytesRef encode(char[] buffer);
+
+	  /// <summary>
+	  /// Convert a char array to a <seealso cref="BytesRef"/> </summary>
+	  /// <returns> encoded <seealso cref="BytesRef"/> </returns>
+	  BytesRef encode(char[] buffer, int offset, int length);
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadHelper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadHelper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadHelper.cs
new file mode 100644
index 0000000..00eb10e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/PayloadHelper.cs
@@ -0,0 +1,81 @@
+using System;
+
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Utility methods for encoding payloads.
+	/// </summary>
+	public class PayloadHelper
+	{
+
+	  public static sbyte[] encodeFloat(float payload)
+	  {
+		return encodeFloat(payload, new sbyte[4], 0);
+	  }
+
+	  public static sbyte[] encodeFloat(float payload, sbyte[] data, int offset)
+	  {
+		// .NET equivalent of Java's Float.floatToIntBits
+		return encodeInt(BitConverter.ToInt32(BitConverter.GetBytes(payload), 0), data, offset);
+	  }
+
+	  public static sbyte[] encodeInt(int payload)
+	  {
+		return encodeInt(payload, new sbyte[4], 0);
+	  }
+
+	  public static sbyte[] encodeInt(int payload, sbyte[] data, int offset)
+	  {
+		data[offset] = (sbyte)(payload >> 24);
+		data[offset + 1] = (sbyte)(payload >> 16);
+		data[offset + 2] = (sbyte)(payload >> 8);
+		data[offset + 3] = (sbyte) payload;
+		return data;
+	  }
+
+	  /// <seealso cref= #decodeFloat(byte[], int) </seealso>
+	  /// <seealso cref= #encodeFloat(float) </seealso>
+	  /// <returns> the decoded float </returns>
+	  public static float decodeFloat(sbyte[] bytes)
+	  {
+		return decodeFloat(bytes, 0);
+	  }
+
+	  /// <summary>
+	  /// Decode the payload that was encoded using <seealso cref="#encodeFloat(float)"/>.
+	  /// NOTE: the length of the array must be at least offset + 4 long. </summary>
+	  /// <param name="bytes"> The bytes to decode </param>
+	  /// <param name="offset"> The offset into the array. </param>
+	  /// <returns> The float that was encoded
+	  /// </returns>
+	  /// <seealso cref= #encodeFloat(float) </seealso>
+	  public static float decodeFloat(sbyte[] bytes, int offset)
+	  {
+		// .NET equivalent of Java's Float.intBitsToFloat
+		return BitConverter.ToSingle(BitConverter.GetBytes(decodeInt(bytes, offset)), 0);
+	  }
+
+	  public static int decodeInt(sbyte[] bytes, int offset)
+	  {
+		return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16) | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+	  }
+	}
+
+}
\ No newline at end of file
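
A worked round trip over the helpers above; the layout is big-endian, so
259 = 0x00000103:

    sbyte[] intBytes = PayloadHelper.encodeInt(259);       // { 0x00, 0x00, 0x01, 0x03 }
    int n = PayloadHelper.decodeInt(intBytes, 0);          // 259
    sbyte[] floatBytes = PayloadHelper.encodeFloat(0.5f);  // big-endian IEEE 754 bits
    float f = PayloadHelper.decodeFloat(floatBytes);       // 0.5f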

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilter.cs
new file mode 100644
index 0000000..b08d0a4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilter.cs
@@ -0,0 +1,61 @@
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Stores the <seealso cref="OffsetAttribute#startOffset()"/>
+	/// and <seealso cref="OffsetAttribute#endOffset()"/> as the token's payload:
+	/// the first 4 bytes are the start offset, the last 4 are the end offset.
+	/// </summary>
+	public class TokenOffsetPayloadTokenFilter : TokenFilter
+	{
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PayloadAttribute payAtt = addAttribute(typeof(PayloadAttribute));
+
+	  public TokenOffsetPayloadTokenFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  sbyte[] data = new sbyte[8];
+		  PayloadHelper.encodeInt(offsetAtt.startOffset(), data, 0);
+		  PayloadHelper.encodeInt(offsetAtt.endOffset(), data, 4);
+		  BytesRef payload = new BytesRef(data);
+		  payAtt.Payload = payload;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+}
\ No newline at end of file
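
Reading the 8-byte payload back is two decodeInt calls; a sketch, assuming
the payload's byte array starts at offset 0 and the port exposes the Java
field names on BytesRef:

    BytesRef payload = payAtt.Payload;                      // as set by the filter above
    int start = PayloadHelper.decodeInt(payload.bytes, 0);
    int end = PayloadHelper.decodeInt(payload.bytes, 4);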

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilterFactory.cs
new file mode 100644
index 0000000..f06a9d8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TokenOffsetPayloadTokenFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.payloads
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="TokenOffsetPayloadTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_tokenoffset" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.TokenOffsetPayloadTokenFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class TokenOffsetPayloadTokenFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new TokenOffsetPayloadTokenFilterFactory </summary>
+	  public TokenOffsetPayloadTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenOffsetPayloadTokenFilter create(TokenStream input)
+	  {
+		return new TokenOffsetPayloadTokenFilter(input);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilter.cs
new file mode 100644
index 0000000..0c1bb7b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilter.cs
@@ -0,0 +1,62 @@
+namespace org.apache.lucene.analysis.payloads
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Makes the <seealso cref="org.apache.lucene.analysis.Token#type()"/> a payload.
+	/// <p/>
+	/// Encodes the type using <seealso cref="String#getBytes(String)"/> with "UTF-8" as the encoding.
+	/// </summary>
+	public class TypeAsPayloadTokenFilter : TokenFilter
+	{
+	  private readonly PayloadAttribute payloadAtt = addAttribute(typeof(PayloadAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+	  public TypeAsPayloadTokenFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  string type = typeAtt.type();
+		  if (type != null && type.Length > 0)
+		  {
+			payloadAtt.Payload = new BytesRef(type);
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilterFactory.cs
new file mode 100644
index 0000000..88c236c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/TypeAsPayloadTokenFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.payloads
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="TypeAsPayloadTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_typeaspayload" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.TypeAsPayloadTokenFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class TypeAsPayloadTokenFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new TypeAsPayloadTokenFilterFactory </summary>
+	  public TypeAsPayloadTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TypeAsPayloadTokenFilter create(TokenStream input)
+	  {
+		return new TypeAsPayloadTokenFilter(input);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
new file mode 100644
index 0000000..92f73bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
@@ -0,0 +1,109 @@
+using System;
+
+namespace org.apache.lucene.analysis.position
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Set the positionIncrement of all tokens to the configured "positionIncrement",
+	/// except for the first token returned, which retains its original positionIncrement value.
+	/// The default positionIncrement value is zero. </summary>
+	/// @deprecated (4.4) PositionFilter makes <seealso cref="TokenStream"/> graphs inconsistent
+	///             which can cause highlighting bugs. Its main use-case being to make
+	///             <a href="{@docRoot}/../queryparser/overview-summary.html">QueryParser</a>
+	///             generate boolean queries instead of phrase queries, it is now advised to use
+	///             {@code QueryParser.setAutoGeneratePhraseQueries(boolean)}
+	///             (for simple cases) or to override {@code QueryParser.newFieldQuery}. 
+	[Obsolete("(4.4) PositionFilter makes TokenStream graphs inconsistent which can cause highlighting bugs")]
+	public sealed class PositionFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// Position increment to assign to all but the first token - default = 0 </summary>
+	  private readonly int positionIncrement;
+
+	  /// <summary>
+	  /// The first token must have a non-zero positionIncrement. </summary>
+	  private bool firstTokenPositioned = false;
+
+	  private PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  /// <summary>
+	  /// Constructs a PositionFilter that assigns a position increment of zero to
+	  /// all but the first token from the given input stream.
+	  /// </summary>
+	  /// <param name="input"> the input stream </param>
+	  public PositionFilter(TokenStream input) : this(input, 0)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Constructs a PositionFilter that assigns the given position increment to
+	  /// all but the first token from the given input stream.
+	  /// </summary>
+	  /// <param name="input"> the input stream </param>
+	  /// <param name="positionIncrement"> position increment to assign to all but the first
+	  ///  token from the input stream </param>
+	  public PositionFilter(TokenStream input, int positionIncrement) : base(input)
+	  {
+		if (positionIncrement < 0)
+		{
+		  throw new System.ArgumentException("positionIncrement may not be negative");
+		}
+		this.positionIncrement = positionIncrement;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (firstTokenPositioned)
+		  {
+			posIncrAtt.PositionIncrement = positionIncrement;
+		  }
+		  else
+		  {
+			firstTokenPositioned = true;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		firstTokenPositioned = false;
+	  }
+	}
+
+}
\ No newline at end of file
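
To illustrate the (deprecated) filter's behavior, a sketch assuming a
whitespace tokenizer from this port:

    // For "quick brown fox": the first token keeps its increment (1), while
    // "brown" and "fox" get increment 0, so all three terms stack at the
    // same position -- the behavior that breaks TokenStream graphs.
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("quick brown fox"));
    ts = new PositionFilter(ts); // default positionIncrement = 0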

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
new file mode 100644
index 0000000..74bf1e4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
@@ -0,0 +1,70 @@
+using System;
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.position
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Factory for <seealso cref="PositionFilter"/>.
+	/// Set the positionIncrement of all tokens to the configured "positionIncrement", except for the first token
+	/// returned, which retains its original positionIncrement value. The default positionIncrement value is zero.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_position" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.PositionFilterFactory" positionIncrement="0"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	/// <seealso cref= org.apache.lucene.analysis.position.PositionFilter
+	/// @since solr 1.4 </seealso>
+	/// @deprecated (4.4) 
+	[Obsolete("(4.4)")]
+	public class PositionFilterFactory : TokenFilterFactory
+	{
+	  private readonly int positionIncrement;
+
+	  /// <summary>
+	  /// Creates a new PositionFilterFactory </summary>
+	  public PositionFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		positionIncrement = getInt(args, "positionIncrement", 0);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+		if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+		{
+		  throw new System.ArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility");
+		}
+	  }
+
+	  public override PositionFilter create(TokenStream input)
+	  {
+		return new PositionFilter(input, positionIncrement);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseAnalyzer.cs
new file mode 100644
index 0000000..ac178f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseAnalyzer.cs
@@ -0,0 +1,155 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using PortugueseStemmer = org.tartarus.snowball.ext.PortugueseStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Portuguese.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating PortugueseAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, PortugueseLightStemFilter is used for less aggressive stemming.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class PortugueseAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Portuguese stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "portuguese_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException ex)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set", ex);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public PortugueseAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public PortugueseAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="PortugueseLightStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new PortugueseLightStemFilter(result);
+		}
+		else
+		{
+		  result = new SnowballFilter(result, new PortugueseStemmer());
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
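
A minimal usage sketch of the analyzer above; the tokenStream method name and
casing follow the Java API and are an assumption for this in-flux port:

    Analyzer analyzer = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
    // With matchVersion >= 3.6 the chain ends in PortugueseLightStemFilter;
    // earlier versions fall back to the Snowball PortugueseStemmer.
    TokenStream ts = analyzer.tokenStream("body", new StringReader("as meninas"));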

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilter.cs
new file mode 100644
index 0000000..3a0529b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="PortugueseLightStemmer"/> to stem 
+	/// Portuguese words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class PortugueseLightStemFilter : TokenFilter
+	{
+	  private readonly PortugueseLightStemmer stemmer = new PortugueseLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public PortugueseLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilterFactory.cs
new file mode 100644
index 0000000..3dd6ebc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PortugueseLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PortugueseLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PortugueseLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PortugueseLightStemFilterFactory </summary>
+	  public PortugueseLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new PortugueseLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemmer.cs
new file mode 100644
index 0000000..1262d8d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseLightStemmer.cs
@@ -0,0 +1,252 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Portuguese
+	/// <para>
+	/// This stemmer implements the "UniNE" algorithm in:
+	/// <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class PortugueseLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 4)
+		{
+		  return len;
+		}
+
+		len = removeSuffix(s, len);
+
+		if (len > 3 && s[len - 1] == 'a')
+		{
+		  len = normFeminine(s, len);
+		}
+
+		if (len > 4)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'e':
+			case 'a':
+			case 'o':
+				len--;
+				break;
+		  }
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ä':
+			case 'ã':
+				s[i] = 'a';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'ö':
+			case 'õ':
+				s[i] = 'o';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+			case 'ç':
+				s[i] = 'c';
+				break;
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int removeSuffix(char[] s, int len)
+	  {
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "es"))
+		{
+		  switch (s[len - 3])
+		  {
+			case 'r':
+			case 's':
+			case 'l':
+			case 'z':
+				return len - 2;
+		  }
+		}
+
+		if (len > 3 && StemmerUtil.EndsWith(s, len, "ns"))
+		{
+		  s[len - 2] = 'm';
+		  return len - 1;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "eis") || StemmerUtil.EndsWith(s, len, "éis")))
+		{
+		  s[len - 3] = 'e';
+		  s[len - 2] = 'l';
+		  return len - 1;
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ais"))
+		{
+		  s[len - 2] = 'l';
+		  return len - 1;
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "óis"))
+		{
+		  s[len - 3] = 'o';
+		  s[len - 2] = 'l';
+		  return len - 1;
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "is"))
+		{
+		  s[len - 1] = 'l';
+		  return len;
+		}
+
+		if (len > 3 && (StemmerUtil.EndsWith(s, len, "ões") || StemmerUtil.EndsWith(s, len, "ães")))
+		{
+		  len--;
+		  s[len - 2] = 'ã';
+		  s[len - 1] = 'o';
+		  return len;
+		}
+
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "mente"))
+		{
+		  return len - 5;
+		}
+
+		if (len > 3 && s[len - 1] == 's')
+		{
+		  return len - 1;
+		}
+		return len;
+	  }
+
+	  private int normFeminine(char[] s, int len)
+	  {
+		if (len > 7 && (StemmerUtil.EndsWith(s, len, "inha") || StemmerUtil.EndsWith(s, len, "iaca") || StemmerUtil.EndsWith(s, len, "eira")))
+		{
+		  s[len - 1] = 'o';
+		  return len;
+		}
+
+		if (len > 6)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "osa") || StemmerUtil.EndsWith(s, len, "ica") || StemmerUtil.EndsWith(s, len, "ida") || StemmerUtil.EndsWith(s, len, "ada") || StemmerUtil.EndsWith(s, len, "iva") || StemmerUtil.EndsWith(s, len, "ama"))
+		  {
+			s[len - 1] = 'o';
+			return len;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ona"))
+		  {
+			s[len - 3] = 'ã';
+			s[len - 2] = 'o';
+			return len - 1;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ora"))
+		  {
+			return len - 1;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "esa"))
+		  {
+			s[len - 3] = 'ê';
+			return len - 1;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "na"))
+		  {
+			s[len - 1] = 'o';
+			return len;
+		  }
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

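Tracing the rules above end to end: removeSuffix() strips plural endings, normFeminine() maps feminine endings back to a masculine base, a final vowel is dropped for longer stems, and the last loop folds accents. A worked example exercising the public stem() API directly (the resulting stem is derived by hand from the code above, not from a test run):

    var stemmer = new PortugueseLightStemmer();
    char[] buf = "meninas".ToCharArray();
    int len = stemmer.stem(buf, buf.Length);   // "meninas" -> "menina" -> drop final 'a'
    string stem = new string(buf, 0, len);     // "menin"
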
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilter.cs
new file mode 100644
index 0000000..e9a0d26
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="PortugueseMinimalStemmer"/> to stem 
+	/// Portuguese words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class PortugueseMinimalStemFilter : TokenFilter
+	{
+	  private readonly PortugueseMinimalStemmer stemmer = new PortugueseMinimalStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public PortugueseMinimalStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

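As the doc comment notes, terms can be shielded from stemming by setting the KeywordAttribute upstream. A hedged sketch (the SetKeywordMarkerFilter and CharArraySet constructor signatures are assumed from the corresponding Lucene 4.x API; tokenizer is an assumed Tokenizer instance):

    var protectedTerms = new CharArraySet(matchVersion, new[] { "brasil" }, true);
    TokenStream ts = new SetKeywordMarkerFilter(tokenizer, protectedTerms);
    ts = new PortugueseMinimalStemFilter(ts);   // keyword-marked terms pass through unstemmed
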
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..e893dad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PortugueseMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PortugueseMinimalStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PortugueseMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PortugueseMinimalStemFilterFactory </summary>
+	  public PortugueseMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new PortugueseMinimalStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file


[04/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
new file mode 100644
index 0000000..c0a52c6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -0,0 +1,151 @@
+using System;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Normalizes Turkish token text to lower case.
+	/// <para>
+	/// Turkish and Azeri have unique casing behavior for some characters. This
+	/// filter applies Turkish lowercase rules. For more information, see <a
+	/// href="http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I"
+	/// >http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I</a>
+	/// </para>
+	/// </summary>
+	public sealed class TurkishLowerCaseFilter : TokenFilter
+	{
+	  private const int LATIN_CAPITAL_LETTER_I = '\u0049';
+	  private const int LATIN_SMALL_LETTER_I = '\u0069';
+	  private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
+	  private const int COMBINING_DOT_ABOVE = '\u0307';
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Create a new TurkishLowerCaseFilter, that normalizes Turkish token text 
+	  /// to lower case.
+	  /// </summary>
+	  /// <param name="in"> TokenStream to filter </param>
+	  public TurkishLowerCaseFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		bool iOrAfter = false;
+
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		  char[] buffer = termAtt.buffer();
+		  int length = termAtt.length();
+		  for (int i = 0; i < length;)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ch = Character.codePointAt(buffer, i, length);
+			int ch = char.codePointAt(buffer, i, length);
+
+			iOrAfter = (ch == LATIN_CAPITAL_LETTER_I || (iOrAfter && char.getType(ch) == char.NON_SPACING_MARK));
+
+			if (iOrAfter) // all the special Turkish I handling happens here.
+			{
+			  switch (ch)
+			  {
+				// remove COMBINING_DOT_ABOVE to mimic composed lowercase
+				case COMBINING_DOT_ABOVE:
+				  length = delete(buffer, i, length);
+				  continue;
+				// i itself, it depends if it is followed by COMBINING_DOT_ABOVE
+				// if it is, we will make it small i and later remove the dot
+				case LATIN_CAPITAL_LETTER_I:
+				  if (isBeforeDot(buffer, i + 1, length))
+				  {
+					buffer[i] = (char)LATIN_SMALL_LETTER_I;
+				  }
+				  else
+				  {
+					buffer[i] = (char)LATIN_SMALL_LETTER_DOTLESS_I;
+					// below is an optimization. no COMBINING_DOT_ABOVE follows,
+					// so don't waste time calculating Character.getType(), etc
+					iOrAfter = false;
+				  }
+				  i++;
+				  continue;
+			  }
+			}
+
+			i += char.toChars(char.ToLower(ch), buffer, i);
+		  }
+
+		  termAtt.Length = length;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// lookahead for a combining dot above.
+	  /// other NSMs may be in between.
+	  /// </summary>
+	  private bool isBeforeDot(char[] s, int pos, int len)
+	  {
+		for (int i = pos; i < len;)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ch = Character.codePointAt(s, i, len);
+		  int ch = char.codePointAt(s, i, len);
+		  if (char.getType(ch) != char.NON_SPACING_MARK)
+		  {
+			return false;
+		  }
+		  if (ch == COMBINING_DOT_ABOVE)
+		  {
+			return true;
+		  }
+		  i += char.charCount(ch);
+		}
+
+		return false;
+	  }
+
+	  /// <summary>
+	  /// delete a character in-place.
+	  /// rarely happens, only if COMBINING_DOT_ABOVE is found after an i
+	  /// </summary>
+	  private int delete(char[] s, int pos, int len)
+	  {
+		if (pos < len)
+		{
+		  Array.Copy(s, pos + 1, s, pos, len - pos - 1);
+		}
+
+		return len - 1;
+	  }
+	}
+
+}
\ No newline at end of file

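The filter's behavior differs from a plain lowercase filter only around the letter I. A summary traced from the switch above (codepoint behavior read off the code, not test output):

    // "I"          -> "ı" (U+0131, dotless)   when no combining dot above follows
    // "I" + U+0307 -> "i"                     the combining dot is deleted in place
    // "İ" (U+0130) takes the default path and lowercases via char.ToLower
    // everything else: ordinary codepoint lowercasing

This is why Turkish text must not pass through a generic LowerCaseFilter first: it would map "I" to "i" and lose the dotted/dotless distinction.
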
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..7edf5e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="TurkishLowerCaseFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class TurkishLowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new TurkishLowerCaseFilterFactory </summary>
+	  public TurkishLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new TurkishLowerCaseFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

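The MultiTermComponent property returning this marks the factory as safe to apply at query time to multi-term queries (wildcards, prefixes, ranges), so the same Turkish lowercasing is used on both the index and query sides. A one-line sketch:

    AbstractAnalysisFactory mt = factory.MultiTermComponent;   // the factory itself
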
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
new file mode 100644
index 0000000..8cf5e28
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -0,0 +1,406 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Abstract parent class for analysis factories <seealso cref="TokenizerFactory"/>,
+	/// <seealso cref="TokenFilterFactory"/> and <seealso cref="CharFilterFactory"/>.
+	/// <para>
+	/// The typical lifecycle for a factory consumer is:
+	/// <ol>
+	///   <li>Create factory via its constructor (or via XXXFactory.forName)
+	///   <li>(Optional) If the factory uses resources such as files, <seealso cref="ResourceLoaderAware#inform(ResourceLoader)"/> is called to initialize those resources.
+	///   <li>Consumer calls create() to obtain instances.
+	/// </ol>
+	/// </para>
+	/// </summary>
+	public abstract class AbstractAnalysisFactory
+	{
+	  public const string LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
+
+	  /// <summary>
+	  /// The original args, before any processing </summary>
+	  private readonly IDictionary<string, string> originalArgs;
+
+	  /// <summary>
+	  /// the luceneVersion arg </summary>
+	  protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
+	  /// <summary>
+	  /// whether the luceneMatchVersion arg is explicitly specified in the serialized schema </summary>
+	  private bool isExplicitLuceneMatchVersion = false;
+
+	  /// <summary>
+	  /// Initialize this factory via a set of key-value pairs.
+	  /// </summary>
+	  protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
+	  {
+		originalArgs = Collections.UnmodifiableMap(new Dictionary<>(args));
+		string version = get(args, LUCENE_MATCH_VERSION_PARAM);
+		luceneMatchVersion = version == null ? null : Version.ParseLeniently(version);
+		args.Remove(CLASS_NAME); // consume the class arg
+	  }
+
+	  public IDictionary<string, string> OriginalArgs
+	  {
+		  get
+		  {
+			return originalArgs;
+		  }
+	  }
+
+	   /// <summary>
+	   /// this method can be called in the <seealso cref="TokenizerFactory#create(java.io.Reader)"/>
+	   /// or <seealso cref="TokenFilterFactory#create(org.apache.lucene.analysis.TokenStream)"/> methods,
+	   /// to inform the user that this factory requires a <seealso cref="#luceneMatchVersion"/> 
+	   /// </summary>
+	  protected internal void assureMatchVersion()
+	  {
+		if (luceneMatchVersion == null)
+		{
+//JAVA TO C# CONVERTER WARNING: The .NET Type.FullName property will not always yield results identical to the Java Class.getName method:
+		  throw new System.ArgumentException("Configuration Error: Factory '" + this.GetType().FullName + "' needs a 'luceneMatchVersion' parameter");
+		}
+	  }
+
+	  public Version LuceneMatchVersion
+	  {
+		  get
+		  {
+			return this.luceneMatchVersion;
+		  }
+	  }
+
+	  public virtual string require(IDictionary<string, string> args, string name)
+	  {
+		string s = args.Remove(name);
+		if (s == null)
+		{
+		  throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+		}
+		return s;
+	  }
+	  public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+	  {
+		return require(args, name, allowedValues, true);
+	  }
+	  public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues, bool caseSensitive)
+	  {
+		string s = args.Remove(name);
+		if (s == null)
+		{
+		  throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+		}
+		else
+		{
+		  foreach (string allowedValue in allowedValues)
+		  {
+			if (caseSensitive)
+			{
+			  if (s.Equals(allowedValue))
+			  {
+				return s;
+			  }
+			}
+			else
+			{
+			  if (s.Equals(allowedValue, StringComparison.CurrentCultureIgnoreCase))
+			  {
+				return s;
+			  }
+			}
+		  }
+		  throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+		}
+	  }
+	  public virtual string get(IDictionary<string, string> args, string name)
+	  {
+		return args.Remove(name); // defaultVal = null
+	  }
+	  public virtual string get(IDictionary<string, string> args, string name, string defaultVal)
+	  {
+		string s = args.Remove(name);
+		return s == null ? defaultVal : s;
+	  }
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+	  {
+		return get(args, name, allowedValues, null); // defaultVal = null
+	  }
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal)
+	  {
+		return get(args, name, allowedValues, defaultVal, true);
+	  }
+	  public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal, bool caseSensitive)
+	  {
+		string s = args.Remove(name);
+		if (s == null)
+		{
+		  return defaultVal;
+		}
+		else
+		{
+		  foreach (string allowedValue in allowedValues)
+		  {
+			if (caseSensitive)
+			{
+			  if (s.Equals(allowedValue))
+			  {
+				return s;
+			  }
+			}
+			else
+			{
+			  if (s.Equals(allowedValue, StringComparison.CurrentCultureIgnoreCase))
+			  {
+				return s;
+			  }
+			}
+		  }
+		  throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of " + allowedValues);
+		}
+	  }
+
+	  protected internal int requireInt(IDictionary<string, string> args, string name)
+	  {
+		return int.Parse(require(args, name));
+	  }
+	  protected internal int getInt(IDictionary<string, string> args, string name, int defaultVal)
+	  {
+		string s = args.Remove(name);
+		return s == null ? defaultVal : int.Parse(s);
+	  }
+
+	  protected internal bool requireBoolean(IDictionary<string, string> args, string name)
+	  {
+		return bool.Parse(require(args, name));
+	  }
+	  protected internal bool getBoolean(IDictionary<string, string> args, string name, bool defaultVal)
+	  {
+		string s = args.Remove(name);
+		return s == null ? defaultVal : bool.Parse(s);
+	  }
+
+	  protected internal float requireFloat(IDictionary<string, string> args, string name)
+	  {
+		return float.Parse(require(args, name));
+	  }
+	  protected internal float getFloat(IDictionary<string, string> args, string name, float defaultVal)
+	  {
+		string s = args.Remove(name);
+		return s == null ? defaultVal : float.Parse(s);
+	  }
+
+	  public virtual char requireChar(IDictionary<string, string> args, string name)
+	  {
+		return require(args, name)[0];
+	  }
+	  public virtual char getChar(IDictionary<string, string> args, string name, char defaultValue)
+	  {
+		string s = args.Remove(name);
+		if (s == null)
+		{
+		  return defaultValue;
+		}
+		else
+		{
+		  if (s.Length != 1)
+		  {
+			throw new System.ArgumentException(name + " should be a char. \"" + s + "\" is invalid");
+		  }
+		  else
+		  {
+			return s[0];
+		  }
+		}
+	  }
+
+	  private static readonly Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");
+
+	  /// <summary>
+	  /// Returns the whitespace- and/or comma-separated set of values, or null if none are found </summary>
+	  public virtual HashSet<string> getSet(IDictionary<string, string> args, string name)
+	  {
+		string s = args.Remove(name);
+		if (s == null)
+		{
+		 return null;
+		}
+		else
+		{
+		  HashSet<string> set = null;
+		  Matcher matcher = ITEM_PATTERN.matcher(s);
+		  if (matcher.find())
+		  {
+			set = new HashSet<>();
+			set.Add(matcher.group(0));
+			while (matcher.find())
+			{
+			  set.Add(matcher.group(0));
+			}
+		  }
+		  return set;
+		}
+	  }
+
+	  /// <summary>
+	  /// Compiles a pattern for the value of the specified argument key <code>name</code> 
+	  /// </summary>
+	  protected internal Pattern GetPattern(IDictionary<string, string> args, string name)
+	  {
+		try
+		{
+		  return Pattern.compile(require(args, name));
+		}
+		catch (PatternSyntaxException e)
+		{
+		  throw new System.ArgumentException("Configuration Error: '" + name + "' can not be parsed in " + this.GetType().Name, e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns a <seealso cref="CharArraySet"/> built from wordFiles, which
+	  /// can be a comma-separated list of filenames
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
+	  protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+	  {
+		assureMatchVersion();
+		IList<string> files = splitFileNames(wordFiles);
+		CharArraySet words = null;
+		if (files.Count > 0)
+		{
+		  // default stopwords list has 35 or so words, but maybe don't make it that
+		  // big to start
+		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+		  foreach (string file in files)
+		  {
+			IList<string> wlist = getLines(loader, file.Trim());
+			words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase));
+		  }
+		}
+		return words;
+	  }
+
+	  /// <summary>
+	  /// Returns the resource's lines (with content treated as UTF-8)
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final java.util.List<String> getLines(ResourceLoader loader, String resource) throws java.io.IOException
+	  protected internal IList<string> getLines(ResourceLoader loader, string resource)
+	  {
+		return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
+	  }
+
+	  /// <summary>
+	  /// same as <seealso cref="#getWordSet(ResourceLoader, String, boolean)"/>,
+	  /// except the input is in snowball format. 
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected final CharArraySet getSnowballWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
+	  protected internal CharArraySet getSnowballWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+	  {
+		assureMatchVersion();
+		IList<string> files = splitFileNames(wordFiles);
+		CharArraySet words = null;
+		if (files.Count > 0)
+		{
+		  // default stopwords list has 35 or so words, but maybe don't make it that
+		  // big to start
+		  words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+		  foreach (string file in files)
+		  {
+			InputStream stream = null;
+			Reader reader = null;
+			try
+			{
+			  stream = loader.openResource(file.Trim());
+			  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+			  reader = new InputStreamReader(stream, decoder);
+			  WordlistLoader.getSnowballWordSet(reader, words);
+			}
+			finally
+			{
+			  IOUtils.closeWhileHandlingException(reader, stream);
+			}
+		  }
+		}
+		return words;
+	  }
+
+	  /// <summary>
+	  /// Splits file names separated by comma character.
+	  /// File names can contain comma characters escaped by backslash '\'
+	  /// </summary>
+	  /// <param name="fileNames"> the string containing file names </param>
+	  /// <returns> a list of file names with the escaping backslashes removed </returns>
+	  protected internal IList<string> splitFileNames(string fileNames)
+	  {
+		if (fileNames == null)
+		{
+		  return System.Linq.Enumerable.Empty<string>();
+		}
+
+		IList<string> result = new List<string>();
+		foreach (string file in fileNames.Split("(?<!\\\\),", true))
+		{
+		  result.Add(file.replaceAll("\\\\(?=,)", ""));
+		}
+
+		return result;
+	  }
+
+	  private const string CLASS_NAME = "class";
+
+	  /// <returns> the string used to specify the concrete class name in a serialized representation: the class arg.  
+	  ///         If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}. </returns>
+	  public virtual string ClassArg
+	  {
+		  get
+		  {
+			if (null != originalArgs)
+			{
+			  string className = originalArgs[CLASS_NAME];
+			  if (null != className)
+			  {
+				return className;
+			  }
+			}
+			return this.GetType().Name;
+		  }
+	  }
+
+	  public virtual bool ExplicitLuceneMatchVersion
+	  {
+		  get
+		  {
+			return isExplicitLuceneMatchVersion;
+		  }
+		  set
+		  {
+			this.isExplicitLuceneMatchVersion = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

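The require*/get* helpers above all consume (remove) their key from args, which is what makes the trailing "if (args.Count > 0) throw" check in every concrete factory work. A minimal hypothetical subclass showing the pattern (MyStemFilterFactory, minLength, and MyStemFilter are illustrative names, not part of this commit):

    public class MyStemFilterFactory : TokenFilterFactory
    {
        private readonly int minLength;

        public MyStemFilterFactory(IDictionary<string, string> args) : base(args)
        {
            minLength = getInt(args, "minLength", 4);   // optional, consumed if present
            if (args.Count > 0)                          // anything left is a config error
            {
                throw new System.ArgumentException("Unknown parameters: " + args);
            }
        }

        public override TokenStream create(TokenStream input)
        {
            return new MyStemFilter(input, minLength);
        }
    }
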
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
new file mode 100644
index 0000000..351446f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -0,0 +1,165 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SPIClassIterator = org.apache.lucene.util.SPIClassIterator;
+
+	/// <summary>
+	/// Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
+	/// @lucene.internal
+	/// </summary>
+	internal sealed class AnalysisSPILoader<S> where S : AbstractAnalysisFactory
+	{
+
+	  private volatile IDictionary<string, Type> services = Collections.emptyMap();
+	  private readonly Type clazz;
+	  private readonly string[] suffixes;
+
+	  public AnalysisSPILoader(Type clazz) : this(clazz, new string[] {clazz.SimpleName})
+	  {
+	  }
+
+	  public AnalysisSPILoader(Type clazz, ClassLoader loader) : this(clazz, new string[] {clazz.SimpleName}, loader)
+	  {
+	  }
+
+	  public AnalysisSPILoader(Type clazz, string[] suffixes) : this(clazz, suffixes, Thread.CurrentThread.ContextClassLoader)
+	  {
+	  }
+
+	  public AnalysisSPILoader(Type clazz, string[] suffixes, ClassLoader classloader)
+	  {
+		this.clazz = clazz;
+		this.suffixes = suffixes;
+		// if clazz' classloader is not a parent of the given one, we scan clazz's classloader, too:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final ClassLoader clazzClassloader = clazz.getClassLoader();
+		ClassLoader clazzClassloader = clazz.ClassLoader;
+		if (clazzClassloader != null && !SPIClassIterator.isParentClassLoader(clazzClassloader, classloader))
+		{
+		  reload(clazzClassloader);
+		}
+		reload(classloader);
+	  }
+
+	  /// <summary>
+	  /// Reloads the internal SPI list from the given <seealso cref="ClassLoader"/>.
+	  /// Changes to the service list are visible after the method ends, all
+	  /// iterators (e.g., from <seealso cref="#availableServices()"/>,...) stay consistent. 
+	  /// 
+	  /// <para><b>NOTE:</b> Only new service providers are added, existing ones are
+	  /// never removed or replaced.
+	  /// 
+	  /// </para>
+	  /// <para><em>This method is expensive and should only be called for discovery
+	  /// of new service providers on the given classpath/classloader!</em>
+	  /// </para>
+	  /// </summary>
+	  public void reload(ClassLoader classloader)
+	  {
+		  lock (this)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.LinkedHashMap<String,Class> services = new java.util.LinkedHashMap<>(this.services);
+			LinkedHashMap<string, Type> services = new LinkedHashMap<string, Type>(this.services);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.SPIClassIterator<S> loader = org.apache.lucene.util.SPIClassIterator.get(clazz, classloader);
+			SPIClassIterator<S> loader = SPIClassIterator.get(clazz, classloader);
+			while (loader.hasNext())
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final Class service = loader.next();
+			  Type service = loader.next();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String clazzName = service.getSimpleName();
+			  string clazzName = service.SimpleName;
+			  string name = null;
+			  foreach (string suffix in suffixes)
+			  {
+				if (clazzName.EndsWith(suffix, StringComparison.Ordinal))
+				{
+				  name = clazzName.Substring(0, clazzName.Length - suffix.Length).ToLower(Locale.ROOT);
+				  break;
+				}
+			  }
+			  if (name == null)
+			  {
+				throw new ServiceConfigurationError("The class name " + service.Name + " has wrong suffix, allowed are: " + Arrays.ToString(suffixes));
+			  }
+			  // only add the first one for each name, later services will be ignored
+			  // this allows placing services earlier in the classpath so they are
+			  // used instead of others
+			  //
+			  // TODO: Should we disallow duplicate names here?
+			  // Allowing it may get confusing on collisions, as different packages
+			  // could contain same factory class, which is a naming bug!
+			  // When changing this be careful to allow reload()!
+			  if (!services.containsKey(name))
+			  {
+				services.put(name, service);
+			  }
+			}
+			this.services = Collections.unmodifiableMap(services);
+		  }
+	  }
+
+	  public S newInstance(string name, IDictionary<string, string> args)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final Class service = lookupClass(name);
+		Type service = lookupClass(name);
+		try
+		{
+		  return service.getConstructor(typeof(IDictionary)).newInstance(args);
+		}
+		catch (Exception e)
+		{
+		  throw new System.ArgumentException("SPI class of type " + clazz.Name + " with name '" + name + "' cannot be instantiated. " + "This is likely due to a misconfiguration of the java class '" + service.Name + "': ", e);
+		}
+	  }
+
+	  public Type lookupClass(string name)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final Class service = services.get(name.toLowerCase(java.util.Locale.ROOT));
+		Type service = services[name.ToLower(Locale.ROOT)];
+		if (service != null)
+		{
+		  return service;
+		}
+		else
+		{
+		  throw new System.ArgumentException("A SPI class of type " + clazz.Name + " with name '" + name + "' does not exist. " + "You need to add the corresponding JAR file supporting this SPI to your classpath. " + "The current classpath supports the following names: " + availableServices());
+		}
+	  }
+
+	  public HashSet<string> availableServices()
+	  {
+		return services.Keys;
+	  }
+	}
+
+}
\ No newline at end of file

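The suffix-stripping loop in reload() is what turns class names into lookup keys: the configured suffix is cut off and the remainder lowercased. A hedged illustration (this assumes "FilterFactory" is among the suffixes configured for token filters, as in the Java original):

    // "PortugueseLightStemFilterFactory" minus "FilterFactory" -> "portugueselightstem"
    var factory = loader.newInstance("portugueselightstem", args);

Only the first class registered under a name wins; later duplicates on the classpath are ignored, per the comment in reload().
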
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
new file mode 100644
index 0000000..1d1c44b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
@@ -0,0 +1,278 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// A CharacterIterator used internally for use with <seealso cref="BreakIterator"/>
+	/// @lucene.internal
+	/// </summary>
+	public abstract class CharArrayIterator : CharacterIterator
+	{
+	  private char[] array;
+	  private int start;
+	  private int index;
+	  private int length;
+	  private int limit;
+
+	  public virtual char [] Text
+	  {
+		  get
+		  {
+			return array;
+		  }
+	  }
+
+	  public virtual int Start
+	  {
+		  get
+		  {
+			return start;
+		  }
+	  }
+
+	  public virtual int Length
+	  {
+		  get
+		  {
+			return length;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set a new region of text to be examined by this iterator
+	  /// </summary>
+	  /// <param name="array"> text buffer to examine </param>
+	  /// <param name="start"> offset into buffer </param>
+	  /// <param name="length"> maximum length to examine </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public void setText(final char array[] , int start, int length)
+	  public virtual void setText(char[] array, int start, int length)
+	  {
+		this.array = array;
+		this.start = start;
+		this.index = start;
+		this.length = length;
+		this.limit = start + length;
+	  }
+
+	  public override char current()
+	  {
+		return (index == limit) ? DONE : jreBugWorkaround(array[index]);
+	  }
+
+	  protected internal abstract char jreBugWorkaround(char ch);
+
+	  public override char first()
+	  {
+		index = start;
+		return current();
+	  }
+
+	  public override int BeginIndex
+	  {
+		  get
+		  {
+			return 0;
+		  }
+	  }
+
+	  public override int EndIndex
+	  {
+		  get
+		  {
+			return length;
+		  }
+	  }
+
+	  public override int Index
+	  {
+		  get
+		  {
+			return index - start;
+		  }
+	  }
+
+	  public override char last()
+	  {
+		index = (limit == start) ? limit : limit - 1;
+		return current();
+	  }
+
+	  public override char next()
+	  {
+		if (++index >= limit)
+		{
+		  index = limit;
+		  return DONE;
+		}
+		else
+		{
+		  return current();
+		}
+	  }
+
+	  public override char previous()
+	  {
+		if (--index < start)
+		{
+		  index = start;
+		  return DONE;
+		}
+		else
+		{
+		  return current();
+		}
+	  }
+
+	  public override char setIndex(int position)
+	  {
+		if (position < BeginIndex || position > EndIndex)
+		{
+		  throw new System.ArgumentException("Illegal Position: " + position);
+		}
+		index = start + position;
+		return current();
+	  }
+
+	  public override CharArrayIterator clone()
+	  {
+		try
+		{
+		  return (CharArrayIterator)base.clone();
+		}
+		catch (CloneNotSupportedException e)
+		{
+		  // CharacterIterator does not allow you to throw CloneNotSupported
+		  throw new Exception(e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Create a new CharArrayIterator that works around JRE bugs
+	  /// in a manner suitable for <seealso cref="BreakIterator#getSentenceInstance()"/>
+	  /// </summary>
+	  public static CharArrayIterator newSentenceInstance()
+	  {
+		if (HAS_BUGGY_BREAKITERATORS)
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper();
+		}
+		else
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper2();
+		}
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper()
+		  {
+		  }
+
+			  // work around this for now by lying about all surrogates to
+			  // the sentence tokenizer: we treat them all as
+			  // SContinue so we won't break around them.
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch >= 0xD800 && ch <= 0xDFFF ? 0x002C : ch;
+		  }
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper2 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper2()
+		  {
+		  }
+
+			  // no bugs
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Create a new CharArrayIterator that works around JRE bugs
+	  /// in a manner suitable for <seealso cref="BreakIterator#getWordInstance()"/>
+	  /// </summary>
+	  public static CharArrayIterator newWordInstance()
+	  {
+		if (HAS_BUGGY_BREAKITERATORS)
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper3();
+		}
+		else
+		{
+		  return new CharArrayIteratorAnonymousInnerClassHelper4();
+		}
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper3 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper3()
+		  {
+		  }
+
+			  // work around this for now by lying about all surrogates to the word
+			  // iterator: we treat them all as ALetter so we won't break around them.
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch >= 0xD800 && ch <= 0xDFFF ? 0x0041 : ch;
+		  }
+	  }
+
+	  private class CharArrayIteratorAnonymousInnerClassHelper4 : CharArrayIterator
+	  {
+		  public CharArrayIteratorAnonymousInnerClassHelper4()
+		  {
+		  }
+
+			  // no bugs
+		  protected internal override char jreBugWorkaround(char ch)
+		  {
+			return ch;
+		  }
+	  }
+
+	  /// <summary>
+	  /// True if this JRE has a buggy BreakIterator implementation
+	  /// </summary>
+	  public static readonly bool HAS_BUGGY_BREAKITERATORS;
+	  static CharArrayIterator()
+	  {
+		bool v;
+		try
+		{
+		  BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
+		  bi.Text = "\udb40\udc53";
+		  bi.next();
+		  v = false;
+		}
+		catch (Exception)
+		{
+		  v = true;
+		}
+		HAS_BUGGY_BREAKITERATORS = v;
+	  }
+	}
+
+}
\ No newline at end of file

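CharArrayIterator exists so a char[] slice can be fed to a BreakIterator without first copying it into a string. A hedged usage sketch with the Java-style API names this port keeps (bi is an assumed BreakIterator; the Text setter accepting an iterator is assumed from the Java API):

    CharArrayIterator it = CharArrayIterator.newWordInstance();
    it.setText(buffer, start, length);   // no allocation: wraps the existing buffer
    bi.Text = it;                        // hand the iterator to the BreakIterator
    for (int b = bi.first(); b != BreakIterator.DONE; b = bi.next())
    {
        // b is an offset relative to `start`
    }
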
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
new file mode 100644
index 0000000..1086572
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -0,0 +1,928 @@
+using System;
+using System.Diagnostics;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// A simple class that stores key Strings as char[]'s in a
+	/// hash table. Note that this is not a general purpose
+	/// class.  For example, it cannot remove items from the
+	/// map, nor does it resize its hash table to be smaller,
+	/// etc.  It is designed to be quick to retrieve items
+	/// by char[] keys without the necessity of converting
+	/// to a String first.
+	/// 
+	/// <a name="version"></a>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating <seealso cref="CharArrayMap"/>:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are
+	///       properly lowercased.</li>
+	/// </ul>
+	/// Before 3.1 supplementary characters could not be
+	/// lowercased correctly due to the lack of Unicode 4
+	/// support in JDK 1.4. To use instances of
+	/// <seealso cref="CharArrayMap"/> with the behavior before Lucene
+	/// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
+	/// </para>
+	/// </summary>
+	public class CharArrayMap<V> : AbstractMap<object, V>
+	{
+	  // private only because missing generics
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+//ORIGINAL LINE: private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
+	  private static readonly CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<?>();
+
+	  private const int INIT_SIZE = 8;
+	  private readonly CharacterUtils charUtils;
+	  private bool ignoreCase;
+	  private int count;
+	  internal readonly Version matchVersion; // package private because used in CharArraySet
+	  internal char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+	  internal V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+
+	  /// <summary>
+	  /// Create map with enough capacity to hold startSize terms
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="startSize">
+	  ///          the initial capacity </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("unchecked") public CharArrayMap(org.apache.lucene.util.Version matchVersion, int startSize, boolean ignoreCase)
+	  public CharArrayMap(Version matchVersion, int startSize, bool ignoreCase)
+	  {
+		this.ignoreCase = ignoreCase;
+		int size_Renamed = INIT_SIZE;
+		while (startSize + (startSize >> 2) > size_Renamed)
+		{
+		  size_Renamed <<= 1;
+		}
+		keys = new char[size_Renamed][];
+		values = (V[]) new object[size_Renamed];
+		this.charUtils = CharacterUtils.getInstance(matchVersion);
+		this.matchVersion = matchVersion;
+	  }
+
+	  /// <summary>
+	  /// Creates a map from the mappings in another map. 
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="c">
+	  ///          a map whose mappings to be copied </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArrayMap<T1>(Version matchVersion, IDictionary<T1> c, bool ignoreCase) where T1 : V : this(matchVersion, c.Count, ignoreCase)
+	  {
+		putAll(c);
+	  }
+
+	  /// <summary>
+	  /// Creates a map from the supplied map (used internally for readonly maps...) </summary>
+	  private CharArrayMap(CharArrayMap<V> toCopy)
+	  {
+		this.keys = toCopy.keys;
+		this.values = toCopy.values;
+		this.ignoreCase = toCopy.ignoreCase;
+		this.count = toCopy.count;
+		this.charUtils = toCopy.charUtils;
+		this.matchVersion = toCopy.matchVersion;
+	  }
+
+	  /// <summary>
+	  /// Clears all entries in this map. This method is supported for reusing, but not <seealso cref="Map#remove"/>. </summary>
+	  public override void clear()
+	  {
+		count = 0;
+		Arrays.fill(keys, null);
+		Arrays.fill(values, null);
+	  }
+
+	  /// <summary>
+	  /// true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+	  /// are in the <seealso cref="#keySet()"/> 
+	  /// </summary>
+	  public virtual bool containsKey(char[] text, int off, int len)
+	  {
+		return keys[getSlot(text, off, len)] != null;
+	  }
+
+	  /// <summary>
+	  /// true if the <code>CharSequence</code> is in the <seealso cref="#keySet()"/> </summary>
+	  public virtual bool containsKey(CharSequence cs)
+	  {
+		return keys[getSlot(cs)] != null;
+	  }
+
+	  public override bool containsKey(object o)
+	  {
+		if (o is char[])
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] text = (char[])o;
+		  char[] text = (char[])o;
+		  return containsKey(text, 0, text.Length);
+		}
+		return containsKey(o.ToString());
+	  }
+
+	  /// <summary>
+	  /// returns the value of the mapping of <code>len</code> chars of <code>text</code>
+	  /// starting at <code>off</code> 
+	  /// </summary>
+	  public virtual V get(char[] text, int off, int len)
+	  {
+		return values[getSlot(text, off, len)];
+	  }
+
+	  /// <summary>
+	  /// returns the value of the mapping of the chars inside this {@code CharSequence} </summary>
+	  public virtual V get(CharSequence cs)
+	  {
+		return values[getSlot(cs)];
+	  }
+
+	  public override V get(object o)
+	  {
+		if (o is char[])
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] text = (char[])o;
+		  char[] text = (char[])o;
+		  return get(text, 0, text.Length);
+		}
+		return get(o.ToString());
+	  }
+
+	  private int getSlot(char[] text, int off, int len)
+	  {
+		int code = getHashCode(text, off, len);
+		int pos = code & (keys.Length - 1);
+		char[] text2 = keys[pos];
+		if (text2 != null && !Equals(text, off, len, text2))
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int inc = ((code>>8)+code)|1;
+		  int inc = ((code >> 8) + code) | 1;
+		  do
+		  {
+			code += inc;
+			pos = code & (keys.Length - 1);
+			text2 = keys[pos];
+		  } while (text2 != null && !Equals(text, off, len, text2));
+		}
+		return pos;
+	  }
+
+	  /// <summary>
+	  /// Returns the slot for the given CharSequence </summary>
+	  private int getSlot(CharSequence text)
+	  {
+		int code = getHashCode(text);
+		int pos = code & (keys.Length - 1);
+		char[] text2 = keys[pos];
+		if (text2 != null && !Equals(text, text2))
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int inc = ((code>>8)+code)|1;
+		  int inc = ((code >> 8) + code) | 1;
+		  do
+		  {
+			code += inc;
+			pos = code & (keys.Length - 1);
+			text2 = keys[pos];
+		  } while (text2 != null && !Equals(text, text2));
+		}
+		return pos;
+	  }
+
+	  /// <summary>
+	  /// Add the given mapping. </summary>
+	  public virtual V put(CharSequence text, V value)
+	  {
+		return put(text.ToString(), value); // could be more efficient
+	  }
+
+	  public override V put(object o, V value)
+	  {
+		if (o is char[])
+		{
+		  return put((char[])o, value);
+		}
+		return put(o.ToString(), value);
+	  }
+
+	  /// <summary>
+	  /// Add the given mapping. </summary>
+	  public virtual V put(string text, V value)
+	  {
+		return put(text.ToCharArray(), value);
+	  }
+
+	  /// <summary>
+	  /// Add the given mapping.
+	  /// If ignoreCase is true for this Set, the text array will be directly modified.
+	  /// The user should never modify this text array after calling this method.
+	  /// </summary>
+	  public virtual V put(char[] text, V value)
+	  {
+		if (ignoreCase)
+		{
+		  charUtils.ToLower(text, 0, text.Length);
+		}
+		int slot = getSlot(text, 0, text.Length);
+		if (keys[slot] != null)
+		{
+		  V oldValue = values[slot];
+		  values[slot] = value;
+		  return oldValue;
+		}
+		keys[slot] = text;
+		values[slot] = value;
+		count++;
+
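+		// grow once the table is ~80% full: count + count/4 > capacity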
+		if (count + (count >> 2) > keys.Length)
+		{
+		  rehash();
+		}
+
+		return null;
+	  }
+
+	  private void rehash()
+	  {
+		Debug.Assert(keys.Length == values.Length);
+		int newSize = 2 * keys.Length;
+		char[][] oldkeys = keys;
+		V[] oldvalues = values;
+		keys = new char[newSize][];
+		values = new V[newSize]; // unlike Java, C# can allocate a generic array directly
+
+		for (int i = 0; i < oldkeys.Length; i++)
+		{
+		  char[] text = oldkeys[i];
+		  if (text != null)
+		  {
+			// todo: could be faster... no need to compare strings on collision
+			int slot = getSlot(text, 0, text.Length);
+			keys[slot] = text;
+			values[slot] = oldvalues[i];
+		  }
+		}
+	  }
+
+	  private bool Equals(char[] text1, int off, int len, char[] text2)
+	  {
+		if (len != text2.Length)
+		{
+		  return false;
+		}
+		int limit = off + len;
+		if (ignoreCase)
+		{
+		  for (int i = 0; i < len;)
+		  {
+			// Character (Lucene.Net.Support) is assumed to supply the JDK code point
+			// helpers (ToLowerCase/CharCount) used elsewhere in this commit
+			int codePointAt = charUtils.codePointAt(text1, off + i, limit);
+			if (Character.ToLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.Length))
+			{
+			  return false;
+			}
+			i += Character.CharCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0; i < len; i++)
+		  {
+			if (text1[off + i] != text2[i])
+			{
+			  return false;
+			}
+		  }
+		}
+		return true;
+	  }
+
+	  private bool Equals(CharSequence text1, char[] text2)
+	  {
+		int len = text1.length();
+		if (len != text2.Length)
+		{
+		  return false;
+		}
+		if (ignoreCase)
+		{
+		  for (int i = 0; i < len;)
+		  {
+			int codePointAt = charUtils.codePointAt(text1, i);
+			if (Character.ToLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.Length))
+			{
+			  return false;
+			}
+			i += Character.CharCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0; i < len; i++)
+		  {
+			if (text1.charAt(i) != text2[i])
+			{
+			  return false;
+			}
+		  }
+		}
+		return true;
+	  }
+
+	  private int getHashCode(char[] text, int offset, int len)
+	  {
+		if (text == null)
+		{
+		  throw new System.NullReferenceException();
+		}
+		int code = 0;
+		int stop = offset + len;
+		if (ignoreCase)
+		{
+		  for (int i = offset; i < stop;)
+		  {
+			int codePointAt = charUtils.codePointAt(text, i, stop);
+			code = code * 31 + Character.ToLowerCase(codePointAt);
+			i += Character.CharCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = offset; i < stop; i++)
+		  {
+			code = code * 31 + text[i];
+		  }
+		}
+		return code;
+	  }
+
+	  private int getHashCode(CharSequence text)
+	  {
+		if (text == null)
+		{
+		  throw new System.NullReferenceException();
+		}
+		int code = 0;
+		int len = text.length();
+		if (ignoreCase)
+		{
+		  for (int i = 0; i < len;)
+		  {
+			int codePointAt = charUtils.codePointAt(text, i);
+			code = code * 31 + Character.ToLowerCase(codePointAt);
+			i += Character.CharCount(codePointAt);
+		  }
+		}
+		else
+		{
+		  for (int i = 0; i < len; i++)
+		  {
+			code = code * 31 + text.charAt(i);
+		  }
+		}
+		return code;
+	  }
+
+	  public override V remove(object key)
+	  {
+		throw new System.NotSupportedException();
+	  }
+
+	  public override int size()
+	  {
+		return count;
+	  }
+
+	  public override string ToString()
+	  {
+		StringBuilder sb = new StringBuilder("{");
+		foreach (KeyValuePair<object, V> entry in entrySet())
+		{
+		  if (sb.Length > 1)
+		  {
+			  sb.Append(", ");
+		  }
+		  sb.Append(entry);
+		}
+		return sb.Append('}').ToString();
+	  }
+
+	  private EntrySet entrySet_Renamed = null;
+	  private CharArraySet keySet_Renamed = null;
+
+	  internal virtual EntrySet createEntrySet()
+	  {
+		return new EntrySet(this, true);
+	  }
+
+	  public override EntrySet entrySet()
+	  {
+		if (entrySet_Renamed == null)
+		{
+		  entrySet_Renamed = createEntrySet();
+		}
+		return entrySet_Renamed;
+	  }
+
+	  // helper for CharArraySet to not produce endless recursion
+	  internal HashSet<object> originalKeySet()
+	  {
+		return base.Keys;
+	  }
+
+	  /// <summary>
+	  /// Returns an <seealso cref="CharArraySet"/> view on the map's keys.
+	  /// The set will use the same {@code matchVersion} as this map. 
+	  /// </summary>
+	  public override CharArraySet keySet()
+	  {
+		if (keySet_Renamed == null)
+		{
+		  // prevent adding of entries
+		  keySet_Renamed = new CharArraySetAnonymousInnerClassHelper(this);
+		}
+		return keySet_Renamed;
+	  }
+
+	  private class CharArraySetAnonymousInnerClassHelper : CharArraySet
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		  public CharArraySetAnonymousInnerClassHelper(CharArrayMap<V> outerInstance)
+			  : base((CharArrayMap<object>)(object) outerInstance) // raw-type cast from the Java original; assumes V is object here, as it is for CharArraySet's backing map
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override bool add(object o)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(CharSequence text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(string text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  public override bool add(char[] text)
+		  {
+			throw new System.NotSupportedException();
+		  }
+	  }
+
+	  /// <summary>
+	  /// public iterator class so efficient methods are exposed to users </summary>
+	  public class EntryIterator : IEnumerator<KeyValuePair<object, V>>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		internal int pos = -1;
+		internal int lastPos;
+		internal readonly bool allowModify;
+
+		internal EntryIterator(CharArrayMap<V> outerInstance, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.allowModify = allowModify;
+		  goNext();
+		}
+
+		internal virtual void goNext()
+		{
+		  lastPos = pos;
+		  pos++;
+		  while (pos < outerInstance.keys.Length && outerInstance.keys[pos] == null)
+		  {
+			  pos++;
+		  }
+		}
+
+		// NOTE: Java-style iterator members; the IEnumerator<T> contract
+		// (MoveNext/Current/Dispose) still needs to be implemented for .NET callers
+		public virtual bool hasNext()
+		{
+		  return pos < outerInstance.keys.Length;
+		}
+
+		/// <summary>
+		/// gets the next key... do not modify the returned char[] </summary>
+		public virtual char[] nextKey()
+		{
+		  goNext();
+		  return outerInstance.keys[lastPos];
+		}
+
+		/// <summary>
+		/// gets the next key as a newly created String object </summary>
+		public virtual string nextKeyString()
+		{
+		  return new string(nextKey());
+		}
+
+		/// <summary>
+		/// returns the value associated with the last key returned </summary>
+		public virtual V currentValue()
+		{
+		  return outerInstance.values[lastPos];
+		}
+
+		/// <summary>
+		/// sets the value associated with the last key returned </summary>
+		public virtual V setValue(V value)
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  V old = outerInstance.values[lastPos];
+		  outerInstance.values[lastPos] = value;
+		  return old;
+		}
+
+		/// <summary>
+		/// use nextKey() + currentValue() for better efficiency. </summary>
+		public virtual KeyValuePair<object, V> next()
+		{
+		  goNext();
+		  return new MapEntry(outerInstance, lastPos, allowModify);
+		}
+
+		public virtual void remove()
+		{
+		  throw new System.NotSupportedException();
+		}
+	  }
+
+	  // NOTE: KeyValuePair<,> is a struct in .NET and cannot be subclassed the way
+	  // java.util.Map.Entry can; this entry type still needs rework in the port
+	  private sealed class MapEntry : KeyValuePair<object, V>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		internal readonly int pos;
+		internal readonly bool allowModify;
+
+		internal MapEntry(CharArrayMap<V> outerInstance, int pos, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.pos = pos;
+		  this.allowModify = allowModify;
+		}
+
+		public override object Key
+		{
+			get
+			{
+			  // we must clone here, as putAll to another CharArrayMap
+			  // with other case sensitivity flag would corrupt the keys
+			  return outerInstance.keys[pos].Clone();
+			}
+		}
+
+		public override V Value
+		{
+			get
+			{
+			  return outerInstance.values[pos];
+			}
+		}
+
+		public override V setValue(V value)
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  V old = outerInstance.values[pos];
+		  outerInstance.values[pos] = value;
+		  return old;
+		}
+
+		public override string ToString()
+		{
+		  return (new StringBuilder()).Append(outerInstance.keys[pos]).Append('=').Append((outerInstance.values[pos] == outerInstance) ? "(this Map)" : outerInstance.values[pos]).ToString();
+		}
+	  }
+
+	  /// <summary>
+	  /// public EntrySet class so efficient methods are exposed to users </summary>
+	  public sealed class EntrySet : AbstractSet<KeyValuePair<object, V>>
+	  {
+		  private readonly CharArrayMap<V> outerInstance;
+
+		internal readonly bool allowModify;
+
+		internal EntrySet(CharArrayMap<V> outerInstance, bool allowModify)
+		{
+			this.outerInstance = outerInstance;
+		  this.allowModify = allowModify;
+		}
+
+		public override EntryIterator iterator()
+		{
+		  return new EntryIterator(outerInstance, allowModify);
+		}
+
+		public override bool contains(object o)
+		{
+		  if (!(o is KeyValuePair<object, V>))
+		  {
+			return false;
+		  }
+		  KeyValuePair<object, V> e = (KeyValuePair<object, V>)o;
+		  object key = e.Key;
+		  object val = e.Value;
+		  object v = outerInstance.get(key);
+		  return v == null ? val == null : v.Equals(val);
+		}
+
+		public override bool remove(object o)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override int size()
+		{
+		  return outerInstance.count;
+		}
+
+		public override void clear()
+		{
+		  if (!allowModify)
+		  {
+			throw new System.NotSupportedException();
+		  }
+		  outerInstance.clear();
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns an unmodifiable <seealso cref="CharArrayMap"/>. This allows providing
+	  /// unmodifiable views of the internal map for "read-only" use.
+	  /// </summary>
+	  /// <param name="map">
+	  ///          a map for which the unmodifiable map is returned. </param>
+	  /// <returns> a new unmodifiable <seealso cref="CharArrayMap"/>. </returns>
+	  /// <exception cref="NullPointerException">
+	  ///           if the given map is <code>null</code>. </exception>
+	  public static CharArrayMap<V> unmodifiableMap<V>(CharArrayMap<V> map)
+	  {
+		if (map == null)
+		{
+		  throw new System.NullReferenceException("Given map is null");
+		}
+		if (map == emptyMap<V>() || map.Empty)
+		{
+		  return emptyMap<V>();
+		}
+		if (map is UnmodifiableCharArrayMap<V>)
+		{
+		  return map;
+		}
+		return new UnmodifiableCharArrayMap<V>(map);
+	  }
+
+	  /// <summary>
+	  /// Returns a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+	  /// is a <seealso cref="CharArrayMap"/> the ignoreCase property will be preserved.
+	  /// <para>
+	  /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArrayMap"/> where
+	  /// the <seealso cref="Version"/> of the source map differs from its copy
+	  /// <seealso cref="#CharArrayMap(Version, Map, boolean)"/> should be used instead.
+	  /// The <seealso cref="#copy(Version, Map)"/> will preserve the <seealso cref="Version"/> of the
+	  /// source map if it is an instance of <seealso cref="CharArrayMap"/>.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. This argument will be ignored if the
+	  ///          given map is a <seealso cref="CharArrayMap"/>. </param>
+	  /// <param name="map">
+	  ///          a map to copy </param>
+	  /// <returns> a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+	  ///         is a <seealso cref="CharArrayMap"/> the ignoreCase property as well as the
+	  ///         matchVersion of the given map will be preserved. </returns>
+	  public static CharArrayMap<V> copy<V>(Version matchVersion, IDictionary<object, V> map)
+	  {
+		if (object.ReferenceEquals(map, EMPTY_MAP))
+		{
+		  return emptyMap<V>();
+		}
+		if (map is CharArrayMap<V>)
+		{
+		  CharArrayMap<V> m = (CharArrayMap<V>) map;
+		  // use fast path instead of iterating all values
+		  // this is even on very small sets ~10 times faster than iterating
+		  char[][] keys = new char[m.keys.Length][];
+		  Array.Copy(m.keys, 0, keys, 0, keys.Length);
+		  V[] values = new V[m.values.Length]; // C# can allocate a generic array directly
+		  Array.Copy(m.values, 0, values, 0, values.Length);
+		  m = new CharArrayMap<V>(m);
+		  m.keys = keys;
+		  m.values = values;
+		  return m;
+		}
+		return new CharArrayMap<V>(matchVersion, map, false);
+	  }
+
+	  /// <summary>
+	  /// Returns an empty, unmodifiable map. </summary>
+	  public static CharArrayMap<V> emptyMap<V>()
+	  {
+		return (CharArrayMap<V>) EMPTY_MAP;
+	  }
+
+	  // package private CharArraySet instanceof check in CharArraySet
+	  internal class UnmodifiableCharArrayMap<V> : CharArrayMap<V>
+	  {
+
+		internal UnmodifiableCharArrayMap(CharArrayMap<V> map) : base(map)
+		{
+		}
+
+		public override void clear()
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(object o, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(char[] text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(CharSequence text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V put(string text, V val)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		public override V remove(object key)
+		{
+		  throw new System.NotSupportedException();
+		}
+
+		internal override EntrySet createEntrySet()
+		{
+		  return new EntrySet(this, false);
+		}
+	  }
+
+	  /// <summary>
+	  /// Empty <seealso cref="org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap"/> optimized for speed.
+	  /// Contains checks will always return <code>false</code> or throw
+	  /// NPE if necessary.
+	  /// </summary>
+	  private sealed class EmptyCharArrayMap<V> : UnmodifiableCharArrayMap<V>
+	  {
+		internal EmptyCharArrayMap() : base(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false))
+		{
+		}
+
+		public override bool containsKey(char[] text, int off, int len)
+		{
+		  if (text == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		public override bool containsKey(CharSequence cs)
+		{
+		  if (cs == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		public override bool containsKey(object o)
+		{
+		  if (o == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return false;
+		}
+
+		public override V get(char[] text, int off, int len)
+		{
+		  if (text == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return null;
+		}
+
+		public override V get(CharSequence cs)
+		{
+		  if (cs == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return null;
+		}
+
+		public override V get(object o)
+		{
+		  if (o == null)
+		  {
+			throw new System.NullReferenceException();
+		  }
+		  return null;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

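A note on the CharArrayMap internals above: getSlot/getHashCode implement open
addressing over a power-of-two table, with a 31-based hash and an odd probe
increment derived from the hash. A minimal standalone sketch of just that
probing scheme (hypothetical demo code over string keys, not part of this
commit):

    using System;

    class ProbeDemo
    {
        // length must stay a power of two so "code & (Length - 1)" acts as a modulo
        static string[] keys = new string[16];

        static int Hash(string s)
        {
            int code = 0;
            foreach (char c in s)
            {
                code = code * 31 + c; // same 31-based hash as CharArrayMap.getHashCode
            }
            return code;
        }

        static int GetSlot(string text)
        {
            int code = Hash(text);
            int pos = code & (keys.Length - 1);
            if (keys[pos] != null && keys[pos] != text)
            {
                int inc = ((code >> 8) + code) | 1; // odd step => probes every slot
                do
                {
                    code += inc;
                    pos = code & (keys.Length - 1);
                } while (keys[pos] != null && keys[pos] != text);
            }
            return pos;
        }

        static void Main()
        {
            keys[GetSlot("foo")] = "foo";
            keys[GetSlot("bar")] = "bar";
            Console.WriteLine(GetSlot("foo")); // stable slot for "foo"
        }
    }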
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
new file mode 100644
index 0000000..d9253d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -0,0 +1,267 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// A simple class that stores Strings as char[]'s in a
+	/// hash table.  Note that this is not a general purpose
+	/// class.  For example, it cannot remove items from the
+	/// set, nor does it resize its hash table to be smaller,
+	/// etc.  It is designed to be quick to test if a char[]
+	/// is in the set without the necessity of converting it
+	/// to a String first.
+	/// 
+	/// <a name="version"></a>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating <seealso cref="CharArraySet"/>:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are
+	///       properly lowercased.</li>
+	/// </ul>
+	/// Before 3.1 supplementary characters could not be
+	/// lowercased correctly due to the lack of Unicode 4
+	/// support in JDK 1.4. To use instances of
+	/// <seealso cref="CharArraySet"/> with the behavior before Lucene
+	/// 3.1 pass a <seealso cref="Version"/> < 3.1 to the constructors.
+	/// <P>
+	/// <em>Please note:</em> This class implements <seealso cref="java.util.Set Set"/> but
+	/// does not behave like it should in all cases. The generic type is
+	/// {@code Set<Object>}, because you can add any object to it,
+	/// that has a string representation. The add methods will use
+	/// <seealso cref="Object#toString"/> and store the result using a {@code char[]}
+	/// buffer. The {@code contains()} methods behave the same way.
+	/// The <seealso cref="#iterator()"/> returns an {@code Iterator<char[]>}.
+	/// </para>
+	/// </summary>
+	public class CharArraySet : AbstractSet<object>
+	{
+	  public static readonly CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap<object>.emptyMap<object>());
+	  private static readonly object PLACEHOLDER = new object();
+
+	  private readonly CharArrayMap<object> map;
+
+	  /// <summary>
+	  /// Create set with enough capacity to hold startSize terms
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="startSize">
+	  ///          the initial capacity </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArraySet(Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<object>(matchVersion, startSize, ignoreCase))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a set from a Collection of objects. 
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. </param>
+	  /// <param name="c">
+	  ///          a collection whose elements to be placed into the set </param>
+	  /// <param name="ignoreCase">
+	  ///          <code>false</code> if and only if the set should be case sensitive
+	  ///          otherwise <code>true</code>. </param>
+	  public CharArraySet(Version matchVersion, ICollection<object> c, bool ignoreCase) : this(matchVersion, c.Count, ignoreCase)
+	  {
+		addAll(c);
+	  }
+
+	  /// <summary>
+	  /// Create set from the specified map (internal only), used also by <seealso cref="CharArrayMap#keySet()"/> </summary>
+	  internal CharArraySet(CharArrayMap<object> map)
+	  {
+		this.map = map;
+	  }
+
+	  /// <summary>
+	  /// Clears all entries in this set. This method is supported for reuse, but not <seealso cref="Set#remove"/>. </summary>
+	  public override void clear()
+	  {
+		map.clear();
+	  }
+
+	  /// <summary>
+	  /// true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+	  /// are in the set 
+	  /// </summary>
+	  public virtual bool contains(char[] text, int off, int len)
+	  {
+		return map.containsKey(text, off, len);
+	  }
+
+	  /// <summary>
+	  /// true if the <code>CharSequence</code> is in the set </summary>
+	  public virtual bool contains(CharSequence cs)
+	  {
+		return map.containsKey(cs);
+	  }
+
+	  public override bool contains(object o)
+	  {
+		return map.containsKey(o);
+	  }
+
+	  public override bool add(object o)
+	  {
+		return map.put(o, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this CharSequence into the set </summary>
+	  public virtual bool add(CharSequence text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this String into the set </summary>
+	  public virtual bool add(string text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  /// <summary>
+	  /// Add this char[] directly to the set.
+	  /// If ignoreCase is true for this Set, the text array will be directly modified.
+	  /// The user should never modify this text array after calling this method.
+	  /// </summary>
+	  public virtual bool add(char[] text)
+	  {
+		return map.put(text, PLACEHOLDER) == null;
+	  }
+
+	  public override int size()
+	  {
+		return map.size();
+	  }
+
+	  /// <summary>
+	  /// Returns an unmodifiable <seealso cref="CharArraySet"/>. This allows providing
+	  /// unmodifiable views of the internal set for "read-only" use.
+	  /// </summary>
+	  /// <param name="set">
+	  ///          a set for which the unmodifiable set is returned. </param>
+	  /// <returns> a new unmodifiable <seealso cref="CharArraySet"/>. </returns>
+	  /// <exception cref="NullPointerException">
+	  ///           if the given set is <code>null</code>. </exception>
+	  public static CharArraySet unmodifiableSet(CharArraySet set)
+	  {
+		if (set == null)
+		{
+		  throw new System.NullReferenceException("Given set is null");
+		}
+		if (set == EMPTY_SET)
+		{
+		  return EMPTY_SET;
+		}
+		if (set.map is CharArrayMap<object>.UnmodifiableCharArrayMap<object>)
+		{
+		  return set;
+		}
+		return new CharArraySet(CharArrayMap<object>.unmodifiableMap(set.map));
+	  }
+
+	  /// <summary>
+	  /// Returns a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
+	  /// is a <seealso cref="CharArraySet"/> the ignoreCase property will be preserved.
+	  /// <para>
+	  /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArraySet"/> where
+	  /// the <seealso cref="Version"/> of the source set differs from its copy
+	  /// <seealso cref="#CharArraySet(Version, Collection, boolean)"/> should be used instead.
+	  /// The <seealso cref="#copy(Version, Set)"/> will preserve the <seealso cref="Version"/> of the
+	  /// source set if it is an instance of <seealso cref="CharArraySet"/>.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          compatibility match version see <a href="#version">Version
+	  ///          note</a> above for details. This argument will be ignored if the
+	  ///          given set is a <seealso cref="CharArraySet"/>. </param>
+	  /// <param name="set">
+	  ///          a set to copy </param>
+	  /// <returns> a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
+	  ///         is a <seealso cref="CharArraySet"/> the ignoreCase property as well as the
+	  ///         matchVersion of the given set will be preserved. </returns>
+	  public static CharArraySet copy(Version matchVersion, ICollection<object> set)
+	  {
+		if (set == EMPTY_SET)
+		{
+		  return EMPTY_SET;
+		}
+		if (set is CharArraySet)
+		{
+		  CharArraySet source = (CharArraySet) set;
+		  return new CharArraySet(CharArrayMap<object>.copy(source.map.matchVersion, source.map));
+		}
+		return new CharArraySet(matchVersion, set, false);
+	  }
+
+	  /// <summary>
+	  /// Returns an <seealso cref="Iterator"/> for {@code char[]} instances in this set.
+	  /// </summary>
+	  public override IEnumerator<object> iterator()
+	  {
+		// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
+		return map.originalKeySet().GetEnumerator();
+	  }
+
+	  public override string ToString()
+	  {
+		StringBuilder sb = new StringBuilder("[");
+		foreach (object item in this)
+		{
+		  if (sb.Length > 1)
+		  {
+			  sb.Append(", ");
+		  }
+		  if (item is char[])
+		  {
+			sb.Append((char[]) item);
+		  }
+		  else
+		  {
+			sb.Append(item);
+		  }
+		}
+		return sb.Append(']').ToString();
+	  }
+	}
+
+}
\ No newline at end of file

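A note on the CharArraySet semantics above: entries are stored as char[]
directly, so membership tests avoid allocating strings, and with
ignoreCase=true the add(char[]) overload lowercases the caller's array in
place. A short usage sketch, assuming the Java-style member names kept at this
stage of the port:

    var stopWords = new CharArraySet(Version.LUCENE_CURRENT, 8, true); // ignoreCase
    stopWords.add("The");
    char[] buf = { 't', 'h', 'e' };
    bool hit = stopWords.contains(buf, 0, buf.Length); // true, no string allocated
    // after add(char[]) the set keeps (and may have lowercased) the array,
    // so a buffer passed to add must not be reused by the caller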
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
new file mode 100644
index 0000000..e2f5b0a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Abstract parent class for analysis factories that create <seealso cref="CharFilter"/>
+	/// instances.
+	/// </summary>
+	public abstract class CharFilterFactory : AbstractAnalysisFactory
+	{
+
+	  private static readonly AnalysisSPILoader<CharFilterFactory> loader = new AnalysisSPILoader<CharFilterFactory>(typeof(CharFilterFactory));
+
+	  /// <summary>
+	  /// looks up a charfilter by name from context classpath </summary>
+	  public static CharFilterFactory forName(string name, IDictionary<string, string> args)
+	  {
+		return loader.newInstance(name, args);
+	  }
+
+	  /// <summary>
+	  /// looks up a charfilter class by name from context classpath </summary>
+	  public static Type lookupClass(string name)
+	  {
+		return loader.lookupClass(name);
+	  }
+
+	  /// <summary>
+	  /// returns a list of all available charfilter names </summary>
+	  public static HashSet<string> availableCharFilters()
+	  {
+		return loader.availableServices();
+	  }
+
+	  /// <summary>
+	  /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+	  /// Changes to the factories are visible after the method ends, all
+	  /// iterators (<seealso cref="#availableCharFilters()"/>,...) stay consistent. 
+	  /// 
+	  /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+	  /// never removed or replaced.
+	  /// 
+	  /// </para>
+	  /// <para><em>This method is expensive and should only be called for discovery
+	  /// of new factories on the given classpath/classloader!</em>
+	  /// </para>
+	  /// </summary>
+	  public static void reloadCharFilters(ClassLoader classloader)
+	  {
+		loader.reload(classloader);
+	  }
+
+	  /// <summary>
+	  /// Initialize this factory via a set of key-value pairs.
+	  /// </summary>
+	  protected internal CharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Wraps the given <seealso cref="TextReader"/> with a CharFilter. </summary>
+	  public abstract TextReader create(TextReader input);
+	}
+
+}
\ No newline at end of file

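For context, the AnalysisSPILoader above resolves factories by name. A usage
sketch (the name "htmlstrip" follows the Java Lucene convention; what is
actually registered at this stage of the port may differ):

    var args = new Dictionary<string, string>();
    CharFilterFactory factory = CharFilterFactory.forName("htmlstrip", args);
    TextReader filtered = factory.create(new StringReader("<b>text</b>"));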
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
new file mode 100644
index 0000000..1cd6395
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -0,0 +1,209 @@
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// An abstract base class for simple, character-oriented tokenizers. 
+    /// <para>
+    /// <a name="version">You must specify the required <seealso cref="Version"/> compatibility
+    /// when creating <seealso cref="CharTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+    /// detect token codepoints. See <seealso cref="#isTokenChar(int)"/> and
+    /// <seealso cref="#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// <para>
+    /// A new <seealso cref="CharTokenizer"/> API has been introduced with Lucene 3.1. This API
+    /// moved from UTF-16 code units to UTF-32 codepoints to eventually add support
+    /// for <a href=
+    /// "http://java.sun.com/j2se/1.5.0/docs/api/java/lang/Character.html#supplementary"
+    /// >supplementary characters</a>. The old <i>char</i> based API has been
+    /// deprecated and should be replaced with the <i>int</i> based methods
+    /// <seealso cref="#isTokenChar(int)"/> and <seealso cref="#normalize(int)"/>.
+    /// </para>
+    /// <para>
+    /// As of Lucene 3.1 each <seealso cref="CharTokenizer"/> - constructor expects a
+    /// <seealso cref="Version"/> argument. Based on the given <seealso cref="Version"/> either the new
+    /// API or a backwards compatibility layer is used at runtime. For
+    /// <seealso cref="Version"/> < 3.1 the backwards compatibility layer ensures correct
+    /// behavior even for indexes built with previous versions of Lucene. If a
+    /// <seealso cref="Version"/> >= 3.1 is used <seealso cref="CharTokenizer"/> requires the new API to
+    /// be implemented by the instantiated class. Yet, the old <i>char</i> based API
+    /// is not required anymore even if backwards compatibility must be preserved.
+    /// <seealso cref="CharTokenizer"/> subclasses implementing the new API are fully backwards
+    /// compatible if instantiated with <seealso cref="Version"/> < 3.1.
+    /// </para>
+    /// <para>
+    /// <strong>Note:</strong> If you use a subclass of <seealso cref="CharTokenizer"/> with <seealso cref="Version"/> >=
+    /// 3.1 on an index built with a version < 3.1, created tokens might not be
+    /// compatible with the terms in your index.
+    /// </para>
+    /// 
+    /// </summary>
+    public abstract class CharTokenizer : Tokenizer
+    {
+        private readonly TextReader _input;
+
+        /// <summary>
+        /// Creates a new <seealso cref="CharTokenizer"/> instance
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match </param>
+        /// <param name="input">
+        ///          the input to split up into tokens </param>
+        public CharTokenizer(Version matchVersion, TextReader input)
+            : base(input)
+        {
+            _input = input;
+            charUtils = CharacterUtils.getInstance(matchVersion);
+            termAtt = addAttribute(typeof(CharTermAttribute));
+            offsetAtt = addAttribute(typeof(OffsetAttribute));
+        }
+
+        /// <summary>
+        /// Creates a new <seealso cref="CharTokenizer"/> instance
+        /// </summary>
+        /// <param name="matchVersion">
+        ///          Lucene version to match </param>
+        /// <param name="factory">
+        ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+        /// <param name="input">
+        ///          the input to split up into tokens </param>
+        public CharTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
+            : base(factory, input)
+        {
+            _input = input;
+            charUtils = CharacterUtils.getInstance(matchVersion);
+            termAtt = addAttribute(typeof(CharTermAttribute));
+            offsetAtt = addAttribute(typeof(OffsetAttribute));
+        }
+
+        private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+        private const int MAX_WORD_LEN = 255;
+        private const int IO_BUFFER_SIZE = 4096;
+
+        // assigned in the constructors above: C# field initializers cannot
+        // reference instance methods such as addAttribute
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+
+        private readonly CharacterUtils charUtils;
+        private readonly CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+
+        /// <summary>
+        /// Returns true iff a codepoint should be included in a token. This tokenizer
+        /// generates as tokens adjacent sequences of codepoints which satisfy this
+        /// predicate. Codepoints for which this is false are used to define token
+        /// boundaries and are not included in tokens.
+        /// </summary>
+        protected internal abstract bool IsTokenChar(int c);
+
+        /// <summary>
+        /// Called on each token character to normalize it before it is added to the
+        /// token. The default implementation does nothing. Subclasses may use this to,
+        /// e.g., lowercase tokens.
+        /// </summary>
+        protected virtual int Normalize(int c)
+        {
+            return c;
+        }
+
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            int length = 0;
+            int start = -1; // this variable is always initialized
+            int end = -1;
+            char[] buffer = termAtt.Buffer();
+            while (true)
+            {
+                if (bufferIndex >= dataLen)
+                {
+                    offset += dataLen;
+                    charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+                    if (ioBuffer.Length == 0)
+                    {
+                        dataLen = 0; // so next offset += dataLen won't decrement offset
+                        if (length > 0)
+                        {
+                            break;
+                        }
+                        else
+                        {
+                            finalOffset = CorrectOffset(offset);
+                            return false;
+                        }
+                    }
+                    dataLen = ioBuffer.Length;
+                    bufferIndex = 0;
+                }
+                // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
+                int c = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
+                int charCount = Character.CharCount(c);
+                bufferIndex += charCount;
+
+                if (IsTokenChar(c)) // if it's a token char
+                {
+                    if (length == 0) // start of token
+                    {
+                        Debug.Assert(start == -1);
+                        start = offset + bufferIndex - charCount;
+                        end = start;
+                    } // check if a supplementary could run out of bounds
+                    else if (length >= buffer.Length - 1)
+                    {
+                        buffer = termAtt.ResizeBuffer(2 + length); // make sure a supplementary fits in the buffer
+                    }
+                    end += charCount;
+                    length += Character.ToChars(Normalize(c), buffer, length); // buffer it, normalized
+                    if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
+                    {
+                        break;
+                    }
+                } // at non-Letter w/ chars
+                else if (length > 0)
+                {
+                    break; // return 'em
+                }
+            }
+
+            termAtt.Length = length;
+            Debug.Assert(start != -1);
+            offsetAtt.SetOffset(CorrectOffset(start), finalOffset = CorrectOffset(end));
+            return true;
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            bufferIndex = 0;
+            offset = 0;
+            dataLen = 0;
+            finalOffset = 0;
+            ioBuffer.reset(); // make sure to reset the IO buffer!!
+        }
+    }
+}
\ No newline at end of file

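To illustrate the CharTokenizer contract above: a subclass only decides which
code points belong to a token (IsTokenChar) and how to normalize them
(Normalize). A minimal letter tokenizer in the spirit of Java Lucene's
LowerCaseTokenizer (Character.IsLetter/ToLowerCase are assumed code point
helpers from Lucene.Net.Support):

    public sealed class SimpleLetterTokenizer : CharTokenizer
    {
        public SimpleLetterTokenizer(Version matchVersion, TextReader input)
            : base(matchVersion, input)
        {
        }

        protected internal override bool IsTokenChar(int c)
        {
            return Character.IsLetter(c); // tokens are maximal runs of letters
        }

        protected override int Normalize(int c)
        {
            return Character.ToLowerCase(c); // lowercase each code point
        }
    }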

[03/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
new file mode 100644
index 0000000..e876a6f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -0,0 +1,498 @@
+using System.Diagnostics;
+using Lucene.Net.Support;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="CharacterUtils"/> provides a unified interface to Character-related
+	/// operations to implement backwards compatible character operations based on a
+	/// <seealso cref="Version"/> instance.
+	/// 
+	/// @lucene.internal
+	/// </summary>
+	public abstract class CharacterUtils
+	{
+	  private static readonly Java4CharacterUtils JAVA_4 = new Java4CharacterUtils();
+	  private static readonly Java5CharacterUtils JAVA_5 = new Java5CharacterUtils();
+
+	  /// <summary>
+	  /// Returns a <seealso cref="CharacterUtils"/> implementation according to the given
+	  /// <seealso cref="Version"/> instance.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          a version instance </param>
+	  /// <returns> a <seealso cref="CharacterUtils"/> implementation according to the given
+	  ///         <seealso cref="Version"/> instance. </returns>
+	  public static CharacterUtils getInstance(Version matchVersion)
+	  {
+		return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
+	  }
+
+	  /// <summary>
+	  /// Return a <seealso cref="CharacterUtils"/> instance compatible with Java 1.4. </summary>
+	  public static CharacterUtils Java4Instance
+	  {
+		  get
+		  {
+			return JAVA_4;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Returns the code point at the given index of the <seealso cref="CharSequence"/>.
+	  /// Depending on the <seealso cref="Version"/> passed to
+	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+	  /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
+	  /// available on a Java 1.4 JVM or on a later virtual machine version.
+	  /// </summary>
+	  /// <param name="seq">
+	  ///          a character sequence </param>
+	  /// <param name="offset">
+	  ///          the offset to the char values in the chars array to be converted
+	  /// </param>
+	  /// <returns> the Unicode code point at the given index </returns>
+	  /// <exception cref="NullPointerException">
+	  ///           - if the sequence is null. </exception>
+	  /// <exception cref="IndexOutOfBoundsException">
+	  ///           - if the value offset is negative or not less than the length of
+	  ///           the character sequence. </exception>
+	  public abstract int codePointAt(CharSequence seq, int offset);
+
+	  /// <summary>
+	  /// Returns the code point at the given index of the char array where only elements
+	  /// with index less than the limit are used.
+	  /// Depending on the <seealso cref="Version"/> passed to
+	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
+	  /// of <seealso cref="Character#codePointAt(char[], int)"/> as it would have been
+	  /// available on a Java 1.4 JVM or on a later virtual machine version.
+	  /// </summary>
+	  /// <param name="chars">
+	  ///          a character array </param>
+	  /// <param name="offset">
+	  ///          the offset to the char values in the chars array to be converted </param>
+	  /// <param name="limit"> the index after the last element that should be used to calculate
+	  ///        codepoint.  
+	  /// </param>
+	  /// <returns> the Unicode code point at the given index </returns>
+	  /// <exception cref="NullPointerException">
+	  ///           - if the array is null. </exception>
+	  /// <exception cref="IndexOutOfBoundsException">
+	  ///           - if the value offset is negative or not less than the length of
+	  ///           the char array. </exception>
+	  public abstract int codePointAt(char[] chars, int offset, int limit);
+
+	  /// <summary>
+	  /// Return the number of characters in <code>seq</code>. </summary>
+	  public abstract int codePointCount(CharSequence seq);
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="CharacterBuffer"/> and allocates a <code>char[]</code>
+	  /// of the given bufferSize.
+	  /// </summary>
+	  /// <param name="bufferSize">
+	  ///          the internal char buffer size, must be <code>&gt;= 2</code> </param>
+	  /// <returns> a new <seealso cref="CharacterBuffer"/> instance. </returns>
+	  public static CharacterBuffer newCharacterBuffer(int bufferSize)
+	  {
+		if (bufferSize < 2)
+		{
+		  throw new System.ArgumentException("buffersize must be >= 2");
+		}
+		return new CharacterBuffer(new char[bufferSize], 0, 0);
+	  }
+
+
+	  /// <summary>
+	  /// Converts each unicode codepoint to lowerCase via <seealso cref="Character#toLowerCase(int)"/> starting 
+	  /// at the given offset. </summary>
+	  /// <param name="buffer"> the char buffer to lowercase </param>
+	  /// <param name="offset"> the offset to start at </param>
+	  /// <param name="limit"> the max char in the buffer to lower case </param>
+	  public void ToLower(char[] buffer, int offset, int limit)
+	  {
+		Debug.Assert(buffer.Length >= limit);
+		Debug.Assert(offset >= 0 && offset <= buffer.Length); // offset must lie within the buffer
+		for (int i = offset; i < limit;)
+		{
+		  i += Character.ToChars(Character.ToLowerCase(codePointAt(buffer, i, limit)), buffer, i);
+		}
+	  }
+
+	  /// <summary>
+	  /// Converts each unicode codepoint to UpperCase via <seealso cref="Character#toUpperCase(int)"/> starting 
+	  /// at the given offset. </summary>
+	  /// <param name="buffer"> the char buffer to UPPERCASE </param>
+	  /// <param name="offset"> the offset to start at </param>
+	  /// <param name="limit"> the max char in the buffer to lower case </param>
+	  public void ToUpper(char[] buffer, int offset, int limit)
+	  {
+		Debug.Assert(buffer.Length >= limit);
+		Debug.Assert(offset >= 0 && offset <= buffer.Length); // offset must lie within the buffer
+		for (int i = offset; i < limit;)
+		{
+		  i += Character.ToChars(Character.ToUpperCase(codePointAt(buffer, i, limit)), buffer, i);
+		}
+	  }
+
+	  /// <summary>
+	  /// Converts a sequence of Java characters to a sequence of unicode code points. </summary>
+	  ///  <returns> the number of code points written to the destination buffer  </returns>
+	  public int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
+	  {
+		if (srcLen < 0)
+		{
+		  throw new System.ArgumentException("srcLen must be >= 0");
+		}
+		int written = 0;
+		for (int i = 0; i < srcLen;)
+		{
+		  int cp = codePointAt(src, srcOff + i, srcOff + srcLen);
+		  int charCount = Character.CharCount(cp);
+		  dest[destOff + written++] = cp;
+		  i += charCount;
+		}
+		return written;
+	  }
+
+	  /// <summary>
+	  /// Converts a sequence of unicode code points to a sequence of Java characters. </summary>
+	  ///  <returns> the number of chars written to the destination buffer  </returns>
+	  public int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
+	  {
+		if (srcLen < 0)
+		{
+		  throw new System.ArgumentException("srcLen must be >= 0");
+		}
+		int written = 0;
+		for (int i = 0; i < srcLen; ++i)
+		{
+		  written += Character.ToChars(src[srcOff + i], dest, destOff + written);
+		}
+		return written;
+	  }
+
+	  /// <summary>
+	  /// Fills the <seealso cref="CharacterBuffer"/> with characters read from the given
+	  /// reader <seealso cref="Reader"/>. This method tries to read <code>numChars</code>
+	  /// characters into the <seealso cref="CharacterBuffer"/>, each call to fill will start
+	  /// filling the buffer from offset <code>0</code> up to <code>numChars</code>.
+	  /// In case code points can span across 2 java characters, this method may
+	  /// only fill <code>numChars - 1</code> characters in order not to split in
+	  /// the middle of a surrogate pair, even if there are remaining characters in
+	  /// the <seealso cref="Reader"/>.
+	  /// <para>
+	  /// Depending on the <seealso cref="Version"/> passed to
+	  /// <seealso cref="CharacterUtils#getInstance(Version)"/> this method implements
+	  /// supplementary character awareness when filling the given buffer. For all
+	  /// <seealso cref="Version"/> &gt; 3.0 <seealso cref="#fill(CharacterBuffer, Reader, int)"/> guarantees
+	  /// that the given <seealso cref="CharacterBuffer"/> will never contain a high surrogate
+	  /// character as the last element in the buffer unless it is the last available
+	  /// character in the reader. In other words, high and low surrogate pairs will
+	  /// always be preserved across buffer borders.
+	  /// </para>
+	  /// <para>
+	  /// A return value of <code>false</code> means that this method call exhausted
+	  /// the reader, but there may be some bytes which have been read, which can be
+	  /// verified by checking whether <code>buffer.getLength() &gt; 0</code>.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="buffer">
+	  ///          the buffer to fill. </param>
+	  /// <param name="reader">
+	  ///          the reader to read characters from. </param>
+	  /// <param name="numChars">
+	  ///          the number of chars to read </param>
+	  /// <returns> <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer </returns>
+	  /// <exception cref="IOException">
+	  ///           if the reader throws an <seealso cref="IOException"/>. </exception>
+	  public abstract bool fill(CharacterBuffer buffer, Reader reader, int numChars);
+
+	  /// <summary>
+	  /// Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. </summary>
+	  public bool fill(CharacterBuffer buffer, Reader reader)
+	  {
+		return fill(buffer, reader, buffer.buffer.Length);
+	  }
+
+	  /// <summary>
+	  /// Return the index within <code>buf[start:start+count]</code> which is by <code>offset</code>
+	  ///  code points from <code>index</code>. 
+	  /// </summary>
+	  public abstract int offsetByCodePoints(char[] buf, int start, int count, int index, int offset);
+
+	  internal static int readFully(Reader reader, char[] dest, int offset, int len)
+	  {
+		int read = 0;
+		while (read < len)
+		{
+		  int r = reader.read(dest, offset + read, len - read);
+		  if (r == -1)
+		  {
+			break;
+		  }
+		  read += r;
+		}
+		return read;
+	  }
+
+	  private sealed class Java5CharacterUtils : CharacterUtils
+	  {
+		internal Java5CharacterUtils()
+		{
+		}
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public int codePointAt(final CharSequence seq, final int offset)
+		public override int codePointAt(CharSequence seq, int offset)
+		{
+		  return char.codePointAt(seq, offset);
+		}
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public int codePointAt(final char[] chars, final int offset, final int limit)
+		public override int codePointAt(char[] chars, int offset, int limit)
+		{
+		 return char.codePointAt(chars, offset, limit);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean fill(final CharacterBuffer buffer, final java.io.Reader reader, int numChars) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+		public override bool fill(CharacterBuffer buffer, Reader reader, int numChars)
+		{
+		  Debug.Assert(buffer.buffer.Length >= 2);
+		  if (numChars < 2 || numChars > buffer.buffer.Length)
+		  {
+			throw new System.ArgumentException("numChars must be >= 2 and <= the buffer size");
+		  }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] charBuffer = buffer.buffer;
+		  char[] charBuffer = buffer.buffer;
+		  buffer.offset = 0;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int offset;
+		  int offset;
+
+		  // Install the previously saved ending high surrogate:
+		  if (buffer.lastTrailingHighSurrogate != 0)
+		  {
+			charBuffer[0] = buffer.lastTrailingHighSurrogate;
+			buffer.lastTrailingHighSurrogate = (char)0;
+			offset = 1;
+		  }
+		  else
+		  {
+			offset = 0;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int read = readFully(reader, charBuffer, offset, numChars - offset);
+		  int read = readFully(reader, charBuffer, offset, numChars - offset);
+
+		  buffer.length = offset + read;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean result = buffer.length == numChars;
+		  bool result = buffer.length == numChars;
+		  if (buffer.length < numChars)
+		  {
+			// We failed to fill the buffer. Even if the last char is a high
+			// surrogate, there is nothing we can do
+			return result;
+		  }
+
+		  if (char.IsHighSurrogate(charBuffer[buffer.length - 1]))
+		  {
+			buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
+		  }
+		  return result;
+		}
+
+		public override int codePointCount(CharSequence seq)
+		{
+		  return char.codePointCount(seq, 0, seq.length());
+		}
+
+		public override int offsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+		{
+		  return char.offsetByCodePoints(buf, start, count, index, offset);
+		}
+	  }
+
+	  private sealed class Java4CharacterUtils : CharacterUtils
+	  {
+		internal Java4CharacterUtils()
+		{
+		}
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public int codePointAt(final CharSequence seq, final int offset)
+		public override int codePointAt(CharSequence seq, int offset)
+		{
+		  return seq.charAt(offset);
+		}
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public int codePointAt(final char[] chars, final int offset, final int limit)
+		public override int codePointAt(char[] chars, int offset, int limit)
+		{
+		  if (offset >= limit)
+		  {
+			throw new System.IndexOutOfRangeException("offset must be less than limit");
+		  }
+		  return chars[offset];
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean fill(CharacterBuffer buffer, java.io.Reader reader, int numChars) throws java.io.IOException
+		public override bool fill(CharacterBuffer buffer, Reader reader, int numChars)
+		{
+		  Debug.Assert(buffer.buffer.Length >= 1);
+		  if (numChars < 1 || numChars > buffer.buffer.Length)
+		  {
+			throw new System.ArgumentException("numChars must be >= 1 and <= the buffer size");
+		  }
+		  buffer.offset = 0;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int read = readFully(reader, buffer.buffer, 0, numChars);
+		  int read = readFully(reader, buffer.buffer, 0, numChars);
+		  buffer.length = read;
+		  buffer.lastTrailingHighSurrogate = (char)0;
+		  return read == numChars;
+		}
+
+		public override int codePointCount(CharSequence seq)
+		{
+		  return seq.length();
+		}
+
+		public override int offsetByCodePoints(char[] buf, int start, int count, int index, int offset)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int result = index + offset;
+		  int result = index + offset;
+		  if (result < 0 || result > count)
+		  {
+			throw new System.IndexOutOfRangeException();
+		  }
+		  return result;
+		}
+
+	  }
+
+	  /// <summary>
+	  /// A simple IO buffer to use with
+	  /// <seealso cref="CharacterUtils#fill(CharacterBuffer, Reader)"/>.
+	  /// </summary>
+	  public sealed class CharacterBuffer
+	  {
+
+		internal readonly char[] buffer;
+		internal int offset;
+		internal int length;
+		// NOTE: not private so outer class can access without
+		// $access methods:
+		internal char lastTrailingHighSurrogate;
+
+		internal CharacterBuffer(char[] buffer, int offset, int length)
+		{
+		  this.buffer = buffer;
+		  this.offset = offset;
+		  this.length = length;
+		}
+
+		/// <summary>
+		/// Returns the internal buffer
+		/// </summary>
+		/// <returns> the buffer </returns>
+		public char[] Buffer
+		{
+			get
+			{
+			  return buffer;
+			}
+		}
+
+		/// <summary>
+		/// Returns the data offset in the internal buffer.
+		/// </summary>
+		/// <returns> the offset </returns>
+		public int Offset
+		{
+			get
+			{
+			  return offset;
+			}
+		}
+
+		/// <summary>
+		/// Return the length of the data in the internal buffer starting at
+		/// <seealso cref="#getOffset()"/>
+		/// </summary>
+		/// <returns> the length </returns>
+		public int Length
+		{
+			get
+			{
+			  return length;
+			}
+		}
+
+		/// <summary>
+		/// Resets the CharacterBuffer. All internals are reset to their default
+		/// values.
+		/// </summary>
+		public void reset()
+		{
+		  offset = 0;
+		  length = 0;
+		  lastTrailingHighSurrogate = (char)0;
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file
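
For reference, the fill() contract above is typically consumed in a loop like the following sketch. Member casing follows the Java-style names still present in this port and may change; getInstance and newCharacterBuffer are assumed from the Java original, and ProcessChars is a hypothetical callback:

    CharacterUtils charUtils = CharacterUtils.getInstance(matchVersion);
    CharacterUtils.CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(1024);
    bool more;
    do
    {
        // fill() never leaves a dangling high surrogate at the end of the
        // buffer (for matchVersion > 3.0) unless the reader itself ends on one
        more = charUtils.fill(buffer, reader);
        if (buffer.Length > 0)
        {
            ProcessChars(buffer.Buffer, buffer.Offset, buffer.Length);
        }
    } while (more);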

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
new file mode 100644
index 0000000..8b7c93b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
@@ -0,0 +1,105 @@
+using System;
+using System.Threading;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Simple <seealso cref="ResourceLoader"/> that uses <seealso cref="ClassLoader#getResourceAsStream(String)"/>
+	/// and <seealso cref="Class#forName(String,boolean,ClassLoader)"/> to open resources and
+	/// classes, respectively.
+	/// </summary>
+	public sealed class ClasspathResourceLoader : ResourceLoader
+	{
+	  private readonly Type clazz;
+	  private readonly ClassLoader loader;
+
+	  /// <summary>
+	  /// Creates an instance using the context classloader to load Resources and classes.
+	  /// Resource paths must be absolute.
+	  /// </summary>
+	  public ClasspathResourceLoader() : this(Thread.CurrentThread.ContextClassLoader)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates an instance using the given classloader to load Resources and classes.
+	  /// Resource paths must be absolute.
+	  /// </summary>
+	  public ClasspathResourceLoader(ClassLoader loader) : this(null, loader)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates an instance using the given class's classloader to load Resources and classes.
+	  /// Resources are resolved relative to the given class, if the path is not absolute.
+	  /// </summary>
+	  public ClasspathResourceLoader(Type clazz) : this(clazz, clazz.ClassLoader)
+	  {
+	  }
+
+	  private ClasspathResourceLoader(Type clazz, ClassLoader loader)
+	  {
+		this.clazz = clazz;
+		this.loader = loader;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public java.io.InputStream openResource(String resource) throws java.io.IOException
+	  public InputStream openResource(string resource)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.io.InputStream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
+		InputStream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
+		if (stream == null)
+		{
+		  throw new IOException("Resource not found: " + resource);
+		}
+		return stream;
+	  }
+
+	  public Type findClass<T>(string cname, Type expectedType)
+	  {
+		try
+		{
+		  return Type.GetType(cname, true, loader).asSubclass(expectedType);
+		}
+		catch (Exception e)
+		{
+		  throw new Exception("Cannot load class: " + cname, e);
+		}
+	  }
+
+	  public T newInstance<T>(string cname, Type expectedType)
+	  {
+		Type clazz = findClass(cname, expectedType);
+		try
+		{
+		  return clazz.newInstance();
+		}
+		catch (Exception e)
+		{
+		  throw new Exception("Cannot create instance: " + cname, e);
+		}
+	  }
+	}
+
+}
\ No newline at end of file
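
Usage is a one-liner; a minimal sketch, assuming a hypothetical MyAnalyzer type and resource name:

    ResourceLoader loader = new ClasspathResourceLoader(typeof(MyAnalyzer));
    // resolved relative to MyAnalyzer because the path is not absolute
    InputStream stream = loader.openResource("french_articles.txt");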

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
new file mode 100644
index 0000000..2571ccd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
@@ -0,0 +1,80 @@
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Removes elisions from a <seealso cref="TokenStream"/>. For example, "l'avion" (the plane) will be
+	/// tokenized as "avion" (plane).
+	/// </summary>
+	/// <seealso cref= <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a> </seealso>
+	public sealed class ElisionFilter : TokenFilter
+	{
+	  private readonly CharArraySet articles;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Constructs an elision filter with a Set of stop words </summary>
+	  /// <param name="input"> the source <seealso cref="TokenStream"/> </param>
+	  /// <param name="articles"> a set of stopword articles </param>
+	  public ElisionFilter(TokenStream input, CharArraySet articles) : base(input)
+	  {
+		this.articles = articles;
+	  }
+
+	  /// <summary>
+	  /// Increments the <seealso cref="TokenStream"/>, producing a <seealso cref="CharTermAttribute"/> with any elided article removed from the start
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  char[] termBuffer = termAtt.buffer();
+		  int termLength = termAtt.length();
+
+		  int index = -1;
+		  for (int i = 0; i < termLength; i++)
+		  {
+			char ch = termBuffer[i];
+			if (ch == '\'' || ch == '\u2019')
+			{
+			  index = i;
+			  break;
+			}
+		  }
+
+			// An apostrophe has been found. If the prefix is an article, strip it off.
+		  if (index >= 0 && articles.contains(termBuffer, 0, index))
+		  {
+			termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1));
+		  }
+
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
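
A minimal sketch of wiring the filter into an analysis chain, assuming a WhitespaceTokenizer port and the CharArraySet constructor from the Java original:

    CharArraySet articles = new CharArraySet(matchVersion,
        new[] { "l", "m", "t", "qu", "n", "s", "j" }, true /* ignoreCase */);
    TokenStream stream = new WhitespaceTokenizer(matchVersion, reader);
    stream = new ElisionFilter(stream, articles);
    // input "l'avion" now comes out as the single token "avion"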

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
new file mode 100644
index 0000000..7dc1bbe
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
@@ -0,0 +1,86 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using FrenchAnalyzer = org.apache.lucene.analysis.fr.FrenchAnalyzer;
+
+	/// <summary>
+	/// Factory for <seealso cref="ElisionFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ElisionFilterFactory" 
+	///       articles="stopwordarticles.txt" ignoreCase="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ElisionFilterFactory : TokenFilterFactory, ResourceLoaderAware, MultiTermAwareComponent
+	{
+	  private readonly string articlesFile;
+	  private readonly bool ignoreCase;
+	  private CharArraySet articles;
+
+	  /// <summary>
+	  /// Creates a new ElisionFilterFactory </summary>
+	  public ElisionFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		articlesFile = get(args, "articles");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (articlesFile == null)
+		{
+		  articles = FrenchAnalyzer.DEFAULT_ARTICLES;
+		}
+		else
+		{
+		  articles = getWordSet(loader, articlesFile, ignoreCase);
+		}
+	  }
+
+	  public override ElisionFilter create(TokenStream input)
+	  {
+		return new ElisionFilter(input, articles);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+
+}
\ No newline at end of file
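
Outside of Solr, the factory can also be driven programmatically; a sketch (the articles file name and the ResourceLoader instance are hypothetical):

    var args = new Dictionary<string, string>
    {
        { "articles", "frencharticles.txt" },
        { "ignoreCase", "true" }
    };
    var factory = new ElisionFilterFactory(args);   // consumes and validates args
    factory.inform(loader);                         // loads the word set via a ResourceLoader
    TokenStream filtered = factory.create(input);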

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
new file mode 100644
index 0000000..598fef8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
@@ -0,0 +1,113 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Simple <seealso cref="ResourceLoader"/> that opens resource files
+	/// from the local file system, optionally resolving against
+	/// a base directory.
+	/// 
+	/// <para>This loader wraps a delegate <seealso cref="ResourceLoader"/>
+	/// that is used to resolve all files the current base directory
+	/// does not contain. <seealso cref="#newInstance"/> is always resolved
+	/// against the delegate, as a <seealso cref="ClassLoader"/> is needed.
+	/// 
+	/// </para>
+	/// <para>You can chain several {@code FilesystemResourceLoader}s
+	/// to allow lookup of files in more than one base directory.
+	/// </para>
+	/// </summary>
+	public sealed class FilesystemResourceLoader : ResourceLoader
+	{
+	  private readonly File baseDirectory;
+	  private readonly ResourceLoader @delegate;
+
+	  /// <summary>
+	  /// Creates a resource loader that resolves resources against absolute
+	  /// filenames or paths relative to the CWD. Files not found in the file
+	  /// system, and all class lookups, are delegated to the context classloader.
+	  /// </summary>
+	  public FilesystemResourceLoader() : this((File) null)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a resource loader that resolves resources against the given
+	  /// base directory (may be {@code null} to refer to CWD).
+	  /// Files not found in file system and class lookups are delegated to context
+	  /// classloader.
+	  /// </summary>
+	  public FilesystemResourceLoader(File baseDirectory) : this(baseDirectory, new ClasspathResourceLoader())
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a resource loader that resolves resources against the given
+	  /// base directory (may be {@code null} to refer to CWD).
+	  /// Files not found in file system and class lookups are delegated
+	  /// to the given delegate <seealso cref="ResourceLoader"/>.
+	  /// </summary>
+	  public FilesystemResourceLoader(File baseDirectory, ResourceLoader @delegate)
+	  {
+		if (baseDirectory != null && !baseDirectory.Directory)
+		{
+		  throw new System.ArgumentException("baseDirectory is not a directory or null");
+		}
+		if (@delegate == null)
+		{
+		  throw new System.ArgumentException("delegate ResourceLoader may not be null");
+		}
+		this.baseDirectory = baseDirectory;
+		this.@delegate = @delegate;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public java.io.InputStream openResource(String resource) throws java.io.IOException
+	  public InputStream openResource(string resource)
+	  {
+		try
+		{
+		  File file = new File(resource);
+		  if (baseDirectory != null && !file.Absolute)
+		  {
+			file = new File(baseDirectory, resource);
+		  }
+		  return new FileInputStream(file);
+		}
+		catch (FileNotFoundException)
+		{
+		  return @delegate.openResource(resource);
+		}
+	  }
+
+	  public T newInstance<T>(string cname, Type expectedType)
+	  {
+		return @delegate.newInstance(cname, expectedType);
+	  }
+
+	  public Type findClass<T>(string cname, Type expectedType)
+	  {
+		return @delegate.findClass(cname, expectedType);
+	  }
+	}
+
+}
\ No newline at end of file
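
For example, chaining two base directories with a classpath fallback might look like this sketch (the directory names are hypothetical):

    ResourceLoader loader =
        new FilesystemResourceLoader(new File("/etc/myapp/conf"),
            new FilesystemResourceLoader(new File("/usr/share/myapp/conf"),
                new ClasspathResourceLoader()));
    // lookup order: /etc/myapp/conf, then /usr/share/myapp/conf, then classpath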

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
new file mode 100644
index 0000000..4d55a25
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -0,0 +1,150 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Abstract base class for TokenFilters that may remove tokens.
+    /// You have to implement <seealso cref="#accept"/> and return <code>true</code> if the current
+    /// token should be preserved. <seealso cref="#incrementToken"/> uses this method
+    /// to decide if a token should be passed to the caller.
+    /// <para><a name="lucene_match_version" />As of Lucene 4.4, an
+    /// <seealso cref="IllegalArgumentException"/> is thrown when trying to disable position
+    /// increments when filtering terms.
+    /// </para>
+    /// </summary>
+    public abstract class FilteringTokenFilter : TokenFilter
+    {
+
+        private static void CheckPositionIncrement(Version version, bool enablePositionIncrements)
+        {
+            if (!enablePositionIncrements && version.OnOrAfter(Version.LUCENE_44))
+            {
+                throw new System.ArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4 as it can create broken token streams");
+            }
+        }
+
+        protected internal readonly Version version;
+        private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+        private bool enablePositionIncrements; // no init needed, as ctor enforces setting value!
+        private bool first = true;
+        private int skippedPositions;
+
+        /// <summary>
+        /// Create a new <seealso cref="FilteringTokenFilter"/>. </summary>
+        /// <param name="version">                  the <a href="#lucene_match_version">Lucene match version</a> </param>
+        /// <param name="enablePositionIncrements"> whether to increment position increments when filtering out terms </param>
+        /// <param name="input">                    the input to consume </param>
+        /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4 
+        [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4")]
+        public FilteringTokenFilter(Version version, bool enablePositionIncrements, TokenStream input)
+            : this(version, input)
+        {
+            CheckPositionIncrement(version, enablePositionIncrements);
+            this.enablePositionIncrements = enablePositionIncrements;
+        }
+
+        /// <summary>
+        /// Create a new <seealso cref="FilteringTokenFilter"/>. </summary>
+        /// <param name="version"> the Lucene match version </param>
+        /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+        public FilteringTokenFilter(Version version, TokenStream @in)
+            : base(@in)
+        {
+            this.version = version;
+            this.enablePositionIncrements = true;
+        }
+
+        /// <summary>
+        /// Override this method and return if the current input token should be returned by <seealso cref="#incrementToken"/>. </summary>
+        protected internal abstract bool Accept();
+
+        public override bool IncrementToken()
+        {
+            if (enablePositionIncrements)
+            {
+                skippedPositions = 0;
+                while (Input.IncrementToken())
+                {
+                    if (Accept())
+                    {
+                        if (skippedPositions != 0)
+                        {
+                            posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+                        }
+                        return true;
+                    }
+                    skippedPositions += posIncrAtt.PositionIncrement;
+                }
+            }
+            else
+            {
+                while (Input.IncrementToken())
+                {
+                    if (Accept())
+                    {
+                        if (first)
+                        {
+                            // first token having posinc=0 is illegal.
+                            if (posIncrAtt.PositionIncrement == 0)
+                            {
+                                posIncrAtt.PositionIncrement = 1;
+                            }
+                            first = false;
+                        }
+                        return true;
+                    }
+                }
+            }
+            // reached EOS -- return false
+            return false;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            first = true;
+            skippedPositions = 0;
+        }
+
+        /// <seealso cref= #setEnablePositionIncrements(boolean) </seealso>
+        public virtual bool EnablePositionIncrements
+        {
+            get
+            {
+                return enablePositionIncrements;
+            }
+            set
+            {
+                CheckPositionIncrement(version, value);
+                this.enablePositionIncrements = value;
+            }
+        }
+
+        public override void End()
+        {
+            base.End();
+            if (enablePositionIncrements)
+            {
+                posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+            }
+        }
+    }
+}
\ No newline at end of file
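
A minimal concrete subclass only needs Accept(); the sketch below keeps tokens of a minimum length (a hypothetical class for illustration, and attribute member casing is still in flux in this port):

    public sealed class MinLengthFilter : FilteringTokenFilter
    {
        private readonly CharTermAttribute termAtt;
        private readonly int minLength;

        public MinLengthFilter(Version version, TokenStream input, int minLength)
            : base(version, input)
        {
            this.termAtt = addAttribute(typeof(CharTermAttribute));
            this.minLength = minLength;
        }

        protected internal override bool Accept()
        {
            // keep the token only if its term text is long enough
            return termAtt.length() >= minLength;
        }
    }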

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
new file mode 100644
index 0000000..64cdb36
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
@@ -0,0 +1,39 @@
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Add to any analysis factory component to allow returning an
+	/// analysis component factory for use with partial terms in prefix queries,
+	/// wildcard queries, range query endpoints, regex queries, etc.
+	/// 
+	/// @lucene.experimental
+	/// </summary>
+	public interface MultiTermAwareComponent
+	{
+	  /// <summary>
+	  /// Returns an analysis component to handle analysis of multi-term queries.
+	  /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
+	  /// </summary>
+	  AbstractAnalysisFactory MultiTermComponent {get;}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
new file mode 100644
index 0000000..ead67a2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -0,0 +1,205 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A StringBuilder that allows one to access the array.
+	/// </summary>
+	public class OpenStringBuilder : Appendable, CharSequence
+	{
+	  protected internal char[] buf;
+	  protected internal int len;
+
+	  public OpenStringBuilder() : this(32)
+	  {
+	  }
+
+	  public OpenStringBuilder(int size)
+	  {
+		buf = new char[size];
+	  }
+
+	  public OpenStringBuilder(char[] arr, int len)
+	  {
+		set(arr, len);
+	  }
+
+	  public virtual int Length
+	  {
+		  set
+		  {
+			  this.len = value;
+		  }
+	  }
+
+	  public virtual void set(char[] arr, int end)
+	  {
+		this.buf = arr;
+		this.len = end;
+	  }
+
+	  public virtual char[] Array
+	  {
+		  get
+		  {
+			  return buf;
+		  }
+	  }
+	  public virtual int size()
+	  {
+		  return len;
+	  }
+	  public override int length()
+	  {
+		  return len;
+	  }
+	  public virtual int capacity()
+	  {
+		  return buf.Length;
+	  }
+
+	  public override Appendable append(CharSequence csq)
+	  {
+		return append(csq, 0, csq.length());
+	  }
+
+	  public override Appendable append(CharSequence csq, int start, int end)
+	  {
+		reserve(end - start);
+		for (int i = start; i < end; i++)
+		{
+		  unsafeWrite(csq.charAt(i));
+		}
+		return this;
+	  }
+
+	  public override Appendable append(char c)
+	  {
+		write(c);
+		return this;
+	  }
+
+	  public override char charAt(int index)
+	  {
+		return buf[index];
+	  }
+
+	  public virtual void setCharAt(int index, char ch)
+	  {
+		buf[index] = ch;
+	  }
+
+	  public override CharSequence subSequence(int start, int end)
+	  {
+		throw new System.NotSupportedException(); // todo
+	  }
+
+	  public virtual void unsafeWrite(char b)
+	  {
+		buf[len++] = b;
+	  }
+
+	  public virtual void unsafeWrite(int b)
+	  {
+		  unsafeWrite((char)b);
+	  }
+
+	  public virtual void unsafeWrite(char[] b, int off, int len)
+	  {
+		Array.Copy(b, off, buf, this.len, len);
+		this.len += len;
+	  }
+
+	  protected internal virtual void resize(int len)
+	  {
+		char[] newbuf = new char[Math.Max(buf.Length << 1, len)];
+		Array.Copy(buf, 0, newbuf, 0, size());
+		buf = newbuf;
+	  }
+
+	  public virtual void reserve(int num)
+	  {
+		if (len + num > buf.Length)
+		{
+			resize(len + num);
+		}
+	  }
+
+	  public virtual void write(char b)
+	  {
+		if (len >= buf.Length)
+		{
+		  resize(len + 1);
+		}
+		unsafeWrite(b);
+	  }
+
+	  public virtual void write(int b)
+	  {
+		  write((char)b);
+	  }
+
+	  public void write(char[] b)
+	  {
+		write(b,0,b.Length);
+	  }
+
+	  public virtual void write(char[] b, int off, int len)
+	  {
+		reserve(len);
+		unsafeWrite(b, off, len);
+	  }
+
+	  public void write(OpenStringBuilder arr)
+	  {
+		write(arr.buf, 0, arr.len);
+	  }
+
+	  public virtual void write(string s)
+	  {
+		reserve(s.Length);
+		s.CopyTo(0, buf, len, s.Length);
+		len += s.Length;
+	  }
+
+	  public virtual void flush()
+	  {
+	  }
+
+	  public void reset()
+	  {
+		len = 0;
+	  }
+
+	  public virtual char[] ToCharArray()
+	  {
+		char[] newbuf = new char[size()];
+		Array.Copy(buf, 0, newbuf, 0, size());
+		return newbuf;
+	  }
+
+	  public override string ToString()
+	  {
+		return new string(buf, 0, size());
+	  }
+	}
+
+}
\ No newline at end of file
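
A short usage sketch; unlike System.Text.StringBuilder, the backing array is exposed directly:

    var sb = new OpenStringBuilder();
    sb.write("l'avion");
    sb.setCharAt(1, '\u2019');      // replace the ASCII apostrophe in place
    char[] raw = sb.Array;          // the live backing array, valid up to sb.size()
    string copy = sb.ToString();    // "l’avion"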

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
new file mode 100644
index 0000000..3e4bc1f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
@@ -0,0 +1,49 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Abstraction for loading resources (streams, files, and classes).
+	/// </summary>
+	public interface ResourceLoader
+	{
+
+	  /// <summary>
+	  /// Opens a named resource
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public java.io.InputStream openResource(String resource) throws java.io.IOException;
+	  InputStream openResource(string resource);
+
+
+	  /// <summary>
+	  /// Finds class of the name and expected type
+	  /// </summary>
+	  Type findClass<T>(string cname, Type expectedType);
+
+	  /// <summary>
+	  /// Creates an instance of the name and expected type
+	  /// </summary>
+	  // TODO: fix exception handling
+	  T newInstance<T>(string cname, Type expectedType);
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
new file mode 100644
index 0000000..97fe682
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/// <summary>
+	/// Interface for a component that needs to be initialized by
+	/// an implementation of <seealso cref="ResourceLoader"/>.
+	/// </summary>
+	/// <seealso cref= ResourceLoader </seealso>
+	public interface ResourceLoaderAware
+	{
+
+	  /// <summary>
+	  /// Initializes this component with the provided ResourceLoader
+	  /// (used for loading classes, files, etc).
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: void inform(ResourceLoader loader) throws java.io.IOException;
+	  void inform(ResourceLoader loader);
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
new file mode 100644
index 0000000..1aae904
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
@@ -0,0 +1,200 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+	/// <summary>
+	/// Acts like a forever growing char[] as you read
+	///  characters into it from the provided reader, but
+	///  internally it uses a circular buffer to only hold the
+	///  characters that haven't been freed yet.  This is like a
+	///  PushbackReader, except you don't have to specify
+	///  up-front the max size of the buffer, but you do have to
+	///  periodically call <seealso cref="#freeBefore"/>. 
+	/// </summary>
+
+	public sealed class RollingCharBuffer
+	{
+
+	  private Reader reader;
+
+	  private char[] buffer = new char[512];
+
+	  // Next array index to write to in buffer:
+	  private int nextWrite;
+
+	  // Next absolute position to read from reader:
+	  private int nextPos;
+
+	  // How many valid chars (wrapped) are in the buffer:
+	  private int count;
+
+	  // True if we hit EOF
+	  private bool end;
+
+	  /// <summary>
+	  /// Clear array and switch to new reader. </summary>
+	  public void reset(Reader reader)
+	  {
+		this.reader = reader;
+		nextPos = 0;
+		nextWrite = 0;
+		count = 0;
+		end = false;
+	  }
+
+	  /* Absolute position read.  NOTE: pos must not jump
+	   * ahead by more than 1!  I.e., it's OK to read arbitrarily
+	   * far back (just not prior to the last {@link
+	   * #freeBefore}), but NOT ok to read arbitrarily far
+	   * ahead.  Returns -1 if you hit EOF. */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public int get(int pos) throws java.io.IOException
+	  public int get(int pos)
+	  {
+		//System.out.println("    get pos=" + pos + " nextPos=" + nextPos + " count=" + count);
+		if (pos == nextPos)
+		{
+		  if (end)
+		  {
+			return -1;
+		  }
+		  if (count == buffer.Length)
+		  {
+			// Grow
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] newBuffer = new char[org.apache.lucene.util.ArrayUtil.oversize(1+count, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR)];
+			char[] newBuffer = new char[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_CHAR)];
+			//System.out.println(Thread.currentThread().getName() + ": cb grow " + newBuffer.length);
+			Array.Copy(buffer, nextWrite, newBuffer, 0, buffer.Length - nextWrite);
+			Array.Copy(buffer, 0, newBuffer, buffer.Length - nextWrite, nextWrite);
+			nextWrite = buffer.Length;
+			buffer = newBuffer;
+		  }
+		  if (nextWrite == buffer.Length)
+		  {
+			nextWrite = 0;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int toRead = buffer.length - Math.max(count, nextWrite);
+		  int toRead = buffer.Length - Math.Max(count, nextWrite);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int readCount = reader.read(buffer, nextWrite, toRead);
+		  int readCount = reader.read(buffer, nextWrite, toRead);
+		  if (readCount == -1)
+		  {
+			end = true;
+			return -1;
+		  }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ch = buffer[nextWrite];
+		  int ch = buffer[nextWrite];
+		  nextWrite += readCount;
+		  count += readCount;
+		  nextPos += readCount;
+		  return ch;
+		}
+		else
+		{
+		  // Cannot read from future (except by 1):
+		  Debug.Assert(pos < nextPos);
+
+		  // Cannot read from already freed past:
+		  Debug.Assert(nextPos - pos <= count, "nextPos=" + nextPos + " pos=" + pos + " count=" + count);
+
+		  return buffer[getIndex(pos)];
+		}
+	  }
+
+	  // For assert:
+	  private bool inBounds(int pos)
+	  {
+		return pos >= 0 && pos < nextPos && pos >= nextPos - count;
+	  }
+
+	  private int getIndex(int pos)
+	  {
+		int index = nextWrite - (nextPos - pos);
+		if (index < 0)
+		{
+		  // Wrap:
+		  index += buffer.Length;
+		  Debug.Assert(index >= 0);
+		}
+		return index;
+	  }
+
+	  public char[] get(int posStart, int length)
+	  {
+		Debug.Assert(length > 0);
+		Debug.Assert(inBounds(posStart), "posStart=" + posStart + " length=" + length);
+		//System.out.println("    buffer.get posStart=" + posStart + " len=" + length);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int startIndex = getIndex(posStart);
+		int startIndex = getIndex(posStart);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int endIndex = getIndex(posStart + length);
+		int endIndex = getIndex(posStart + length);
+		//System.out.println("      startIndex=" + startIndex + " endIndex=" + endIndex);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] result = new char[length];
+		char[] result = new char[length];
+		if (endIndex >= startIndex && length < buffer.Length)
+		{
+		  Array.Copy(buffer, startIndex, result, 0, endIndex - startIndex);
+		}
+		else
+		{
+		  // Wrapped:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int part1 = buffer.length-startIndex;
+		  int part1 = buffer.Length - startIndex;
+		  Array.Copy(buffer, startIndex, result, 0, part1);
+		  Array.Copy(buffer, 0, result, buffer.Length - startIndex, length - part1);
+		}
+		return result;
+	  }
+
+	  /// <summary>
+	  /// Call this to notify us that no chars before this
+	  ///  absolute position are needed anymore. 
+	  /// </summary>
+	  public void freeBefore(int pos)
+	  {
+		Debug.Assert(pos >= 0);
+		Debug.Assert(pos <= nextPos);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newCount = nextPos - pos;
+		int newCount = nextPos - pos;
+		Debug.Assert(newCount <= count, "newCount=" + newCount + " count=" + count);
+		Debug.Assert(newCount <= buffer.Length, "newCount=" + newCount + " buf.length=" + buffer.Length);
+		count = newCount;
+	  }
+	}
+
+}
\ No newline at end of file
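
A minimal consumption sketch: read forward one position at a time and periodically free what will not be re-read:

    var rolling = new RollingCharBuffer();
    rolling.reset(reader);
    int pos = 0;
    int ch;
    while ((ch = rolling.get(pos)) != -1)
    {
        pos++;
        if (pos % 4096 == 0)
        {
            rolling.freeBefore(pos);    // keeps the internal buffer from growing forever
        }
    }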

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
new file mode 100644
index 0000000..873936e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -0,0 +1,258 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+	/// <summary>
+	/// Breaks text into sentences with a <seealso cref="BreakIterator"/> and
+	/// allows subclasses to decompose these sentences into words.
+	/// <para>
+	/// This can be used by subclasses that need sentence context 
+	/// for tokenization purposes, such as CJK segmenters.
+	/// </para>
+	/// <para>
+	/// Additionally it can be used by subclasses that want to mark
+	/// sentence boundaries (with a custom attribute, extra token, position
+	/// increment, etc) for downstream processing.
+	/// 
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	public abstract class SegmentingTokenizerBase : Tokenizer
+	{
+	  protected internal const int BUFFERMAX = 1024;
+	  protected internal readonly char[] buffer = new char[BUFFERMAX];
+	  /// <summary>
+	  /// true length of text in the buffer </summary>
+	  private int length = 0;
+	  /// <summary>
+	  /// length in buffer that can be evaluated safely, up to a safe end point </summary>
+	  private int usableLength = 0;
+	  /// <summary>
+	  /// accumulated offset of previous buffers for this reader, for offsetAtt </summary>
+	  protected internal int offset = 0;
+
+	  private readonly BreakIterator iterator;
+	  private readonly CharArrayIterator wrapper = CharArrayIterator.newSentenceInstance();
+
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  /// <summary>
+	  /// Construct a new SegmentingTokenizerBase, using
+	  /// the provided BreakIterator for sentence segmentation.
+	  /// <para>
+	  /// Note that you should never share BreakIterators across different
+	  /// TokenStreams, instead a newly created or cloned one should always
+	  /// be provided to this constructor.
+	  /// </para>
+	  /// </summary>
+	  public SegmentingTokenizerBase(Reader reader, BreakIterator iterator) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new SegmentingTokenizerBase, also supplying the AttributeFactory
+	  /// </summary>
+	  public SegmentingTokenizerBase(AttributeFactory factory, Reader reader, BreakIterator iterator) : base(factory, reader)
+	  {
+		this.iterator = iterator;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (length == 0 || !incrementWord())
+		{
+		  while (!incrementSentence())
+		  {
+			refill();
+			if (length <= 0) // no more chars to read
+			{
+			  return false;
+			}
+		  }
+		}
+
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		wrapper.setText(buffer, 0, 0);
+		iterator.Text = wrapper;
+		length = usableLength = offset = 0;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = correctOffset(length < 0 ? offset : offset + length);
+		int finalOffset = correctOffset(length < 0 ? offset : offset + length);
+		offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+	  /// <summary>
+	  /// Returns the last unambiguous break position in the text. </summary>
+	  private int findSafeEnd()
+	  {
+		for (int i = length - 1; i >= 0; i--)
+		{
+		  if (isSafeEnd(buffer[i]))
+		  {
+			return i + 1;
+		  }
+		}
+		return -1;
+	  }
+
+	  /// <summary>
+	  /// For sentence tokenization, these are the unambiguous break positions. </summary>
+	  protected internal virtual bool isSafeEnd(char ch)
+	  {
+		switch (ch)
+		{
+		  case 0x000D:
+		  case 0x000A:
+		  case 0x0085:
+		  case 0x2028:
+		  case 0x2029:
+			return true;
+		  default:
+			return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Refill the buffer, accumulating the offset and setting usableLength to the
+	  /// last unambiguous break position
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void refill() throws java.io.IOException
+	  private void refill()
+	  {
+		offset += usableLength;
+		int leftover = length - usableLength;
+		Array.Copy(buffer, usableLength, buffer, 0, leftover);
+		int requested = buffer.Length - leftover;
+		int returned = read(input, buffer, leftover, requested);
+		length = returned < 0 ? leftover : returned + leftover;
+		if (returned < requested) // reader has been emptied, process the rest
+		{
+		  usableLength = length;
+		}
+		else // still more data to be read, find a safe-stopping place
+		{
+		  usableLength = findSafeEnd();
+		  if (usableLength < 0)
+		  {
+			usableLength = length; // more than BUFFERMAX of text without breaks; may truncate tokens
+		  }
+		}
+
+		wrapper.setText(buffer, 0, Math.Max(0, usableLength));
+		iterator.Text = wrapper;
+	  }
+
+	  // TODO: refactor to a shared readFully somewhere
+	  // (NGramTokenizer does this too):
+	  /// <summary>
+	  /// commons-io's readFully, but without bugs if offset != 0 </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static int read(java.io.Reader input, char[] buffer, int offset, int length) throws java.io.IOException
+	  private static int read(Reader input, char[] buffer, int offset, int length)
+	  {
+		Debug.Assert(length >= 0, "length must not be negative: " + length);
+
+		int remaining = length;
+		while (remaining > 0)
+		{
+		  int location = length - remaining;
+		  int count = input.read(buffer, offset + location, remaining);
+		  if (-1 == count) // EOF
+		  {
+			break;
+		  }
+		  remaining -= count;
+		}
+		return length - remaining;
+	  }
+
+	  /// <summary>
+	  /// Returns true if there is a token from the buffer, or false if it is
+	  /// exhausted.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private boolean incrementSentence() throws java.io.IOException
+	  private bool incrementSentence()
+	  {
+		if (length == 0) // we must refill the buffer
+		{
+		  return false;
+		}
+
+		while (true)
+		{
+		  int start = iterator.current();
+
+		  if (start == BreakIterator.DONE)
+		  {
+			return false; // BreakIterator exhausted
+		  }
+
+		  // find the next set of boundaries
+		  int end_Renamed = iterator.next();
+
+		  if (end_Renamed == BreakIterator.DONE)
+		  {
+			return false; // BreakIterator exhausted
+		  }
+
+		  setNextSentence(start, end_Renamed);
+		  if (incrementWord())
+		  {
+			return true;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Provides the next input sentence for analysis </summary>
+	  protected internal abstract void setNextSentence(int sentenceStart, int sentenceEnd);
+
+	  /// <summary>
+	  /// Returns true if another word is available </summary>
+	  protected internal abstract bool incrementWord();
+	}
+
+}
\ No newline at end of file
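
A minimal sketch of a subclass that emits each sentence as a single token (a hypothetical class; real subclasses usually decompose the sentence into words, and BreakIterator/Locale follow the Java API still referenced by this port):

    public sealed class SentenceTokenizer : SegmentingTokenizerBase
    {
        private readonly CharTermAttribute termAtt;
        private readonly OffsetAttribute offsetAtt;
        private int sentenceStart, sentenceEnd;
        private bool hasSentence;

        public SentenceTokenizer(Reader reader)
            : base(reader, BreakIterator.getSentenceInstance(Locale.ROOT))
        {
            termAtt = addAttribute(typeof(CharTermAttribute));
            offsetAtt = addAttribute(typeof(OffsetAttribute));
        }

        protected internal override void setNextSentence(int sentenceStart, int sentenceEnd)
        {
            this.sentenceStart = sentenceStart;
            this.sentenceEnd = sentenceEnd;
            hasSentence = true;
        }

        protected internal override bool incrementWord()
        {
            if (!hasSentence)
            {
                return false;
            }
            hasSentence = false;
            clearAttributes();
            // copy the whole sentence from the shared buffer into the term
            termAtt.copyBuffer(buffer, sentenceStart, sentenceEnd - sentenceStart);
            offsetAtt.setOffset(correctOffset(offset + sentenceStart),
                                correctOffset(offset + sentenceEnd));
            return true;
        }
    }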

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
new file mode 100644
index 0000000..e8a1ddc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
@@ -0,0 +1,153 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Some commonly-used stemming functions
+	/// 
+	/// @lucene.internal
+	/// </summary>
+	public class StemmerUtil
+	{
+	  /// <summary>
+	  /// no instance </summary>
+	  private StemmerUtil()
+	  {
+	  }
+
+	  /// <summary>
+	  /// Returns true if the character array starts with the prefix.
+	  /// </summary>
+	  /// <param name="s"> Input Buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="prefix"> Prefix string to test </param>
+	  /// <returns> true if <code>s</code> starts with <code>prefix</code> </returns>
+	  public static bool StartsWith(char[] s, int len, string prefix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int prefixLen = prefix.length();
+		int prefixLen = prefix.Length;
+		if (prefixLen > len)
+		{
+		  return false;
+		}
+		for (int i = 0; i < prefixLen; i++)
+		{
+		  if (s[i] != prefix[i])
+		  {
+			return false;
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Returns true if the character array ends with the suffix.
+	  /// </summary>
+	  /// <param name="s"> Input Buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="suffix"> Suffix string to test </param>
+	  /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+	  public static bool EndsWith(char[] s, int len, string suffix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int suffixLen = suffix.length();
+		int suffixLen = suffix.Length;
+		if (suffixLen > len)
+		{
+		  return false;
+		}
+		for (int i = suffixLen - 1; i >= 0; i--)
+		{
+		  if (s[len - (suffixLen - i)] != suffix[i])
+		  {
+			return false;
+		  }
+		}
+
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Returns true if the character array ends with the suffix.
+	  /// </summary>
+	  /// <param name="s"> Input Buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="suffix"> Suffix string to test </param>
+	  /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+	  public static bool EndsWith(char[] s, int len, char[] suffix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int suffixLen = suffix.length;
+		int suffixLen = suffix.Length;
+		if (suffixLen > len)
+		{
+		  return false;
+		}
+		for (int i = suffixLen - 1; i >= 0; i--)
+		{
+		  if (s[len - (suffixLen - i)] != suffix[i])
+		  {
+			return false;
+		  }
+		}
+
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Delete a character in-place
+	  /// </summary>
+	  /// <param name="s"> Input Buffer </param>
+	  /// <param name="pos"> Position of character to delete </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after deletion </returns>
+	  public static int delete(char[] s, int pos, int len)
+	  {
+		Debug.Assert(pos < len);
+		if (pos < len - 1) // don't arraycopy if asked to delete last character
+		{
+		  Array.Copy(s, pos + 1, s, pos, len - pos - 1);
+		}
+		return len - 1;
+	  }
+
+	  /// <summary>
+	  /// Delete n characters in-place
+	  /// </summary>
+	  /// <param name="s"> Input Buffer </param>
+	  /// <param name="pos"> Position of character to delete </param>
+	  /// <param name="len"> Length of input buffer </param>
+	  /// <param name="nChars"> number of characters to delete </param>
+	  /// <returns> length of input buffer after deletion </returns>
+	  public static int deleteN(char[] s, int pos, int len, int nChars)
+	  {
+		Debug.Assert(pos + nChars <= len);
+		if (pos + nChars < len) // don't arraycopy if asked to delete the last characters
+		{
+		  Array.Copy(s, pos + nChars, s, pos, len - pos - nChars);
+		}
+		return len - nChars;
+	  }
+	}
+
+}
\ No newline at end of file

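StemmerUtil works on a char buffer plus an explicit logical length, so a stemmer can shrink a term in place without reallocating. A short sketch of how EndsWith and deleteN compose, using the member names exactly as ported above (the surrounding harness is illustrative only):

    using System;
    using org.apache.lucene.analysis.util;

    internal static class StemmerUtilDemo
    {
        private static void Main()
        {
            char[] term = "walking".ToCharArray();
            int len = term.Length;

            // Strip a trailing "ing": only the logical length changes, not the array.
            if (StemmerUtil.EndsWith(term, len, "ing"))
            {
                len = StemmerUtil.deleteN(term, len - 3, len, 3);
            }

            Console.WriteLine(new string(term, 0, len)); // prints "walk"
        }
    }
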
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
new file mode 100644
index 0000000..2433a83
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
@@ -0,0 +1,172 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.util
+{
+
+
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Base class for Analyzers that need to make use of stopword sets. 
+	/// 
+	/// </summary>
+	public abstract class StopwordAnalyzerBase : Analyzer
+	{
+
+	  /// <summary>
+	  /// An immutable stopword set
+	  /// </summary>
+	  protected internal readonly CharArraySet stopwords;
+
+	  protected internal readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Returns the analyzer's stopword set or an empty set if the analyzer has no
+	  /// stopwords
+	  /// </summary>
+	  /// <returns> the analyzer's stopword set or an empty set if the analyzer has no
+	  ///         stopwords </returns>
+	  public virtual CharArraySet StopwordSet
+	  {
+		  get
+		  {
+			return stopwords;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Creates a new instance initialized with the given stopword set
+	  /// </summary>
+	  /// <param name="version">
+	  ///          the Lucene version for cross version compatibility </param>
+	  /// <param name="stopwords">
+	  ///          the analyzer's stopword set </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: protected StopwordAnalyzerBase(final org.apache.lucene.util.Version version, final CharArraySet stopwords)
+	  protected internal StopwordAnalyzerBase(Version version, CharArraySet stopwords)
+	  {
+		matchVersion = version;
+		// analyzers should use char array set for stopwords!
+		this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet.unmodifiableSet(CharArraySet.copy(version, stopwords));
+	  }
+
+	  /// <summary>
+	  /// Creates a new Analyzer with an empty stopword set
+	  /// </summary>
+	  /// <param name="version">
+	  ///          the Lucene version for cross version compatibility </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: protected StopwordAnalyzerBase(final org.apache.lucene.util.Version version)
+	  protected internal StopwordAnalyzerBase(Version version) : this(version, null)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a CharArraySet from a file resource associated with a class. (See
+	  /// <seealso cref="Class#getResourceAsStream(String)"/>).
+	  /// </summary>
+	  /// <param name="ignoreCase">
+	  ///          <code>true</code> if the set should ignore the case of the
+	  ///          stopwords, otherwise <code>false</code> </param>
+	  /// <param name="aClass">
+	  ///          a class that is associated with the given stopwordResource </param>
+	  /// <param name="resource">
+	  ///          name of the resource file associated with the given class </param>
+	  /// <param name="comment">
+	  ///          comment string to ignore in the stopword file </param>
+	  /// <returns> a CharArraySet containing the distinct stopwords from the given
+	  ///         file </returns>
+	  /// <exception cref="IOException">
+	  ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(final boolean ignoreCase, final Class aClass, final String resource, final String comment) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+	  protected internal static CharArraySet loadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
+	  {
+		Reader reader = null;
+		try
+		{
+		  reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
+		  return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
+		}
+		finally
+		{
+		  IOUtils.close(reader);
+		}
+
+	  }
+
+	  /// <summary>
+	  /// Creates a CharArraySet from a file.
+	  /// </summary>
+	  /// <param name="stopwords">
+	  ///          the stopwords file to load
+	  /// </param>
+	  /// <param name="matchVersion">
+	  ///          the Lucene version for cross version compatibility </param>
+	  /// <returns> a CharArraySet containing the distinct stopwords from the given
+	  ///         file </returns>
+	  /// <exception cref="IOException">
+	  ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(java.io.File stopwords, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
+	  protected internal static CharArraySet loadStopwordSet(File stopwords, Version matchVersion)
+	  {
+		Reader reader = null;
+		try
+		{
+		  reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8);
+		  return WordlistLoader.getWordSet(reader, matchVersion);
+		}
+		finally
+		{
+		  IOUtils.close(reader);
+		}
+	  }
+
+	  /// <summary>
+	  /// Creates a CharArraySet from a reader.
+	  /// </summary>
+	  /// <param name="stopwords">
+	  ///          the stopwords reader to load
+	  /// </param>
+	  /// <param name="matchVersion">
+	  ///          the Lucene version for cross version compatibility </param>
+	  /// <returns> a CharArraySet containing the distinct stopwords from the given
+	  ///         reader </returns>
+	  /// <exception cref="IOException">
+	  ///           if loading the stopwords throws an <seealso cref="IOException"/> </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(java.io.Reader stopwords, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
+	  protected internal static CharArraySet loadStopwordSet(Reader stopwords, Version matchVersion)
+	  {
+		try
+		{
+		  return WordlistLoader.getWordSet(stopwords, matchVersion);
+		}
+		finally
+		{
+		  IOUtils.close(stopwords);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

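Each loadStopwordSet overload closes its reader in a finally block, so the resource is released even when parsing throws; the idiomatic C# equivalent of that guarantee is a using statement. A hedged, self-contained sketch of the same load-then-always-close pattern (LoadStopwords is a stand-in for WordlistLoader.getWordSet, whose port is not part of this hunk; the comment-prefix handling mirrors the comment parameter above):

    using System;
    using System.Collections.Generic;
    using System.IO;

    internal static class StopwordFileDemo
    {
        // Illustrative stand-in for WordlistLoader.getWordSet: one word per line,
        // lines starting with the comment prefix are skipped.
        private static HashSet<string> LoadStopwords(string path, string comment)
        {
            var words = new HashSet<string>(StringComparer.Ordinal);
            using (var reader = new StreamReader(path)) // disposed even on exceptions
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    line = line.Trim();
                    if (line.Length > 0 && !line.StartsWith(comment, StringComparison.Ordinal))
                    {
                        words.Add(line);
                    }
                }
            }
            return words;
        }

        private static void Main()
        {
            File.WriteAllLines("stop.txt", new[] { "# a comment", "the", "a" });
            Console.WriteLine(string.Join(",", LoadStopwords("stop.txt", "#"))); // the,a
        }
    }
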
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
new file mode 100644
index 0000000..c7769ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// Abstract parent class for analysis factories that create <seealso cref="TokenFilter"/>
+    /// instances.
+    /// </summary>
+    public abstract class TokenFilterFactory : AbstractAnalysisFactory
+    {
+
+        private static readonly AnalysisSPILoader<TokenFilterFactory> loader = new AnalysisSPILoader<TokenFilterFactory>(typeof(TokenFilterFactory), new string[] { "TokenFilterFactory", "FilterFactory" });
+
+        /// <summary>
+        /// looks up a tokenfilter by name from context classpath </summary>
+        public static TokenFilterFactory forName(string name, IDictionary<string, string> args)
+        {
+            return loader.newInstance(name, args);
+        }
+
+        /// <summary>
+        /// looks up a tokenfilter class by name from context classpath </summary>
+        public static Type lookupClass(string name)
+        {
+            return loader.lookupClass(name);
+        }
+
+        /// <summary>
+        /// returns a list of all available tokenfilter names from context classpath </summary>
+        public static HashSet<string> availableTokenFilters()
+        {
+            return loader.availableServices();
+        }
+
+        /// <summary>
+        /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+        /// Changes to the factories are visible after the method ends; all
+        /// iterators (<seealso cref="#availableTokenFilters()"/>,...) stay consistent.
+        /// 
+        /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+        /// never removed or replaced.
+        /// 
+        /// </para>
+        /// <para><em>This method is expensive and should only be called for discovery
+        /// of new factories on the given classpath/classloader!</em>
+        /// </para>
+        /// </summary>
+        public static void ReloadTokenFilters(ClassLoader classloader)
+        {
+            loader.reload(classloader);
+        }
+
+        /// <summary>
+        /// Initialize this factory via a set of key-value pairs.
+        /// </summary>
+        protected internal TokenFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+        }
+
+        /// <summary>
+        /// Transform the specified input TokenStream </summary>
+        public abstract TokenStream Create(TokenStream input);
+    }
+}
\ No newline at end of file

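A concrete factory receives its configuration through the args dictionary, forwards it to the base constructor, and implements Create to wrap the incoming stream. A minimal sketch of that contract against the class as ported above (the factory name and pass-through body are illustrative, and the TokenStream import is assumed from the core port; a real factory would return a TokenFilter wrapping the input):

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Util;

    // Hypothetical no-op factory: demonstrates the subclass contract only.
    public class PassThroughFilterFactory : TokenFilterFactory
    {
        public PassThroughFilterFactory(IDictionary<string, string> args)
            : base(args)
        {
        }

        public override TokenStream Create(TokenStream input)
        {
            return input; // a real factory returns a TokenFilter wrapping `input`
        }
    }

Given SPI registration, such a factory would then be discoverable by name through the forName lookup shown above, along the lines of TokenFilterFactory.forName("PassThrough", args).
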

[25/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
new file mode 100644
index 0000000..53c2fe0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by Kstem
+	/// </summary>
+	internal class KStemData5
+	{
+		private KStemData5()
+		{
+		}
+	   internal static string[] data = new string[] {"lock","locker","locket","lockjaw","locknut", "lockout","locks","locksmith","lockstitch","lockup", "loco","locomotion","locomotive","locum","locus", "locust","locution","lode","lodestar","lodestone", "lodge","lodgement","lodger","lodging","lodgings", "lodgment","loess","loft","lofted","lofty", "log","loganberry","logarithm","logarithmic","logbook", "logger","loggerheads","loggia","logic","logical", "logically","logician","logistic","logistics","logjam", "logrolling","loin","loincloth","loins","loiter", "loll","lollipop","lollop","lolly","lone", "lonely","loner","lonesome","long","longboat", "longbow","longevity","longhaired","longhand","longheaded", "longhop","longing","longish","longitude","longitudinal", "longship","longshoreman","longsighted","longstanding","longstop", "longsuffering","longueur","longways","longwearing","longwinded", "longwise","loo","loofa","loofah","look", "looker","lookout","looks","loom","loon", "loony","loop"
 ,"loophole","loose","loosebox", "loosen","loot","lop","lope","loppings", "loquacious","loquat","lord","lordly","lords", "lordship","lore","lorgnette","lorn","lorry", "lose","loser","loss","lost","lot", "loth","lotion","lottery","lotto","lotus", "loud","loudhailer","loudmouth","loudspeaker","lough", "lounge","lounger","lour","louse","lousy", "lout","louver","louvre","lovable","love", "loveable","lovebird","lovechild","loveless","lovelorn", "lovely","lovemaking","lover","lovers","lovesick", "lovey","loving","low","lowborn","lowbred", "lowbrow","lowdown","lower","lowermost","lowland", "lowlander","lowly","loyal","loyalist","loyalty", "lozenge","lsd","ltd","lubber","lubricant", "lubricate","lubricator","lubricious","lucerne","lucid", "luck","luckless","lucky","lucrative","lucre", "ludicrous","ludo","luff","lug","luggage", "lugger","lughole","lugsail","lugubrious","lugworm", "lukewarm","lull","lullaby","lumbago","lumbar", "lumber","lumberjack","lumberman","lumberyard","luminary", "lumino
 us","lumme","lummox","lummy","lump", "lumpish","lumpy","lunacy","lunar","lunate", "lunatic","lunch","lunchtime","lung","lunge", "lungfish","lungpower","lupin","lurch","lure", "lurgy","lurid","lurk","luscious","lush", "lust","luster","lustful","lustre","lustrous", "lusty","lutanist","lute","lutenist","luv", "luxuriant","luxuriate","luxurious","luxury","lychee", "lychgate","lye","lymph","lymphatic","lynch", "lynx","lyre","lyrebird","lyric","lyrical", "lyricism","lyricist","lyrics","mac","macabre", "macadam","macadamise","macadamize","macaroni","macaroon", "macaw","mace","macerate","mach","machete", "machiavellian","machination","machine","machinegun","machinery", "machinist","mackerel","mackintosh","macrobiotic","macrocosm", "mad","madam","madame","madcap","madden", "maddening","madder","made","madeira","mademoiselle", "madhouse","madly","madman","madness","madonna", "madrigal","maelstrom","maenad","maestro","mafia", "mag","magazine","magenta","maggot","maggoty", "magi","magic","magic
 al","magician","magisterial", "magistracy","magistrate","magma","magnanimity","magnanimous", "magnate","magnesia","magnesium","magnet","magnetic", "magnetise","magnetism","magnetize","magneto","magnificat", "magnification","magnificent","magnifier","magnify","magniloquent", "magnitude","magnolia","magnum","magpie","magus", "maharaja","maharajah","maharanee","maharani","mahatma", "mahlstick","mahogany","mahout","maid","maiden", "maidenhair","maidenhead","maidenhood","maidenly","maidservant", "mail","mailbag","mailbox","maim","main", "mainland","mainline","mainly","mainmast","mains", "mainsail","mainspring","mainstay","mainstream","maintain", "maintenance","maisonette","maisonnette","maize","majestic", "majesty","majolica","major","majordomo","majorette", "majority","make","maker","makeshift","making", "makings","malachite","maladjusted","maladministration","maladroit", "malady","malaise","malapropism","malapropos","malaria", "malarial","malay","malcontent","malcontented","male", "mal
 ediction","malefactor","maleficent","malevolent","malfeasance", "malformation","malformed","malfunction","malice","malicious", "malign","malignancy","malignant","malignity","malinger", "mall","mallard","malleable","mallet","mallow", "malmsey","malnutrition","malodorous","malpractice","malt", "malthusian","maltreat","maltster","mama","mamba", "mambo","mamma","mammal","mammary","mammon", "mammoth","mammy","man","manacle","manage", "manageable","management","manager","manageress","managerial", "manatee","mandarin","mandate","mandatory","mandible", "mandolin","mandrake","mandrill","maneuver","maneuverable", "manful","manganese","mange","manger","mangle", "mango","mangosteen","mangrove","mangy","manhandle", "manhole","manhood","manhour","mania","maniac", "maniacal","manic","manicure","manicurist","manifest", "manifestation","manifesto","manifold","manikin","manila", "manilla","manipulate","manipulation","mankind","manly", "manna","manned","mannequin","manner","mannered", "mannerism","man
 nerly","manners","mannikin","mannish", "manoeuverable","manoeuvre","manometer","manor","manorial", "manpower","mansard","manse","manservant","mansion", "mansions","manslaughter","mantelpiece","mantelshelf","mantilla", "mantis","mantle","mantrap","manual","manufacture", "manufacturer","manumit","manure","manuscript","manx", "many","maoism","maori","map","maple", "mapping","maquis","mar","marabou","marabout", "maraschino","marathon","maraud","marble","marbled", "marbles","marc","marcasite","march","marchioness", "margarine","margin","marginal","marguerite","marigold", "marihuana","marijuana","marimba","marina","marinade", "marinate","marine","mariner","marionette","marital", "maritime","marjoram","mark","markdown","marked", "marker","market","marketeer","marketer","marketing", "marketplace","marking","marksman","marksmanship","markup", "marl","marlinespike","marmalade","marmoreal","marmoset", "marmot","marocain","maroon","marquee","marquess", "marquetry","marquis","marriage","marriage
 able","married", "marrow","marrowbone","marrowfat","marry","mars", "marsala","marseillaise","marsh","marshal","marshmallow", "marshy","marsupial","mart","marten","martial", "martian","martin","martinet","martini","martinmas", "martyr","martyrdom","marvel","marvellous","marvelous", "marxism","marzipan","mascara","mascot","masculine", "masculinity","maser","mash","mashie","mask", "masked","masochism","mason","masonic","masonry", "masque","masquerade","mass","massacre","massage", "masses","masseur","massif","massive","massy", "mast","mastectomy","master","masterful","masterly", "mastermind","masterpiece","mastership","masterstroke","mastery", "masthead","mastic","masticate","mastiff","mastitis", "mastodon","mastoid","mastoiditis","masturbate","mat", "matador","match","matchbox","matching","matchless", "matchlock","matchmaker","matchstick","matchwood","mate", "material","materialise","materialism","materialist","materialize", "maternal","maternity","matey","mathematician","mathematics",
  "matins","matriarch","matriarchy","matricide","matriculate", "matrimony","matrix","matron","matronly","matt", "matter","matting","mattins","mattock","mattress", "maturation","mature","maturity","maudlin","maul", "maulstick","maunder","mausoleum","mauve","maverick", "maw","mawkish","maxi","maxim","maximal", "maximise","maximize","maximum","may","maybe", "maybeetle","mayday","mayfly","mayhem","mayonnaise", "mayor","mayoralty","mayoress","maypole","mayst", "maze","mazed","mazurka","mccarthyism","mead", "meadow","meadowsweet","meager","meagre","meal", "mealie","mealtime","mealy","mealybug","mean", "meander","meanderings","meaning","meaningful","meaningless", "means","meant","meantime","meanwhile","measles", "measly","measurable","measure","measured","measureless", "measurement","meat","meatball","meaty","mecca", "mechanic","mechanical","mechanics","mechanise","mechanism", "mechanistic","mechanize","medal","medalist","medallion", "medallist","meddle","meddlesome","media","mediaeval", "m
 edial","median","mediate","medic","medical", "medicament","medicare","medicate","medication","medicinal", "medicine","medico","medieval","mediocre","mediocrity", "meditate","meditation","meditative","mediterranean","medium", "medlar","medley","meed","meek","meerschaum", "meet","meeting","meetinghouse","megadeath","megahertz", "megalith","megalithic","megalomania","megalomaniac","megaphone", "megaton","megrim","meiosis","melancholia","melancholic", "melancholy","meld","melee","meliorate","meliorism", "mellifluous","mellow","melodic","melodious","melodrama", "melodramatic","melody","melon","melt","melting", "member","membership","membrane","membranous","memento", "memo","memoir","memoirs","memorabilia","memorable", "memorandum","memorial","memorise","memorize","memory", "memsahib","men","menace","menagerie","mend", "mendacious","mendacity","mendelian","mendicant","mending", "menfolk","menial","meningitis","meniscus","menopause", "menses","menstrual","menstruate","mensurable","mensurat
 ion", "mental","mentality","menthol","mentholated","mention", "mentor","menu","meow","mephistopheles","mercantile", "mercenary","mercer","mercerise","mercerize","merchandise", "merchant","merchantman","merciful","merciless","mercurial", "mercury","mercy","mere","merely","meretricious", "merge","merger","meridian","meridional","meringue", "merino","merit","meritocracy","meritorious","mermaid", "merman","merriment","merry","merrymaking","mesa", "mescalin","mescaline","mesdames","mesdemoiselles","meseems", "mesh","mesmeric","mesmerise","mesmerism","mesmerist", "mesmerize","mess","message","messenger","messiah", "messianic","messieurs","messmate","messrs","messuage", "messy","mestizo","met","metabolic","metabolise", "metabolism","metabolize","metacarpal","metal","metalanguage", "metallic","metallurgist","metallurgy","metalwork","metamorphose", "metamorphosis","metaphor","metaphorical","metaphysics","metatarsal", "mete","metempsychosis","meteor","meteoric","meteorite", "meteoroid","meteo
 rologist","meteorology","meter","methane", "methinks","method","methodical","methodism","methodology", "meths","methuselah","meticulous","metre","metric", "metrical","metrication","metricise","metricize","metro", "metronome","metropolis","metropolitan","mettle","mettlesome", "mew","mews","mezzanine","mezzo","mezzotint", "miaow","miasma","mica","mice","michaelmas", "mick","mickey","microbe","microbiologist","microbiology", "microcosm","microelectronics","microfiche","microfilm","micromesh", "micrometer","micron","microorganism","microphone","microscope", "microscopic","microsecond","microwave","mid","midair", "midcourse","midday","midden","middle","middlebrow", "middleman","middleweight","middling","midge","midget", "midi","midland","midlands","midmost","midnight", "midpoint","midriff","midshipman","midships","midst", "midsummer","midway","midweek","midwest","midwicket", "midwife","midwifery","mien","miffed","might", "mightily","mighty","mignonette","migraine","migrant", "migrate","m
 igration","migratory","mikado","mike", "milady","mild","mildew","mildly","mile", "mileage","mileometer","miler","milestone","milieu", "militancy","militant","militarise","militarism","militarize", "military","militate","militia","militiaman","milk", "milker","milkmaid","milkman","milksop","milkweed", "milky","mill","millboard","milldam","millenarian", "millenium","millepede","miller","millet","millibar", "milligram","milligramme","milliliter","millilitre","millimeter", "millimetre","milliner","millinery","million","millionaire", "millipede","millpond","millrace","millstone","millwheel", "millwright","milometer","milord","milt","mime", "mimeograph","mimetic","mimic","mimicry","mimosa", "min","minaret","minatory","mince","mincemeat", "mincer","mincingly","mind","minded","mindful", "mindless","mine","minefield","minelayer","miner", "mineral","mineralogist","mineralogy","minestrone","minesweeper", "mingle","mingy","mini","miniature","miniaturist", "minibus","minim","minimal","minimise",
 "minimize", "minimum","mining","minion","minister","ministerial", "ministrant","ministration","ministry","miniver","mink", "minnow","minor","minority","minotaur","minster", "minstrel","minstrelsy","mint","minuet","minus", "minuscule","minute","minutely","minuteman","minutes", "minutia","minx","miracle","miraculous","mirage", "mire","mirror","mirth","miry","misadventure", "misadvise","misalliance","misanthrope","misanthropy","misapplication", "misapply","misapprehend","misapprehension","misappropriate","misbegotten", "misbehave","misbehaved","misbehavior","misbehaviour","miscalculate", "miscall","miscarry","miscast","miscegenation","miscellaneous", "miscellany","mischance","mischief","mischievous","misconceive", "misconception","misconduct","misconstruction","misconstrue","miscount", "miscreant","miscue","misdate","misdeal","misdeed", "misdemeanor","misdemeanour","misdirect","misdoing","miser", "miserable","miserably","miserly","misery","misfire", "misfit","misfortune","misgiving","m
 isgovern","misguide", "misguided","mishandle","mishap","mishear","mishit", "mishmash","misinform","misinterpret","misjudge","misjudgement", "misjudgment","mislay","mislead","mismanage","mismatch", "misname","misnomer","misogynist","misogyny","misplace", "misprint","mispronounce","mispronunciation","misquote","misread", "misreport","misrepresent","misrule","miss","missal", "misshapen","missile","missing","mission","missionary", "missis","missive","misspell","misspend","misstate", "misstatement","missus","missy","mist","mistake", "mistaken","mister","mistime","mistletoe","mistral", "mistranslate","mistress","mistrial","mistrust","mistrustful", "mists","misty","misunderstand","misunderstanding","misuse", "mite","miter","mitigate","mitosis","mitre", "mitt","mitten","mix","mixed","mixer", "mixture","mizen","mizzen","mizzenmast","mizzle", "mnemonic","mnemonics","moa","moan","moat", "moated","mob","mobile","mobilisation","mobilise", "mobility","mobilization","mobilize","mobster","moccasin"
 , "mocha","mock","mockers","mockery","mockingbird", "modal","mode","model","moderate","moderately", "moderation","moderations","moderato","moderator","modern", "modernise","modernism","modernistic","modernity","modernize", "modest","modesty","modicum","modification","modifier", "modify","modish","mods","modular","modulate", "modulation","module","moggy","mogul","moh", "mohair","mohammedan","mohammedanism","moiety","moist", "moisten","moisture","moisturise","moisturize","moke", "molar","molasses","mold","molder","molding", "moldy","mole","molecular","molecule","molehill", "moleskin","molest","moll","mollify","mollusc", "mollusk","mollycoddle","molt","molten","molto", "molybdenum","mom","moment","momentarily","momentary", "momentous","moments","momentum","momma","mommy", "monarch","monarchic","monarchism","monarchist","monarchy", "monastery","monastic","monasticism","monaural","monday", "monetary","money","moneybags","moneybox","moneychanger", "moneyed","moneylender","moneymaker","mon
 eys","monger", "mongol","mongolism","mongoose","mongrel","monies", "monitor","monk","monkey","mono","monochrome", "monocle","monogamous","monogamy","monogram","monograph", "monolith","monolithic","monolog","monologue","monomania", "monomaniac","mononucleosis","monophonic","monophthong","monoplane", "monopolise","monopolist","monopolize","monopoly","monorail", "monosyllabic","monosyllable","monotheism","monotone","monotonous", "monotony","monotype","monoxide","monsieur","monsignor", "monsoon","monster","monstrance","monstrosity","monstrous", "montage","month","monthly","monument","monumental", "monumentally","moo","mooch","moocow","mood", "moody","moon","moonbeam","mooncalf","moonlight", "moonlit","moonshine","moonstone","moonstruck","moony", "moor","moorhen","moorings","moorish","moorland", "moose","moot","mop","mope","moped", "moppet","moquette","moraine","moral","morale", "moralise","moralist","moralistic","morality","moralize", "morally","morals","morass","moratorium","morbid", "
 morbidity","mordant","more","morello","moreover", "mores","moresque","morganatic","morgue","moribund", "mormon","mormonism","morn","morning","mornings", "morocco","moron","moronic","morose","morpheme", "morphemics","morpheus","morphine","morphology","morrow", "morsel","mortal","mortality","mortally","mortar", "mortarboard","mortgage","mortgagee","mortgagor","mortice", "mortician","mortification","mortify","mortise","mortuary", "mosaic","moselle","mosey","moslem","mosque", "mosquito","moss","mossy","most","mostly", "mote","motel","motet","moth","mothball", "mothballs","mother","motherhood","motherly","mothproof", "motif","motion","motionless","motions","motivate", "motivation","motive","motley","motocross","motor", "motorbike","motorboat","motorcade","motorcar","motorcycle", "motorcyclist","motoring","motorise","motorist","motorize", "motorman","motorway","mottled","motto","mould", "moulder","moulding","mouldy","moult","mound", "mount","mountain","mountaineer","mountaineering","mount
 ainous", "mountainside","mountaintop","mountebank","mountie","mourn", "mourner","mournful","mourning","mouse","mouser", "mousetrap","moussaka","mousse","moustache","mousy", "mouth","mouthful","mouthorgan","mouthpiece","mouthwash", "movable","move","moveable","movement","movements", "mover","movie","movies","moving","mow", "mower","mpg","mph","mra","mrs", "msc","much","muchness","mucilage","muck", "muckheap","muckrake","mucky","mucous","mucus", "mud","muddle","muddy","mudflat","mudguard", "mudpack","mudslinger","muesli","muezzin","muff", "muffin","muffle","muffler","mufti","mug", "mugger","muggins","muggy","mugwump","muhammadan", "muhammadanism","mulatto","mulberry","mulch","mulct", "mule","muleteer","mulish","mull","mullah", "mullet","mulligatawny","mullion","mullioned","multifarious", "multiform","multilateral","multilingual","multimillionaire","multiple", "multiplex","multiplication","multiplicity","multiply","multiracial", "multistorey","multitude","multitudinous","mum","mumble",
  "mummer","mummery","mummify","mumming","mummy", "mumps","munch","mundane","municipal","municipality", "munificence","munificent","muniments","munition","munitions", "mural","murder","murderous","murk","murky", "murmur","murphy","murrain","muscatel","muscle", "muscled","muscleman","muscovite","muscular","muse", "museum","mush","mushroom","mushy","music", "musical","musically","musician","musicianship","musk", "musket","musketeer","musketry","muskmelon","muskrat", "musky","muslim","muslin","musquash","muss", "mussel","must","mustache","mustachio","mustang", "mustard","muster","musty","mutable","mutant", "mutation","mute","muted","mutilate","mutilation", "mutineer","mutinous","mutiny","mutt","mutter", "mutton","muttonchops","mutual","mutuality","muzak", "muzzle","muzzy","mycology","myelitis","myna", "mynah","myopia","myriad","myrrh","myrtle", "myself","mysterious","mystery","mystic","mystical", "mysticism","mystification","mystify","mystique","myth", "mythical","mythological","mytholo
 gist","mythology","myxomatosis", "nab","nabob","nacelle","nacre","nadir", "nag","naiad","nail","nailbrush","naive", "naivete","naivety","naked","name","namedrop", "nameless","namely","nameplate","namesake","nanny", "nap","napalm","naphtha","naphthalene","napkin", "nappy","narc","narcissism","narcissus","narcotic", "nark","narky","narrate","narration","narrative", "narrator","narrow","narrowly","narrows","narwhal", "nasal","nasalise","nasalize","nascent","nasturtium", "nasty","natal","nation","national","nationalise", "nationalism","nationalist","nationalistic","nationality","nationalize", "nationwide","native","nativity","nato","natter", "natty","natural","naturalise","naturalism","naturalist", "naturalistic","naturalize","naturally","naturalness","nature", "naturism","naturopath","naught","naughty","nausea", "nauseate","nauseous","nautch","nautical","nautilus", "naval","nave","navel","navigable","navigate", "navigation","navigator","navvy","navy","nay", "nazi","nco","neanderthal","
 neapolitan","near", "nearby","nearly","nearside","nearsighted","neat", "nebula","nebular","nebulous","necessaries","necessarily", "necessary","necessitate","necessitous","necessity","neck", "neckband","neckerchief","necklace","necklet","neckline", "necktie","neckwear","necromancer","necromancy","necrophilia", "necrophiliac","necropolis","nectar","nectarine","need", "needful","needle","needless","needlessly","needlewoman", "needlework","needs","needy","nefarious","negate", "negative","neglect","neglectful","negligee","negligence", "negligent","negligible","negotiable","negotiate","negotiation", "negress","negro","negus","neigh","neighbor", "neighborhood","neighboring","neighborly","neighbour","neighbourhood", "neighbouring","neighbourly","neither","nelson","nemesis", "neoclassical","neocolonialism","neolithic","neologism","neon", "neonate","neophyte","neoplasm","nephew","nephritis", "nepotism","neptune","nereid","nerve","nerveless", "nerves","nervous","nervy","ness","nest", "nesting"
 ,"nestle","nestling","nestor","net", "netball","nether","nethermost","nets","nett", "netting","nettle","network","neural","neuralgia", "neurasthenia","neurasthenic","neuritis","neurologist","neurology", "neurosis","neurotic","neuter","neutral","neutralise", "neutrality","neutralize","neutralizer","neutron","never", "nevermore","nevertheless","new","newborn","newcomer", "newel","newfangled","newfoundland","newly","newlywed", "newmarket","news","newsagent","newsboy","newscast", "newscaster","newsletter","newsmonger","newspaper","newsprint", "newsreel","newsroom","newssheet","newsstand","newsvendor", "newsworthy","newsy","newt","newtonian","next", "nexus","nhs","niacin","nib","nibble", "niblick","nibs","nice","nicely","nicety", "niche","nick","nickel","nicker","nicknack", "nickname","nicotine","niece","niff","nifty", "niggard","niggardly","nigger","niggle","niggling", "nigh","night","nightcap","nightclothes","nightclub", "nightdress","nightfall","nighthawk","nightingale","nightjar", "n
 ightlife","nightlight","nightline","nightlong","nightly", "nightmare","nights","nightshade","nightshirt","nightstick", "nighttime","nihilism","nilotic","nimble","nimbus", "nimrod","nincompoop","nine","ninepin","ninepins", "nines","nineteen","ninety","ninny","ninth", "nip","nipper","nippers","nipping","nipple", "nippy","nirvana","nisi","nit","niter", "nitpick","nitpicking","nitrate","nitre","nitric", "nitrochalk","nitrogen","nitroglycerin","nitroglycerine","nitrous", "nitwit","nix","nob","nobble","nobility", "noble","nobleman","nobly","nobody","nocturnal", "nocturne","nod","nodal","noddle","nodular", "nodule","noel","noes","nog","noggin", "nohow","noise","noisome","noisy","nomad", "nomadic","nomenclature","nominal","nominate","nomination", "nominative","nominee","nonage","nonagenarian","nonaggression", "nonaligned","nonalignment","nonassertive","nonce","nonchalance", "nonchalant","noncombatant","noncommittal","nonconductor","nonconformist", "nonconformity","noncontributory","nondescr
 ipt","none","nonentity", "nonesuch","nonetheless","nonfiction","nonflammable","nonintervention", "nonobservance","nonpareil","nonpayment","nonplus","nonproliferation", "nonresident","nonrestrictive","nonsense","nonsensical","nonskid", "nonsmoker","nonstandard","nonstarter","nonstick","nonstop", "nonunion","nonverbal","nonviolence","nonviolent","nonwhite", "noodle","nook","noon","noonday","noose", "nope","nor","nordic","norm","normal", "normalise","normality","normalize","normally","norman", "normative","north","northbound","northeast","northeaster", "northeasterly","northeastern","northeastward","northeastwards","northerly", "northern","northerner","northernmost","northward","northwards", "northwest","northwester","northwesterly","northwestern","northwestward", "northwestwards","nos","nose","nosebag","nosebleed", "nosecone","nosedive","nosegay","nosey","nosh", "nostalgia","nostril","nostrum","nosy","not", "notability","notable","notably","notarise","notarize", "notary","notation","n
 otch","note","notebook", "notecase","noted","notepaper","noteworthy","nothing", "nothingness","notice","noticeable","notifiable","notification", "notify","notion","notional","notions","notoriety", "notorious","notwithstanding","nougat","nought","noun", "nourish","nourishment","nous","nova","novel", "novelette","novelettish","novelist","novella","novelty", "november","novice","noviciate","novitiate","novocaine", "now","nowadays","nowhere","nowise","noxious", "nozzle","nth","nuance","nub","nubile", "nuclear","nucleus","nude","nudge","nudism", "nudity","nugatory","nugget","nuisance","null", "nullah","nullify","nullity","numb","number", "numberless","numberplate","numbers","numbly","numbskull", "numeracy","numeral","numerate","numeration","numerator", "numerical","numerology","numerous","numinous","numismatic", "numismatics","numskull","nun","nuncio","nunnery", "nuptial","nuptials","nurse","nurseling","nursemaid", "nursery","nurseryman","nursing","nursling","nurture", "nut","nutcase","n
 utcracker","nuthouse","nutmeg", "nutria","nutrient","nutriment","nutrition","nutritious", "nutritive","nuts","nutshell","nutty","nuzzle", "nylon","nylons","nymph","nymphet","nymphomania", "nymphomaniac","oaf","oak","oaken","oakum", "oap","oar","oarlock","oarsman","oarsmanship", "oasis","oat","oatcake","oath","oatmeal", "oats","obbligato","obdurate","obeah","obedient", "obeisance","obelisk","obese","obey","obfuscate", "obituary","object","objection","objectionable","objective", "objector","oblation","obligate","obligation","obligatory", "oblige","obliging","oblique","obliterate","oblivion", "oblivious","oblong","obloquy","obnoxious","oboe", "oboist","obscene","obscenity","obscurantism","obscure", "obscurity","obsequies","obsequious","observable","observance", "observant","observation","observations","observatory","observe", "observer","observing","obsess","obsession","obsessional", "obsessive","obsidian","obsolescent","obsolete","obstacle", "obstetrician","obstetrics","obstinate","ob
 streperous","obstruct", "obstruction","obstructionism","obstructive","obtain","obtainable", "obtrude","obtrusive","obtuse","obverse","obviate", "obvious","obviously","ocarina","occasion","occasional", "occident","occidental","occult","occupancy","occupant", "occupation","occupational","occupier","occupy","occur", "occurrence","ocean","oceangoing","oceanography","ocelot", "ocher","ochre","octagon","octane","octave", "octavo","octet","october","octogenarian","octopus", "octosyllabic","ocular","oculist","odalisque","odd", "oddball","oddity","oddly","oddment","odds", "ode","odious","odium","odor","odoriferous", "odorous","odour","odyssey","oecumenical","oecumenicalism", "oesophagus","oestrogen","off","offal","offbeat", "offence","offend","offender","offense","offensive", "offer","offering","offertory","offhand","office", "officeholder","officer","offices","official","officialdom", "officialese","officially","officiate","officious","offing", "offish","offprint","offset","offshoot","offsh
 ore", "offside","offspring","offstage","oft","often", "ogle","ogre","ohm","oho","oil", "oilcake","oilcan","oilcloth","oiled","oilfield", "oilman","oilrig","oils","oilskin","oilskins", "oily","oink","ointment","okapi","okay", "okra","old","olden","oldish","oldster", "oleaginous","oleander","oleograph","olfactory","oligarch", "oligarchy","olive","olympiad","olympian","olympic", "ombudsman","omega","omelet","omelette","omen", "ominous","omission","omit","omnibus","omnipotent", "omnipresent","omniscient","omnivorous","once","oncoming", "one","onerous","oneself","onetime","ongoing", "onion","onlooker","only","onomatopoeia","onrush", "onset","onshore","onside","onslaught","onto", "ontology","onus","onward","onwards","onyx", "oodles","oof","oomph","oops","ooze", "opacity","opal","opalescent","opaque","ope", "open","opencast","opener","openhearted","opening", "openly","openwork","opera","operable","operate", "operation","operational","operative","operator","operetta", "ophthalmia","ophthalm
 ic","ophthalmology","ophthalmoscope","opiate", "opine","opinion","opinionated","opium","opossum", "opponent","opportune","opportunism","opportunity","oppose", "opposite","opposition","oppress","oppression","oppressive", "oppressor","opprobrious","opprobrium","ops","opt", "optative","optic","optical","optician","optics", "optimism","optimum","option","optional","opulence", "opulent","opus","oracle","oracular","oral", "orange","orangeade","orangeman","orangutang","oration", "orator","oratorical","oratorio","oratory","orb", "orbit","orchard","orchestra","orchestral","orchestrate", "orchid","ordain","ordeal","order","ordered", "orderly","orders","ordinal","ordinance","ordinand", "ordinarily","ordinary","ordinate","ordination","ordnance", "ordure","ore","oregano","organ","organdie", "organdy","organic","organisation","organise","organised", "organism","organist","organization","organize","organized", "orgasm","orgiastic","orgy","orient","oriental", "orientalist","orientate","orientation"
 ,"orifice","origin", "original","originality","originally","originate","oriole", "orison","orlon","ormolu","ornament","ornamental", "ornamentation","ornate","ornery","ornithology","orotund", "orphan","orphanage","orrery","orrisroot","orthodontic", "orthodontics","orthodox","orthodoxy","orthography","orthopaedic", "orthopaedics","orthopedic","orthopedics","ortolan","oryx", "oscar","oscillate","oscillation","oscillator","oscillograph", "oscilloscope","osculation","osier","osmosis","osprey", "osseous","ossification","ossify","ostensible","ostentation", "osteoarthritis","osteopath","osteopathy","ostler","ostracise", "ostracize","ostrich","other","otherwise","otherworldly", "otiose","otter","ottoman","oubliette","ouch", "ought","ounce","our","ours","ourselves", "ousel","oust","out","outback","outbalance", "outbid","outbound","outbrave","outbreak","outbuilding", "outburst","outcast","outcaste","outclass","outcome", "outcrop","outcry","outdated","outdistance","outdo", "outdoor","outdoors",
 "outer","outermost","outface", "outfall","outfield","outfight","outfit","outflank", "outflow","outfox","outgeneral","outgoing","outgoings", "outgrow","outgrowth","outhouse","outing","outlandish", "outlast","outlaw","outlay","outlet","outline", "outlive","outlook","outlying","outmaneuver","outmanoeuvre", "outmarch","outmatch","outmoded","outmost","outnumber", "outpatient","outplay","outpoint","outpost","outpourings", "output","outrage","outrageous","outrange","outrank", "outride","outrider","outrigger","outright","outrival", "outrun","outsell","outset","outshine","outside", "outsider","outsize","outskirts","outsmart","outspoken", "outspread","outstanding","outstay","outstretched","outstrip", "outtalk","outvote","outward","outwardly","outwards", "outwear","outweigh","outwit","outwork","outworn", "ouzel","ouzo","ova","oval","ovarian", "ovary","ovation","oven","ovenware","over", "overact","overage","overall","overalls","overarch", "overarm","overawe","overbalance","overbear","overbearin
 g", "overbid","overblown","overboard","overburden","overcall", "overcapitalise","overcapitalize","overcast","overcharge","overcloud", "overcoat","overcome","overcompensate","overcrop","overcrowd", "overdevelop","overdo","overdone","overdose","overdraft", "overdraw","overdrawn","overdress","overdrive","overdue", "overestimate","overexpose","overflow","overfly","overgrown", "overgrowth","overhand","overhang","overhaul","overhead", "overheads","overhear","overjoyed","overkill","overland", "overlap","overlay","overleaf","overleap","overload", "overlong","overlook","overlord","overly","overman", "overmaster","overmuch","overnight","overpass","overpay", "overplay","overpopulated","overpopulation","overpower","overpowering", "overprint","overrate","overreach","override","overriding", "overrule","overrun","overseas","oversee","overseer", "oversell","oversexed","overshadow","overshoe","overshoot", "overside","oversight","oversimplify","oversleep","overspill", "overstate","overstatement","ove
 rstay","oversteer","overstep", "overstock","overstrung","overstuffed","oversubscribed","overt", "overtake","overtax","overthrow","overtime","overtone", "overtones","overtop","overtrump","overture","overtures", "overturn","overweening","overweight","overwhelm","overwhelming", "overwork","overwrought","oviduct","oviparous","ovoid", "ovulate","ovum","owe","owl","owlet", "owlish","own","owner","ownership","oxbridge", "oxcart","oxeye","oxide","oxidise","oxidize", "oxon","oxonian","oxtail","oxyacetylene","oxygen", "oxygenate","oyez","oyster","oystercatcher","ozone", "pabulum","pace","pacemaker","pacesetter","pachyderm", "pacific","pacifier","pacifism","pacifist","pacify", "pack","package","packed","packer","packet", "packing","packsaddle","pact","pad","padding", "paddle","paddock","paddy","padlock","padre", "paean","paederast","paederasty","paediatrician","paediatrics", "paella","paeony","pagan","paganism","page", "pageant","pageantry","pagination","pagoda","paid", "pail","paillasse","pai
 n","pained","painful", "painkiller","painless","pains","painstaking","paint", "paintbrush","painter","painting","paints","paintwork", "pair","paisley","pajama","pajamas","pal", "palace","paladin","palais","palakeen","palanquin", "palatable","palatal","palatalize","palate","palatial", "palatinate","palaver","pale","paleface","paleography", "paleolithic","paleontology","palette","palfrey","palimpsest", "palindrome","paling","palings","palisade","palish", "pall","palladian","pallbearer","pallet","palliasse", "palliate","palliation","palliative","pallid","pallor", "pally","palm","palmer","palmetto","palmist", "palmistry","palmy","palomino","palpable","palpate", "palpitate","palpitation","palsied","palsy","palter", "paltry","pampas","pamper","pamphlet","pamphleteer", "pan","panacea","panache","panama","panatela", "panatella","pancake","panchromatic","pancreas","panda", "pandemic","pandemonium","pander","pandit","panegyric", "panel","paneling","panelist","panelling","panellist", "pang","p
 anhandle","panic","panicky","panjabi", "panjandrum","pannier","pannikin","panoplied","panoply", "panorama","panpipes","pansy","pant","pantaloon", "pantaloons","pantechnicon","pantheism","pantheon","panther", "panties","pantile","panto","pantograph","pantomime", "pantry","pants","panty","panzer","pap", "papa","papacy","papadum","papal","papaya", "paper","paperback","paperboy","paperhanger","papers", "paperweight","paperwork","papery","papist","papoose", "pappy","paprika","papyrus","par","parable", "parabola","parachute","parachutist","paraclete","parade", "paradigm","paradigmatic","paradise","paradisiacal","paradox", "paraffin","paragon","paragraph","parakeet","parallel", "parallelism","parallelogram","paralyse","paralysis","paralytic", "paralyze","paramilitary","paramount","paramountcy","paramour", "paranoia","paranoiac","paranoid","parapet","paraphernalia", "paraphrase","paraplegia","paraplegic","paraquat","paras", "parasite","parasitic","parasol","parathyroid","paratrooper", "para
 troops","paratyphoid","parboil","parcel","parch", "parchment","pard","pardon","pardonable","pardonably", "pardoner","pare","parent","parentage","parental", "parenthesis","parenthetic","parenthood","parer","parhelion", "pariah","paring","parish","parishioner","parisian", "parity","park","parka","parkin","parking", "parkland","parky","parlance","parley","parliament", "parliamentarian","parliamentary","parlor","parlour","parlous", "parmesan","parochial","parodist","parody","parole", "paroxysm","parquet","parr","parricide","parrot", "parry","parse","parsee","parsi","parsimonious", "parsimony","parsley","parsnip","parson","parsonage", "part","partake","parterre","parthenogenesis","partial", "partiality","partially","participant","participate","participation", "participial","participle","particle","particular","particularise", "particularity","particularize","particularly","particulars","parting", "partisan","partita","partition","partitive","partizan", "partly","partner","partnership","p
 artook","partridge", "parts","parturition","party","parvenu","paschal", "pasha","pass","passable","passage","passageway", "passbook","passenger","passerby","passim","passing", "passion","passionate","passionately","passionflower","passive", "passivity","passivize","passkey","passover","passport", "password","past","pasta","paste","pasteboard", "pastel","pastern","pasteurise","pasteurize","pastiche", "pastille","pastime","pasting","pastor","pastoral", "pastorale","pastorate","pastrami","pastry","pasturage", "pasture","pasty","pat","patch","patchouli", "patchwork","patchy","patella","patent","patentee", "patently","pater","paterfamilias","paternal","paternalism", "paternity","paternoster","path","pathan","pathetic", "pathfinder","pathological","pathologist","pathology","pathos", "pathway","patience","patient","patina","patio", "patisserie","patois","patrial","patriarch","patriarchal", "patriarchate","patriarchy","patrician","patricide","patrimony", "patriot","patriotic","patriotism","
 patrol","patrolman", "patron","patronage","patroness","patronise","patronize", "patronymic","patten","patter","pattern","patty", "paucity","paunch","paunchy","pauper","pauperise", "pauperism","pauperize","pause","pavan","pavane", "pave","paved","pavement","pavilion","paving", "paw","pawky","pawl","pawn","pawnbroker", "pawnshop","pawpaw","pay","payable","payday", "payee","payer","payload","paymaster","payment", "paynim","payoff","payola","payroll","pea", "peace","peaceable","peaceful","peacekeeping","peacemaker", "peacetime","peach","peachick","peacock","peafowl", "peahen","peak","peaked","peaky","peal", "peanut","peanuts","pear","pearl","pearly", "pearmain","peasant","peasantry","peashooter","peat", "pebble","pebbledash","pebbly","pecan","peccadillo", "peccary","peck","pecker","peckish","pectic", "pectin","pectoral","peculate","peculiar","peculiarity", "peculiarly","pecuniary","pedagogue","pedagogy","pedal"};
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
new file mode 100644
index 0000000..585c388
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by KStem
+	/// </summary>
+	internal class KStemData6
+	{
+		private KStemData6()
+		{
+		}
+	   internal static string[] data = new string[] {"pedant","pedantic","pedantry","peddle","peddler", "pederast","pederasty","pedestal","pedestrian","pediatrician", "pediatrics","pedicab","pedicel","pedicure","pedigree", "pediment","pedlar","pedometer","pee","peek", "peekaboo","peel","peeler","peelings","peep", "peeper","peephole","peepul","peer","peerage", "peeress","peerless","peeve","peevish","peewit", "peg","pejorative","pekinese","pekingese","pekoe", "pelagic","pelf","pelican","pellagra","pellet", "pellucid","pelmet","pelota","pelt","pelvic", "pelvis","pemican","pemmican","pen","penal", "penalise","penalize","penalty","penance","pence", "penchant","pencil","pendant","pendent","pending", "pendulous","pendulum","penetrate","penetrating","penetration", "penetrative","penguin","penicillin","peninsula","penis", "penitent","penitential","penitentiary","penknife","penmanship", "pennant","penniless","pennon","penny","pennyweight", "pennywort","penology","pension","pensionable","pensione
 r", "pensive","pentagon","pentagram","pentameter","pentateuch", "pentathlon","pentecost","penthouse","penultimate","penumbra", "penurious","penury","peon","peony","people", "pep","pepper","peppercorn","peppermint","peppery", "pepsin","peptic","per","peradventure","perambulate", "perambulator","perceive","percentage","percentile","perceptible", "perception","perceptive","perch","perchance","percipient", "percolate","percolator","percussion","percussionist","perdition", "peregrination","peremptory","perennial","perfect","perfectible", "perfection","perfectionist","perfectly","perfidious","perfidy", "perforate","perforation","perforce","perform","performance", "performer","perfume","perfumier","perfunctory","pergola", "perhaps","perigee","perihelion","peril","perilous", "perimeter","period","periodic","periodical","periods", "peripatetic","peripheral","periphery","periphrasis","periphrastic", "periscope","perish","perishable","perisher","perishing", "peristyle","peritonitis","periwig",
 "periwinkle","perjure", "perjurer","perjury","perk","perky","perm", "permafrost","permanence","permanency","permanent","permanganate", "permeable","permeate","permissible","permission","permissive", "permit","permutation","permute","pernicious","pernickety", "pernod","peroration","peroxide","perpendicular","perpetrate", "perpetual","perpetuate","perpetuity","perplex","perplexed", "perplexity","perquisite","perry","persecute","persecution", "perseverance","persevere","persevering","persian","persiflage", "persimmon","persist","persistence","persistent","persnickety", "person","persona","personable","personage","personal", "personalise","personalities","personality","personalize","personally", "personification","personify","personnel","perspective","perspex", "perspicacious","perspiration","perspire","persuade","persuasion", "persuasive","pert","pertain","pertinacious","pertinent", "perturb","perturbation","peruke","peruse","pervade", "pervasive","perverse","perversion","perversity","
 pervert", "peseta","pesky","peso","pessary","pessimism", "pessimist","pest","pester","pesticide","pestiferous", "pestilence","pestilent","pestle","pet","petal", "petaled","petalled","petard","peterman","petite", "petition","petitioner","petrel","petrifaction","petrify", "petrochemical","petrol","petroleum","petrology","petticoat", "pettifogging","pettish","petty","petulant","petunia", "pew","pewit","pewter","peyote","pfennig", "phaeton","phagocyte","phalanx","phalarope","phallic", "phallus","phantasmagoria","phantasmal","phantasy","phantom", "pharaoh","pharisaic","pharisee","pharmaceutical","pharmacist", "pharmacology","pharmacopoeia","pharmacy","pharyngitis","pharynx", "phase","phd","pheasant","phenobarbitone","phenol", "phenomenal","phenomenally","phenomenon","phew","phi", "phial","philander","philanthropic","philanthropist","philanthropy", "philatelist","philately","philharmonic","philhellene","philippic", "philistine","philological","philologist","philology","philosopher", "phil
 osophical","philosophise","philosophize","philosophy","philter", "philtre","phizog","phlebitis","phlebotomy","phlegm", "phlegmatic","phlox","phobia","phoenician","phoenix", "phone","phoneme","phonemic","phonemics","phonetic", "phonetician","phonetics","phoney","phonic","phonics", "phonograph","phonology","phony","phooey","phosphate", "phosphorescence","phosphorescent","phosphoric","phosphorus","photo", "photocopier","photocopy","photoelectric","photogenic","photograph", "photographer","photographic","photography","photosensitive","photosensitize", "photostat","photosynthesis","phototsensitise","phrasal","phrase", "phrasebook","phraseology","phrenetic","phrenology","phthisis", "phut","phylloxera","phylum","physic","physical", "physically","physician","physicist","physics","physio", "physiognomy","physiology","physiotherapy","physique","pianissimo", "pianist","piano","pianola","piaster","piastre", "piazza","pibroch","picador","picaresque","piccalilli", "piccaninny","piccolo","pick","p
 ickaback","pickaninny", "pickax","pickaxe","picked","picker","pickerel", "picket","pickings","pickle","pickled","pickpocket", "picky","picnic","picnicker","pictorial","picture", "pictures","picturesque","piddle","piddling","pidgin", "pie","piebald","piece","piecemeal","pieces", "piecework","piecrust","pied","pier","pierce", "piercing","pierrot","piety","piezoelectric","piffle", "piffling","pig","pigeon","pigeonhole","piggery", "piggish","piggy","piggyback","piggybank","pigheaded", "piglet","pigment","pigmentation","pigmy","pignut", "pigskin","pigsticking","pigsty","pigswill","pigtail", "pike","pikestaff","pilaster","pilau","pilchard", "pile","piles","pileup","pilfer","pilferage", "pilgrim","pilgrimage","pill","pillage","pillar", "pillbox","pillion","pillock","pillory","pillow", "pillowcase","pilot","pimento","pimp","pimpernel", "pimple","pin","pinafore","pincer","pincers", "pinch","pinchbeck","pinched","pinchpenny","pincushion", "pine","pineal","pineapple","pinecone","pinewood", "pi
 ney","ping","pinhead","pinion","pink", "pinkeye","pinkie","pinkish","pinko","pinky", "pinnace","pinnacle","pinnate","pinny","pinpoint", "pinprick","pinstripe","pint","pinta","pintable", "pinup","pinwheel","piny","pioneer","pious", "piousness","pip","pipal","pipe","pipeline", "piper","pipes","pipette","piping","pipit", "pippin","pipsqueak","piquant","pique","piquet", "piracy","piranha","pirate","pirouette","piscatorial", "pish","piss","pissed","pistachio","pistil", "pistol","piston","pit","pitch","pitchblende", "pitcher","pitchfork","piteous","pitfall","pith", "pithead","pithy","pitiable","pitiful","pitiless", "pitman","piton","pittance","pituitary","pity", "pivot","pivotal","pixie","pixilated","pixy", "pizza","pizzicato","placard","placate","place", "placebo","placed","placekick","placement","placenta", "placid","placket","plagarise","plagarize","plagiarism", "plague","plaguey","plaice","plaid","plain", "plainly","plainsman","plainsong","plainspoken","plaint", "plaintiff","plaintive
 ","plait","plan","planchette", "planet","planetarium","planetary","plangent","plank", "planking","plankton","planner","plant","plantain", "plantation","planter","plaque","plash","plasma", "plaster","plasterboard","plastered","plasterer","plastering", "plastic","plasticine","plasticity","plastics","plastron", "plate","plateau","platelayer","platform","plating", "platinum","platitude","platonic","platoon","platter", "platypus","plaudit","plausible","play","playable", "playback","playbill","playboy","player","playful", "playgoer","playground","playgroup","playhouse","playmate", "playpen","playroom","playsuit","plaything","playtime", "playwright","plaza","plea","pleach","plead", "pleading","pleadings","pleasant","pleasantry","please", "pleased","pleasing","pleasurable","pleasure","pleat", "pleb","plebeian","plebiscite","plectrum","pled", "pledge","pleistocene","plenary","plenipotentiary","plenitude", "plenteous","plentiful","plenty","pleonasm","plethora", "pleurisy","plexus","pliable","
 pliant","pliers", "plight","plimsoll","plinth","pliocene","plod", "plodder","plonk","plop","plosive","plot", "plough","ploughboy","ploughman","ploughshare","plover", "plow","plowboy","plowman","plowshare","ploy", "pluck","plucky","plug","plughole","plum", "plumage","plumb","plumbago","plumber","plumbing", "plume","plumed","plummet","plummy","plump", "plunder","plunge","plunger","plunk","pluperfect", "plural","pluralism","plurality","pluribus","plus", "plush","plushy","pluto","plutocracy","plutocrat", "plutonium","ply","plywood","pneumatic","pneumoconiosis", "pneumonia","poach","poacher","pock","pocked", "pocket","pocketbook","pocketful","pocketknife","pockmark", "pockmarked","pod","podgy","podiatry","podium", "poem","poesy","poet","poetaster","poetess", "poetic","poetical","poetry","pogrom","poignancy", "poignant","poinsettia","point","pointed","pointer", "pointillism","pointless","points","pointsman","poise", "poised","poison","poisonous","poke","poker", "pokerwork","poky","polack"
 ,"polar","polarisation", "polarise","polarity","polarization","polarize","polaroid", "polaroids","polder","pole","poleax","poleaxe", "polecat","polemic","polemical","polemics","police", "policeman","policewoman","policy","polio","polish", "polisher","politburo","polite","politic","politicalise", "politicalize","politician","politicise","politicize","politicking", "politico","politics","polity","polka","poll", "pollard","pollen","pollinate","polling","pollster", "pollutant","pollute","pollution","polly","pollyanna", "polo","polonaise","polony","poltergeist","poltroon", "poly","polyandrous","polyandry","polyanthus","polyester", "polyethylene","polygamist","polygamous","polygamy","polyglot", "polygon","polymath","polymer","polymorphous","polyp", "polyphony","polypus","polystyrene","polysyllable","polytechnic", "polytheism","polythene","polyurethane","pomade","pomander", "pomegranate","pomeranian","pommel","pommy","pomp", "pompom","pomposity","pompous","ponce","poncho", "poncy","pond","
 ponder","ponderous","pone", "pong","poniard","pontiff","pontifical","pontificals", "pontificate","pontoon","pony","ponytail","pooch", "poodle","poof","pooh","pool","poolroom", "pools","poop","pooped","poor","poorhouse", "poorly","poorness","poove","pop","popadam", "popadum","popcorn","popery","popgun","popinjay", "popish","poplar","poplin","poppa","popper", "poppet","poppy","poppycock","popshop","popsy", "populace","popular","popularise","popularity","popularize", "popularly","populate","population","populism","populist", "populous","porcelain","porch","porcine","porcupine", "pore","pork","porker","porky","porn", "pornography","porosity","porous","porphyry","porpoise", "porridge","porringer","port","portable","portage", "portal","portals","portcullis","portend","portent", "portentous","porter","porterage","porterhouse","portfolio", "porthole","portico","portion","portly","portmanteau", "portrait","portraitist","portraiture","portray","portrayal", "pose","poser","poseur","posh","posi
 t", "position","positional","positive","positively","positiveness", "positivism","positron","posse","possess","possessed", "possession","possessive","possessor","posset","possibility", "possible","possibly","possum","post","postage", "postal","postbag","postbox","postcard","postcode", "postdate","poster","posterior","posterity","postern", "postgraduate","posthaste","posthumous","postilion","postillion", "posting","postman","postmark","postmaster","postmortem", "postpaid","postpone","postprandial","postscript","postulant", "postulate","posture","postwar","posy","pot", "potable","potash","potassium","potation","potato", "potbellied","potbelly","potboiler","potbound","poteen", "potency","potent","potentate","potential","potentiality", "pothead","pother","potherb","pothole","potholing", "pothouse","pothunter","potion","potluck","potpourri", "potsherd","potshot","pottage","potted","potter", "potteries","pottery","potty","pouch","pouf", "pouffe","poulterer","poultice","poultry","pounce", 
 "pound","poundage","pounding","pour","pout", "poverty","powder","powdered","powdery","power", "powerboat","powerful","powerhouse","powerless","powers", "powwow","pox","pps","practicable","practical", "practicality","practically","practice","practiced","practise", "practised","practitioner","praesidium","praetor","praetorian", "pragmatic","pragmatism","prairie","praise","praises", "praiseworthy","praline","pram","prance","prank", "prankster","prat","prate","pratfall","prattle", "prawn","praxis","pray","prayer","preach", "preachify","preamble","prearrange","prebend","prebendary", "precarious","precast","precaution","precede","precedence", "precedent","preceding","precentor","precept","preceptor", "precession","precinct","precincts","preciosity","precious", "precipice","precipitate","precipitation","precipitous","precise", "precisely","precision","preclude","precocious","precognition", "preconceived","preconception","precondition","precook","precursor", "predator","predatory","predecea
 se","predecessor","predestinate", "predestination","predestine","predetermine","predeterminer","predicament", "predicate","predicative","predict","predictable","prediction", "predigest","predilection","predispose","predisposition","predominance", "predominant","predominantly","predominate","preeminent","preeminently", "preempt","preemption","preemptive","preen","preexist", "preexistence","prefab","prefabricate","prefabricated","preface", "prefatory","prefect","prefecture","prefer","preferable", "preference","preferential","preferment","prefigure","prefix", "pregnancy","pregnant","preheat","prehensile","prehistoric", "prehistory","prejudge","prejudice","prejudiced","prejudicial", "prelacy","prelate","prelim","preliminary","prelims", "preliterate","prelude","premarital","premature","premeditate", "premeditated","premier","premise","premises","premiss", "premium","premonition","premonitory","prenatal","prentice", "preoccupation","preoccupied","preoccupy","preordain","prep", "prepack","
 preparation","preparatory","prepare","prepared", "preparedness","prepay","preponderance","preponderant","preponderate", "preposition","prepositional","prepossessed","prepossessing","prepossession", "preposterous","prepuce","prerecord","prerequisite","prerogative", "presage","presbyter","presbyterian","presbytery","preschool", "prescient","prescribe","prescribed","prescript","prescription", "prescriptive","presence","present","presentable","presentation", "presenter","presentiment","presently","presents","preservable", "preservation","preservative","preserve","preserver","preset", "preshrunk","preside","presidency","president","presidential", "presidium","press","pressed","pressgang","pressing", "pressman","pressmark","pressure","pressurise","pressurize", "prestidigitation","prestige","prestigious","prestissimo","presto", "prestressed","presumable","presume","presumption","presumptive", "presumptuous","presuppose","presupposition","pretence","pretend", "pretended","pretender","preten
 se","pretension","pretentious", "pretentiousness","preterit","preterite","preternatural","pretext", "pretor","pretorian","prettify","prettily","pretty", "pretzel","prevail","prevailing","prevalent","prevaricate", "prevent","prevention","preventive","preview","previous", "prevision","prewar","prey","price","priceless", "pricey","prick","prickle","prickly","pricy", "pride","priest","priesthood","priestly","prig", "priggish","prim","primacy","primaeval","primal", "primarily","primary","primate","prime","primer", "primeval","priming","primitive","primogeniture","primordial", "primp","primrose","primula","primus","prince", "princedom","princely","princess","principal","principality", "principally","principle","principled","principles","prink", "print","printable","printer","printing","printout", "prior","priority","priory","prise","prism", "prismatic","prison","prisoner","prissy","pristine", "prithee","privacy","private","privateer","privation", "privet","privilege","privileged","privily
 ","privy", "prize","prizefight","prizeman","pro","probability", "probable","probably","probate","probation","probationer", "probe","probity","problem","problematic","proboscis", "procedural","procedure","proceed","proceeding","proceedings", "proceeds","process","procession","processional","proclaim", "proclamation","proclivity","proconsul","proconsulate","procrastinate", "procreate","proctor","procure","procurer","prod", "prodigal","prodigious","prodigy","produce","producer", "product","production","productive","productivity","proem", "prof","profanation","profane","profanity","profess", "professed","professedly","profession","professional","professionalism", "professor","professorial","professorship","proffer","proficient", "profile","profit","profitable","profiteer","profligacy", "profligate","profound","profundity","profuse","profusion", "progenitor","progeny","progesterone","prognathous","prognosis", "prognostic","prognosticate","prognostication","program","programer", "programm
 er","progress","progression","progressive","prohibit", "prohibition","prohibitionist","prohibitive","prohibitory","project", "projectile","projection","projectionist","projector","prolapse", "prole","prolegomena","proletarian","proletariat","proliferate", "proliferation","prolific","prolix","prolog","prologue", "prolong","prolongation","prolonged","prom","promenade", "promenader","prominence","prominent","promiscuity","promiscuous", "promise","promising","promontory","promote","promoter", "promotion","prompt","prompter","promptness","promulgate", "pron","prone","prong","pronominal","pronoun", "pronounce","pronounceable","pronounced","pronouncement","pronto", "pronunciamento","pronunciation","proof","proofread","prop", "propaganda","propagandise","propagandist","propagandize","propagate", "propagation","propane","propel","propellant","propellent", "propeller","propensity","proper","properly","propertied", "property","prophecy","prophesy","prophet","prophetess", "prophetic","prophets"
 ,"prophylactic","prophylaxis","propinquity", "propitiate","propitiatory","propitious","propjet","proponent", "proportion","proportional","proportionate","proportions","proposal", "propose","proposition","propound","proprietary","proprieties", "proprietor","proprietress","propriety","propulsion","propulsive", "propylene","prorogation","prorogue","prosaic","proscenium", "proscribe","proscription","prose","prosecute","prosecution", "prosecutor","proselyte","proselytise","proselytize","prosody", "prospect","prospective","prospector","prospects","prospectus", "prosper","prosperity","prosperous","prostate","prosthesis", "prostitute","prostitution","prostrate","prostration","prosy", "protagonist","protean","protect","protection","protectionism", "protective","protector","protectorate","protein","protest", "protestant","protestation","protocol","proton","protoplasm", "prototype","protozoa","protozoan","protozoon","protract", "protraction","protractor","protrude","protrusion","protrusive", "
 protuberance","protuberant","proud","provable","prove", "proven","provenance","provender","proverb","proverbial", "proverbially","proverbs","provide","provided","providence", "provident","providential","provider","providing","province", "provinces","provincial","provision","provisional","provisions", "proviso","provocation","provocative","provoke","provoking", "provost","prow","prowess","prowl","prowler", "prox","proximal","proximate","proximity","proximo", "proxy","prude","prudence","prudent","prudential", "prudery","prudish","prune","pruning","prurience", "prurient","pruritus","prussian","pry","psalm", "psalmist","psalmody","psalms","psalter","psaltery", "psephology","pseud","pseudonym","pseudonymous","pshaw", "psittacosis","psoriasis","psst","psyche","psychedelic", "psychiatric","psychiatrist","psychiatry","psychic","psycho", "psychoanalyse","psychoanalysis","psychoanalyst","psychoanalytic","psychoanalyze", "psychokinesis","psychological","psychologist","psychology","psychopath",
  "psychosis","psychosomatic","psychotherapy","psychotic","pta", "ptarmigan","pterodactyl","pto","ptomaine","pub", "puberty","pubic","public","publican","publication", "publicise","publicist","publicity","publicize","publish", "publisher","publishing","puce","puck","pucker", "puckish","pud","pudding","puddle","pudendum", "pudgy","pueblo","puerile","puerility","puerperal", "puff","puffball","puffed","puffer","puffin", "puffy","pug","pugilism","pugilist","pugnacious", "pugnacity","puissance","puissant","puke","pukka", "pulchritude","pulchritudinous","pule","pull","pullback", "pullet","pulley","pullman","pullout","pullover", "pullthrough","pullulate","pulmonary","pulp","pulpit", "pulsar","pulsate","pulsation","pulse","pulverise", "pulverize","puma","pumice","pummel","pump", "pumpernickel","pumpkin","pun","punch","punchy", "punctilio","punctilious","punctual","punctuate","punctuation", "puncture","pundit","pungent","punic","punish", "punishable","punishing","punishment","punitive","punja
 bi", "punk","punkah","punnet","punster","punt", "puny","pup","pupa","pupate","pupil", "puppet","puppeteer","puppy","purblind","purchase", "purchaser","purdah","pure","pureblooded","purebred", "puree","purely","pureness","purgation","purgative", "purgatory","purge","purification","purify","purist", "puritan","puritanical","purity","purl","purler", "purlieus","purloin","purple","purplish","purport", "purpose","purposeful","purposeless","purposely","purposive", "purr","purse","purser","pursuance","pursue", "pursuer","pursuit","purulent","purvey","purveyance", "purveyor","purview","pus","push","pushbike", "pushcart","pushchair","pushed","pusher","pushover", "pushy","pusillanimous","puss","pussy","pussycat", "pussyfoot","pustule","put","putative","putrefaction", "putrefactive","putrefy","putrescent","putrid","putsch", "putt","puttee","putter","putto","putty", "puzzle","puzzlement","puzzler","pvc","pygmy", "pyjama","pyjamas","pylon","pyorrhea","pyorrhoea", "pyramid","pyre","pyrex","pyrexi
 a","pyrites", "pyromania","pyromaniac","pyrotechnic","pyrotechnics","python", "pyx","qed","qty","qua","quack", "quackery","quad","quadragesima","quadrangle","quadrangular", "quadrant","quadrilateral","quadrille","quadrillion","quadroon", "quadruped","quadruple","quadruplet","quadruplicate","quaff", "quagga","quagmire","quail","quaint","quake", "quaker","qualification","qualifications","qualified","qualifier", "qualify","qualitative","quality","qualm","quandary", "quantify","quantitative","quantity","quantum","quarantine", "quark","quarrel","quarrelsome","quarry","quart", "quarter","quarterdeck","quarterfinal","quartering","quarterly", "quartermaster","quarters","quarterstaff","quartet","quartette", "quarto","quartz","quasar","quash","quatercentenary", "quatrain","quaver","quay","quean","queasy", "queen","queenly","queer","quell","quench", "quenchless","querulous","query","quest","question", "questionable","questioner","questioning","questionnaire","quetzal", "queue","quibble","quick
 ","quicken","quickie", "quicklime","quicksand","quicksilver","quickstep","quid", "quiescent","quiet","quieten","quietism","quietude", "quietus","quiff","quill","quilt","quilted", "quin","quince","quinine","quinquagesima","quinsy", "quintal","quintessence","quintet","quintette","quintuplet", "quip","quire","quirk","quisling","quit", "quits","quittance","quitter","quiver","quixotic", "quiz","quizmaster","quizzical","quod","quoit", "quoits","quondam","quorum","quota","quotable", "quotation","quote","quoth","quotidian","quotient", "rabbi","rabbinical","rabbit","rabble","rabelaisian", "rabid","rabies","rac","raccoon","race", "racecourse","racehorse","raceme","racer","races", "racetrack","racial","racialism","racially","racing", "rack","racket","racketeer","racketeering","rackets", "raconteur","racoon","racquet","racquets","racy", "radar","radial","radiance","radiant","radiate", "radiation","radiator","radical","radicalise","radicalism", "radicalize","radicle","radii","radio","radioactive
 ", "radioactivity","radiogram","radiograph","radiographer","radiography", "radioisotope","radiolocation","radiology","radiotherapist","radiotherapy", "radish","radium","radius","raffia","raffish", "raffle","raft","rafter","raftered","raftsman", "rag","raga","ragamuffin","ragbag","rage", "ragged","raglan","ragout","ragtag","ragtime", "raid","raider","rail","railhead","railing", "raillery","railroad","rails","railway","raiment", "rain","rainbow","raincoat","raindrop","rainfall", "rainproof","rains","rainstorm","rainwater","rainy", "raise","raisin","raj","raja","rajah", "rake","rakish","rallentando","rally","ram", "ramadan","ramble","rambler","rambling","rambunctious", "ramekin","ramification","ramify","ramjet","ramp", "rampage","rampant","rampart","ramrod","ramshackle", "ran","ranch","rancher","rancid","rancor", "rancorous","rancour","rand","random","randy", "ranee","rang","range","ranger","rani", "rank","ranker","ranking","rankle","ranks", "ransack","ransom","rant","rap","rapacious",
  "rapacity","rape","rapid","rapids","rapier", "rapine","rapist","rapport","rapprochement","rapscallion", "rapt","rapture","rapturous","rare","rarebit", "rarefied","rarefy","rarely","raring","rarity", "rascal","rascally","rash","rasher","rasp", "raspberry","rat","ratable","ratchet","rate", "rateable","ratepayer","rather","ratify","rating", "ratio","ratiocination","ration","rational","rationale", "rationalise","rationalism","rationalist","rationalize","rations", "ratlin","ratline","rats","rattan","ratter", "rattle","rattlebrained","rattlesnake","rattletrap","rattling", "ratty","raucous","raunchy","ravage","ravages", "rave","ravel","raven","ravening","ravenous", "raver","ravine","raving","ravings","ravioli", "ravish","ravishing","ravishment","raw","rawhide", "ray","rayon","raze","razor","razorback", "razzle","reach","react","reaction","reactionary", "reactivate","reactive","reactor","read","readable", "readdress","reader","readership","readily","readiness", "reading","readjust","readou
 t","ready","reafforest", "reagent","real","realign","realisable","realisation", "realise","realism","realist","realistic","reality", "realizable","realization","realize","really","realm", "realpolitik","realtor","realty","ream","reanimate", "reap","reaper","reappear","reappraisal","rear", "rearguard","rearm","rearmament","rearmost","rearrange", "rearward","rearwards","reason","reasonable","reasonably", "reasoned","reasoning","reassure","rebarbative","rebate", "rebel","rebellion","rebellious","rebind","rebirth", "reborn","rebound","rebuff","rebuild","rebuke", "rebus","rebut","rebuttal","recalcitrance","recalcitrant", "recall","recant","recap","recapitulate","recapitulation", "recapture","recast","recce","recd","recede", "receipt","receipts","receivable","receive","received", "receiver","receivership","receiving","recent","recently", "receptacle","reception","receptionist","receptive","recess", "recession","recessional","recessive","recharge","recidivist", "recipe","recipient","recipr
 ocal","reciprocate","reciprocity", "recital","recitation","recitative","recite","reck", "reckless","reckon","reckoner","reckoning","reclaim", "reclamation","recline","recluse","recognise","recognition", "recognizance","recognize","recoil","recollect","recollection", "recommend","recommendation","recompense","reconcile","reconciliation", "recondite","recondition","reconnaissance","reconnoiter","reconnoitre", "reconsider","reconstitute","reconstruct","reconstruction","record", "recorder","recording","recordkeeping","recount","recoup", "recourse","recover","recovery","recreant","recreate", "recreation","recreational","recriminate","recrimination","recrudescence", "recruit","rectal","rectangle","rectangular","rectification", "rectifier","rectify","rectilinear","rectitude","recto", "rector","rectory","rectum","recumbent","recuperate", "recuperative","recur","recurrence","recurrent","recurved", "recusant","recycle","red","redbreast","redbrick", "redcap","redcoat","redcurrant","redden","re
 ddish", "redecorate","redeem","redeemer","redemption","redemptive", "redeploy","redhead","rediffusion","redirect","redistribute", "redo","redolence","redolent","redouble","redoubt", "redoubtable","redound","redress","redskin","reduce", "reduction","redundancy","redundant","reduplicate","redwing", "redwood","reecho","reed","reeds","reeducate", "reedy","reef","reefer","reek","reel", "reentry","reeve","ref","reface","refashion", "refectory","refer","referee","reference","referendum", "refill","refine","refined","refinement","refiner", "refinery","refit","reflate","reflation","reflect", "reflection","reflective","reflector","reflex","reflexes", "reflexive","refloat","refoot","reforest","reform", "reformation","reformatory","refract","refractory","refrain", "refresh","refresher","refreshing","refreshment","refreshments", "refrigerant","refrigerate","refrigeration","refrigerator","reft", "refuel","refuge","refugee","refulgence","refulgent", "refund","refurbish","refusal","refuse","refutab
 le", "refutation","refute","regain","regal","regale", "regalia","regard","regardful","regarding","regardless", "regards","regatta","regency","regenerate","regent", "reggae","regicide","regime","regimen","regiment", "regimental","regimentals","regina","region","regional", "regions","register","registrar","registration","registry", "regnant","regress","regressive","regret","regrets", "regrettable","regrettably","regroup","regular","regularise", "regularity","regularize","regularly","regulate","regulation", "regulator","regulo","regurgitate","rehabilitate","rehash", "rehear","rehearsal","rehearse","rehouse","reich", "reification","reify","reign","reimburse","reimbursement", "rein","reincarnate","reincarnation","reindeer","reinforce", "reinforcement","reinforcements","reins","reinstate","reinsure", "reissue","reiterate","reject","rejection","rejoice", "rejoicing","rejoicings","rejoin","rejoinder","rejuvenate", "rekindle","relaid","relapse","relate","related", "relation","relational","re
 lations","relationship","relative", "relatively","relativism","relativistic","relativity","relax", "relaxation","relaxing","relay","release","relegate", "relent","relentless","relevance","relevant","reliability", "reliable","reliance","reliant","relic","relics", "relict","relief","relieve","relieved","religion", "religious","religiously","reline","relinquish","reliquary", "relish","relive","reload","relocate","reluctance", "reluctant","reluctantly","rely","remain","remainder", "remains","remake","remand","remark","remarkable", "remarkably","remarry","remediable","remedial","remedy", "remember","remembrance","remilitarise","remilitarize","remind", "reminder","reminisce","reminiscence","reminiscences","reminiscent", "remiss","remission","remit","remittance","remittent", "remnant","remodel","remold","remonstrance","remonstrate", "remorse","remorseful","remote","remotely","remould", "remount","removal","remove","remover","remunerate", "remunerative","renaissance","renal","rename","renas
 cent", "rend","render","rendering","rendezvous","rendition", "renegade","renege","renegue","renew","renewable", "renewal","rennet","renounce","renovate","renown", "renowned","rent","rental","renter","rentier", "renunciation","reopen","reorganise","reorganize","rep", "repaid","repair","reparable","reparation","reparations", "repartee","repast","repatriate","repay","repayable", "repayment","repeal","repeat","repeated","repeatedly", "repeater","repeating","repel","repellent","repent", "repentance","repentant","repercussion","repertoire","repertory", "repetition","repetitious","repine","replace","replacement", "replay","replenish","replete","repletion","replica", "replicate","reply","repoint","report","reportage", "reportedly","reporter","repose","repository","repossess", "repot","repp","reprehend","reprehensible","represent", "representation","representational","representations","representative","repress", "repressed","repression","repressive","reprieve","reprimand", "reprint","reprisa
 l","reprise","reproach","reprobate", "reproduce","reproducer","reproduction","reproductive","reproof", "reprove","reproving","reptile","reptilian","republic", "republican","republicanism","repudiate","repugnance","repugnant", "repulse","repulsion","repulsive","reputable","reputation", "repute","reputed","reputedly","request","requiem", "require","requirement","requisite","requisition","requital", "requite","reredos","rerun","rescind","rescript", "rescue","research","reseat","resemblance","resemble", "resent","resentment","reservation","reserve","reserved", "reservedly","reservist","reservoir","reset","resettle", "reshuffle","reside","residence","residency","resident", "residential","residual","residuary","residue","resign", "resignation","resigned","resilience","resilient","resin", "resinated","resist","resistance","resistant","resistor", "resole","resolute","resolution","resolvable","resolve", "resonance","resonant","resonate","resonator","resort", "resound","resounding","resource"
 ,"resourceful","resources", "respect","respectability","respectable","respecter","respectful", "respecting","respective","respectively","respects","respiration", "respirator","respiratory","respire","respite","resplendence", "resplendent","respond","respondent","response","responsibility", "responsible","responsibly","responsive","rest","restage", "restate","restaurant","restaurateur","restful","restitution", "restive","restless","restock","restoration","restorative", "restore","restorer","restrain","restrained","restraint", "restrict","restricted","restriction","restrictive","restructure", "result","resultant","resume","resumption","resurface", "resurgence","resurgent","resurrect","resurrection","resuscitate", "retail","retailer","retain","retainer","retake", "retaliate","retaliation","retaliatory","retard","retarded", "retch","retd","retell","retention","retentive", "rethink","reticence","reticent","reticulated","reticulation", "reticule","retina","retinue","retire","retired", "re
 tirement","retiring","retort","retouch","retrace", "retract","retractable","retractile","retraction","retread", "retreat","retrench","retrial","retraining","retribution", "retributive","retrieval","retrieve","retriever","retroactive", "retroflex","retrograde","retrogress","retrogressive","retrospect", "retrospection","retrospective","retroversion","retsina","return", "returnable","returns","reunion","reunite","reuse", "rev","revalue","revamp","reveal","revealing", "reveille","revel","revelation","revelry","revenge", "revenue","reverberant","reverberate","reverberation","revere", "reverence","reverend","reverent","reverential","reverie", "revers","reversal","reverse","reversion","reversionary", "revert","revetment","review","reviewer","revile", "revise","revision","revisionism","revitalise","revitalize", "revival","revivalist","revive","revivify","revocable", "revocation","revoke","revolt","revolting","revolution", "revolutionary","revolutionise","revolutionize","revolve","revolver",
  "revolving","revue","revulsion","reward","rewarding", "rewards","rewire","reword","rewrite","rex", "rhapsodise","rhapsodize","rhapsody","rhea","rhenish", "rheostat","rhetoric","rhetorical","rhetorically","rhetorician", "rheum","rheumatic","rheumaticky","rheumatics","rheumatism", "rheumatoid","rhinestone","rhinoceros","rhizome","rhododendron", "rhomboid","rhombus","rhubarb","rhyme","rhymed", "rhymester","rhythm","rhythmic","rib","ribald", "ribaldry","ribbed","ribbing","ribbon","riboflavin", "rice","rich","riches","richly","richness", "rick","rickets","rickety","ricksha","rickshaw", "ricochet","rid","riddance","ridden","riddle", "ride","rider","riderless","ridge","ridgepole", "ridicule","ridiculous","riding","riesling","rife", "riff","riffle","riffraff","rifle","rifleman", "rifles","rifling","rift","rig","rigging", "right","righteous","rightful","rightist","rightly", "rights","rightward","rightwards","rigid","rigidity", "rigmarole","rigor","rigorous","rigour","rile", "rill","rim","ri
 me","rind","rinderpest", "ring","ringer","ringleader","ringlet","ringmaster", "ringside","ringworm","rink","rinse","riot", "riotous","rip","riparian","ripcord","ripen", "riposte","ripple","ripsaw","riptide","rise", "riser","risibility","risible","rising","risk", "risky","risotto","rissole","rite","ritual", "ritualism","ritzy","rival","rivalry","rive", "river","riverbed","riverside","rivet","riveter", "riveting","riviera","rivulet","rna","roach", "road","roadbed","roadblock","roadhouse","roadman", "roadside","roadstead","roadster","roadway","roadworthy", "roam","roan","roar","roaring","roast", "roaster","roasting","rob","robber","robbery", "robe","robin","robot","robust","rock", "rockbound","rocker","rockery","rocket","rocketry", "rocks","rocky","rococo","rod","rode", "rodent","rodeo","rodomontade","roe","roebuck", "rogation","roger","rogue","roguery","roguish", "roisterer","role","roll","roller","rollicking", "rolling","rolls","romaic","roman","romance", "romanesque","romantic","rom
 anticise","romanticism","romanticize", "romany","romish","romp","romper","rompers", "rondeau","rondo","roneo","rood","roodscreen", "roof","roofing","roofless","rooftree","rook", "rookery","rookie","room","roomer","roommate", "rooms","roomy","roost","rooster","root", "rooted","rootless","roots","rope","ropedancer", "ropes","ropewalk","ropeway","ropey","ropy", "roquefort","rosary","rose","roseate","rosebud", "roseleaf","rosemary","rosette","rosewater","rosewood", "rosin","roster","rostrum","rosy","rot", "rota","rotary","rotate","rotation","rotatory", "rotgut","rotisserie","rotogravure","rotor","rotten", "rottenly","rotter","rotund","rotunda","rouble", "rouge","rough","roughage","roughcast","roughen", "roughhouse","roughly","roughneck","roughness","roughrider", "roughshod","roulette","round","roundabout","roundel", "roundelay","rounders","roundhead","roundhouse","roundish", "roundly","rounds","roundsman","roundup","roup", "rouse","rousing","roustabout","rout","route", "routine","roux",
 "rove","rover","row", "rowan","rowanberry","rowdy","rowdyism","rowel", "rower","rowing","rowlock","royal","royalist", "royalty","rpm","rsm","rsvp","rub", "rubber","rubberise","rubberize","rubberneck","rubbery", "rubbing","rubbish","rubbishy","rubble","rubdown", "rubella","rubicon","rubicund","ruble","rubric", "ruby","ruck","rucksack","ruckus","ruction", "ructions","rudder","ruddle","ruddy","rude", "rudely","rudiment","rudimentary","rudiments","rue", "rueful","ruff","ruffian","ruffianly","ruffle", "rug","rugby","rugged","ruin","ruination", "ruinous","ruins","rule","rulebook","ruler", "ruling","rum","rumba","rumble","rumbling", "rumbustious","ruminant","ruminate","ruminative","rummage", "rummy","rumor","rumored","rumormonger","rumour", "rumoured","rumourmonger","rump","rumple","rumpus", "run","runaway","rung","runnel","runner", "running","runny","runs","runt","runway"};
+	}
+
+}
\ No newline at end of file


[29/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
new file mode 100644
index 0000000..51960b8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
@@ -0,0 +1,58 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="WhitespaceTokenizer"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class WhitespaceTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new WhitespaceTokenizerFactory </summary>
+	  public WhitespaceTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override WhitespaceTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
+	  }
+	}
+
+}
\ No newline at end of file
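
For readers following the port, a minimal usage sketch for the factory above. This is hypothetical: it assumes the raw-ported constructor (IDictionary<string, string> args) and the create(AttributeFactory, Reader) signature shown in this diff survive cleanup, and that AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY carries over from the Java API.

    using System.Collections.Generic;
    using System.IO;

    // Hypothetical sketch; names mirror the raw port above and may change.
    var args = new Dictionary<string, string>();         // factories consume their args...
    var factory = new WhitespaceTokenizerFactory(args);  // ...and reject any leftovers
    var tokenizer = factory.create(
        AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,      // assumed default, per the Java API
        new StringReader("hello world"));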

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
new file mode 100644
index 0000000..230ecfd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
@@ -0,0 +1,161 @@
+using System;
+
+namespace org.apache.lucene.analysis.cz
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Czech language.
+	/// <para>
+	/// Supports an external list of stopwords (words that will not be indexed at
+	/// all). A default set of stopwords is used unless an alternative list is
+	/// specified.
+	/// </para>
+	/// 
+	/// <a name="version"/>
+	/// <para>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// CzechAnalyzer:
+	/// <ul>
+	/// <li>As of 3.1, words are stemmed with <seealso cref="CzechStemFilter"/>
+	/// <li>As of 2.9, StopFilter preserves position increments
+	/// <li>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+	/// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class CzechAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Czech stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns a set of default Czech stopwords.
+	  /// </summary>
+	  /// <returns> a set of default Czech stopwords </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(CzechAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+
+	  private readonly CharArraySet stemExclusionTable;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match See
+	  ///          <seealso cref="<a href="#version">above</a>"/> </param>
+	  public CzechAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match See
+	  ///          <seealso cref="<a href="#version">above</a>"/> </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public CzechAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and a set of words to be
+	  /// excluded from the <seealso cref="CzechStemFilter"/>.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match See
+	  ///          <seealso cref="<a href="#version">above</a>"/> </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionTable"> a stemming exclusion set </param>
+	  public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , and <seealso cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
+	  ///         a version is >= LUCENE_31 and a stem exclusion set is provided via
+	  ///         <seealso cref="#CzechAnalyzer(Version, CharArraySet, CharArraySet)"/> a
+	  ///         <seealso cref="SetKeywordMarkerFilter"/> is added before
+	  ///         <seealso cref="CzechStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  if (!this.stemExclusionTable.Empty)
+		  {
+			result = new SetKeywordMarkerFilter(result, stemExclusionTable);
+		  }
+		  result = new CzechStemFilter(result);
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+
+}
\ No newline at end of file
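
A quick orientation for the analyzer above, as a hedged sketch. It assumes the ported Version enum, CharArraySet, and the three-argument constructor keep the shapes shown in this diff; none of this is final API.

    // Hypothetical sketch; names mirror the raw port above and may change.
    Version matchVersion = Version.LUCENE_CURRENT;
    CharArraySet stopwords = CzechAnalyzer.DefaultStopSet; // loaded from stopwords.txt by DefaultSetHolder
    // The stem exclusion set is keyword-marked so CzechStemFilter skips it
    // (stemming only runs when matchVersion is on or after LUCENE_31):
    var analyzer = new CzechAnalyzer(matchVersion, stopwords, CharArraySet.EMPTY_SET);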

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
new file mode 100644
index 0000000..598cb86
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
@@ -0,0 +1,67 @@
+namespace org.apache.lucene.analysis.cz
+{
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="CzechStemmer"/> to stem Czech words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// <para><b>NOTE</b>: Input is expected to be in lowercase, 
+	/// but with diacritical marks</para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter </seealso>
+	public sealed class CzechStemFilter : TokenFilter
+	{
+	  private readonly CzechStemmer stemmer = new CzechStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public CzechStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
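
To make the SetKeywordMarkerFilter note above concrete, a hypothetical chain that protects selected terms from stemming, assuming the ported filters keep the constructor shapes seen elsewhere in this commit:

    // Hypothetical sketch; protectedTerms is an assumed CharArraySet of terms
    // that must keep their surface form.
    TokenStream stream = new StandardTokenizer(matchVersion, reader);
    stream = new LowerCaseFilter(matchVersion, stream);          // stemmer expects lowercase input
    stream = new SetKeywordMarkerFilter(stream, protectedTerms); // sets KeywordAttribute on matches
    stream = new CzechStemFilter(stream);                        // keyword-marked terms pass through unstemmed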

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
new file mode 100644
index 0000000..7152da8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cz
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="CzechStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.CzechStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CzechStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new CzechStemFilterFactory </summary>
+	  public CzechStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new CzechStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
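
And the programmatic counterpart of the Solr snippet above, again as a hedged sketch against the raw-ported factory API:

    // Hypothetical sketch; the factory takes no parameters and throws on leftovers.
    var factory = new CzechStemFilterFactory(new Dictionary<string, string>());
    TokenStream stemmed = factory.create(upstream); // wraps the assumed upstream stream in a CzechStemFilter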

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
new file mode 100644
index 0000000..49ebc43
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
@@ -0,0 +1,157 @@
+namespace org.apache.lucene.analysis.cz
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Czech.
+	/// <para>
+	/// Implements the algorithm described in:  
+	/// <i>
+	/// Indexing and stemming approaches for the Czech language
+	/// </i>
+	/// http://portal.acm.org/citation.cfm?id=1598600
+	/// </para>
+	/// </summary>
+	public class CzechStemmer
+	{
+
+	  /// <summary>
+	  /// Stem an input buffer of Czech text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization
+	  /// 
+	  /// <para><b>NOTE</b>: Input is expected to be in lowercase, 
+	  /// but with diacritical marks</para> </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		len = removeCase(s, len);
+		len = removePossessives(s, len);
+		if (len > 0)
+		{
+		  len = normalize(s, len);
+		}
+		return len;
+	  }
+
+	  private int removeCase(char[] s, int len)
+	  {
+		if (len > 7 && StemmerUtil.EndsWith(s, len, "atech"))
+		{
+		  return len - 5;
+		}
+
+		if (len > 6 && (StemmerUtil.EndsWith(s, len,"ětem") || StemmerUtil.EndsWith(s, len,"etem") || StemmerUtil.EndsWith(s, len,"atům")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "ech") || StemmerUtil.EndsWith(s, len, "ich") || StemmerUtil.EndsWith(s, len, "ích") || StemmerUtil.EndsWith(s, len, "ého") || StemmerUtil.EndsWith(s, len, "ěmi") || StemmerUtil.EndsWith(s, len, "emi") || StemmerUtil.EndsWith(s, len, "ému") || StemmerUtil.EndsWith(s, len, "ěte") || StemmerUtil.EndsWith(s, len, "ete") || StemmerUtil.EndsWith(s, len, "ěti") || StemmerUtil.EndsWith(s, len, "eti") || StemmerUtil.EndsWith(s, len, "ího") || StemmerUtil.EndsWith(s, len, "iho") || StemmerUtil.EndsWith(s, len, "ími") || StemmerUtil.EndsWith(s, len, "ímu") || StemmerUtil.EndsWith(s, len, "imu") || StemmerUtil.EndsWith(s, len, "ách") || StemmerUtil.EndsWith(s, len, "ata") || StemmerUtil.EndsWith(s, len, "aty") || StemmerUtil.EndsWith(s, len, "ých") || StemmerUtil.EndsWith(s, len, "ama") || StemmerUtil.EndsWith(s, len, "ami") || StemmerUtil.EndsWith(s, len, "ové") || StemmerUtil.EndsWith(s, len, "ovi") || StemmerUtil.EndsWith(s, len, "�
 �mi")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "em") || StemmerUtil.EndsWith(s, len, "es") || StemmerUtil.EndsWith(s, len, "ém") || StemmerUtil.EndsWith(s, len, "ím") || StemmerUtil.EndsWith(s, len, "ům") || StemmerUtil.EndsWith(s, len, "at") || StemmerUtil.EndsWith(s, len, "ám") || StemmerUtil.EndsWith(s, len, "os") || StemmerUtil.EndsWith(s, len, "us") || StemmerUtil.EndsWith(s, len, "ým") || StemmerUtil.EndsWith(s, len, "mi") || StemmerUtil.EndsWith(s, len, "ou")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'ů':
+			case 'y':
+			case 'á':
+			case 'é':
+			case 'í':
+			case 'ý':
+			case 'ě':
+			  return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int removePossessives(char[] s, int len)
+	  {
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "ov") || StemmerUtil.EndsWith(s, len, "in") || StemmerUtil.EndsWith(s, len, "ův")))
+		{
+		  return len - 2;
+		}
+
+		return len;
+	  }
+
+	  private int normalize(char[] s, int len)
+	  {
+		if (StemmerUtil.EndsWith(s, len, "čt")) // čt -> ck
+		{
+		  s[len - 2] = 'c';
+		  s[len - 1] = 'k';
+		  return len;
+		}
+
+		if (StemmerUtil.EndsWith(s, len, "št")) // št -> sk
+		{
+		  s[len - 2] = 's';
+		  s[len - 1] = 'k';
+		  return len;
+		}
+
+		switch (s[len - 1])
+		{
+		  case 'c': // [cč] -> k
+		  case 'č':
+			s[len - 1] = 'k';
+			return len;
+		  case 'z': // [zž] -> h
+		  case 'ž':
+			s[len - 1] = 'h';
+			return len;
+		}
+
+		if (len > 1 && s[len - 2] == 'e')
+		{
+		  s[len - 2] = s[len - 1]; // e* > *
+		  return len - 1;
+		}
+
+		if (len > 2 && s[len - 2] == 'ů')
+		{
+		  s[len - 2] = 'o'; // *ů* -> *o*
+		  return len;
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
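
Like the other stemmers in this commit, CzechStemmer works in place on a char buffer and returns the new logical length, which is what lets CzechStemFilter reuse the term attribute's buffer without allocating. A worked example traced against the code above (method casing as in this port):

    var stemmer = new CzechStemmer();
    char[] buf = "hradem".ToCharArray();        // "castle", instrumental singular
    int newLen = stemmer.stem(buf, buf.Length);
    // removeCase() matches the "em" suffix (len > 4) and drops two characters;
    // removePossessives() and normalize() then leave the result untouched
    Console.WriteLine(new string(buf, 0, newLen)); // prints "hrad"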

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Da/DanishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Da/DanishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Da/DanishAnalyzer.cs
new file mode 100644
index 0000000..b02657b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Da/DanishAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.da
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using DanishStemmer = org.tartarus.snowball.ext.DanishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Danish.
+	/// </summary>
+	public sealed class DanishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Danish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "danish_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public DanishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public DanishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public DanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new DanishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
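
Construction follows the StopwordAnalyzerBase pattern used throughout this commit: pick a compatibility Version, optionally supply a stop set, and optionally supply terms that must never be stemmed. A hedged sketch (the CharArraySet constructor and add() signatures are assumptions about the port):

    var exclusions = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    exclusions.add("københavn");                // keep proper names unstemmed
    var analyzer = new DanishAnalyzer(Version.LUCENE_CURRENT,
                                      DanishAnalyzer.DefaultStopSet,
                                      exclusions);
    // createComponents() now inserts a SetKeywordMarkerFilter ahead of the
    // SnowballFilter, so "københavn" keeps its original form in the index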

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanAnalyzer.cs
new file mode 100644
index 0000000..13da913
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanAnalyzer.cs
@@ -0,0 +1,185 @@
+using System;
+
+namespace org.apache.lucene.analysis.de
+{
+	// This file is encoded in UTF-8
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using German2Stemmer = org.tartarus.snowball.ext.German2Stemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for German language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (words that will
+	/// not be stemmed, but indexed).
+	/// A default set of stopwords is used unless an alternative list is specified, but the
+	/// exclusion list is empty by default.
+	/// </para>
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating GermanAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, GermanLightStemFilter is used for less aggressive stemming.
+	///   <li> As of 3.1, Snowball stemming is done with SnowballFilter, and 
+	///        Snowball stopwords are used by default.
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class GermanAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) 
+	  [Obsolete("in 3.1, remove in Lucene 5.0 (index bw compat)")]
+	  private static readonly string[] GERMAN_STOP_WORDS = new string[] {"einer", "eine", "eines", "einem", "einen", "der", "die", "das", "dass", "daß", "du", "er", "sie", "es", "was", "wer", "wie", "wir", "und", "oder", "ohne", "mit", "am", "im", "in", "aus", "auf", "ist", "sein", "war", "wird", "ihr", "ihre", "ihres", "als", "für", "von", "mit", "dich", "dir", "mich", "mir", "mein", "sein", "kein", "durch", "wegen", "wird"};
+
+	  /// <summary>
+	  /// File containing default German stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "german_stop.txt";
+
+	  /// <summary>
+	  /// Returns a set of default German-stopwords </summary>
+	  /// <returns> a set of default German-stopwords  </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		/// @deprecated in 3.1, remove in Lucene 5.0 (index bw compat) 
+		[Obsolete("in 3.1, remove in Lucene 5.0 (index bw compat)")]
+		internal static readonly CharArraySet DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
+		internal static readonly CharArraySet DEFAULT_SET;
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Contains the stopwords used with the <seealso cref="StopFilter"/>.
+	  /// </summary>
+
+	  /// <summary>
+	  /// Contains words that should be indexed but not stemmed.
+	  /// </summary>
+	  private readonly CharArraySet exclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words:
+	  /// <seealso cref="#getDefaultStopSet()"/>.
+	  /// </summary>
+	  public GermanAnalyzer(Version matchVersion) : this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET : DefaultSetHolder.DEFAULT_SET_30)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words 
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public GermanAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a stemming exclusion set </param>
+	  public GermanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, <seealso cref="GermanNormalizationFilter"/> and <seealso cref="GermanLightStemFilter"/> </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		result = new SetKeywordMarkerFilter(result, exclusionSet);
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new GermanNormalizationFilter(result);
+		  result = new GermanLightStemFilter(result);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  result = new SnowballFilter(result, new German2Stemmer());
+		}
+		else
+		{
+		  result = new GermanStemFilter(result);
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
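
The matchVersion argument is what selects the stemming chain in createComponents(): 3.6 and later get GermanNormalizationFilter plus GermanLightStemFilter, 3.1 through 3.5 get Snowball's German2Stemmer, and anything older falls back to GermanStemFilter with the 3.0 stop word list. A sketch; the concrete Version constants are assumptions about the port:

    var modern = new GermanAnalyzer(Version.LUCENE_46); // normalization + light stemmer
    var mid    = new GermanAnalyzer(Version.LUCENE_31); // SnowballFilter(German2Stemmer)
    var legacy = new GermanAnalyzer(Version.LUCENE_30); // GermanStemFilter + GERMAN_STOP_WORDS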

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilter.cs
new file mode 100644
index 0000000..57997f8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="GermanLightStemmer"/> to stem German
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class GermanLightStemFilter : TokenFilter
+	{
+	  private readonly GermanLightStemmer stemmer = new GermanLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public GermanLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
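
As the class summary notes, the filter honours the KeywordAttribute, so a SetKeywordMarkerFilter placed earlier in the chain is how callers protect individual terms. A hedged sketch (tokenizer and CharArraySet signatures are assumed):

    var keep = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    keep.add("haus");                              // this term must survive verbatim
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
                                             new StringReader("haus häuser"));
    ts = new SetKeywordMarkerFilter(ts, keep);     // sets KeywordAttribute for "haus"
    ts = new GermanLightStemFilter(ts);            // skips tokens marked as keywords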

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilterFactory.cs
new file mode 100644
index 0000000..0f8746e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GermanLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GermanLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GermanLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GermanLightStemFilterFactory </summary>
+	  public GermanLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GermanLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemmer.cs
new file mode 100644
index 0000000..87307ec
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanLightStemmer.cs
@@ -0,0 +1,177 @@
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Light Stemmer for German.
+	/// <para>
+	/// This stemmer implements the "UniNE" algorithm in:
+	/// <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class GermanLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'ä':
+			case 'à':
+			case 'á':
+			case 'â':
+				s[i] = 'a';
+				break;
+			case 'ö':
+			case 'ò':
+			case 'ó':
+			case 'ô':
+				s[i] = 'o';
+				break;
+			case 'ï':
+			case 'ì':
+			case 'í':
+			case 'î':
+				s[i] = 'i';
+				break;
+			case 'ü':
+			case 'ù':
+			case 'ú':
+			case 'û':
+				s[i] = 'u';
+				break;
+		  }
+		}
+
+		len = step1(s, len);
+		return step2(s, len);
+	  }
+
+	  private bool stEnding(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'b':
+		  case 'd':
+		  case 'f':
+		  case 'g':
+		  case 'h':
+		  case 'k':
+		  case 'l':
+		  case 'm':
+		  case 'n':
+		  case 't':
+			  return true;
+		  default:
+			  return false;
+		}
+	  }
+
+	  private int step1(char[] s, int len)
+	  {
+		if (len > 5 && s[len - 3] == 'e' && s[len - 2] == 'r' && s[len - 1] == 'n')
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && s[len - 2] == 'e')
+		{
+		  switch (s[len - 1])
+		  {
+			case 'm':
+			case 'n':
+			case 'r':
+			case 's':
+				return len - 2;
+		  }
+		}
+
+		if (len > 3 && s[len - 1] == 'e')
+		{
+		  return len - 1;
+		}
+
+		if (len > 3 && s[len - 1] == 's' && stEnding(s[len - 2]))
+		{
+		  return len - 1;
+		}
+
+		return len;
+	  }
+
+	  private int step2(char[] s, int len)
+	  {
+		if (len > 5 && s[len - 3] == 'e' && s[len - 2] == 's' && s[len - 1] == 't')
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && s[len - 2] == 'e' && (s[len - 1] == 'r' || s[len - 1] == 'n'))
+		{
+		  return len - 2;
+		}
+
+		if (len > 4 && s[len - 2] == 's' && s[len - 1] == 't' && stEnding(s[len - 3]))
+		{
+		  return len - 2;
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
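
The stemmer first folds accented vowels onto their base letters, then applies the two suffix-stripping steps, so inflected forms collapse onto a single stem. A worked example traced against the code above:

    var stemmer = new GermanLightStemmer();
    char[] buf = "häuser".ToCharArray();        // plural of "haus"
    int newLen = stemmer.stem(buf, buf.Length);
    // 'ä' folds to 'a'; step1 then removes the trailing "er" (len > 4, s[len - 2] == 'e')
    Console.WriteLine(new string(buf, 0, newLen)); // prints "haus"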

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilter.cs
new file mode 100644
index 0000000..ca93a4e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="GermanMinimalStemmer"/> to stem German
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class GermanMinimalStemFilter : TokenFilter
+	{
+	  private readonly GermanMinimalStemmer stemmer = new GermanMinimalStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public GermanMinimalStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..bb72f4b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GermanMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GermanMinimalStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GermanMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GermanMinimalStemFilterFactory </summary>
+	  public GermanMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GermanMinimalStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemmer.cs
new file mode 100644
index 0000000..a1e109d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanMinimalStemmer.cs
@@ -0,0 +1,151 @@
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Minimal Stemmer for German.
+	/// <para>
+	/// This stemmer implements the following algorithm:
+	/// <i>Morphologie et recherche d'information</i>
+	/// Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class GermanMinimalStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 5)
+		{
+		  return len;
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'ä':
+				s[i] = 'a';
+				break;
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'ü':
+				s[i] = 'u';
+				break;
+		  }
+		}
+
+		if (len > 6 && s[len - 3] == 'n' && s[len - 2] == 'e' && s[len - 1] == 'n')
+		{
+			return len - 3;
+		}
+
+		if (len > 5)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'n':
+				if (s[len - 2] == 'e')
+				{
+					return len - 2;
+				}
+				else
+				{
+					break;
+				}
+			case 'e':
+				if (s[len - 2] == 's')
+				{
+					return len - 2;
+				}
+				else
+				{
+					break;
+				}
+			case 's':
+				if (s[len - 2] == 'e')
+				{
+					return len - 2;
+				}
+				else
+				{
+					break;
+				}
+			case 'r':
+				if (s[len - 2] == 'e')
+				{
+					return len - 2;
+				}
+				else
+				{
+					break;
+				}
+		  }
+		}
+
+		switch (s[len - 1])
+		{
+		  case 'n':
+		  case 'e':
+		  case 's':
+		  case 'r':
+			  return len - 1;
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
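
The minimal variant is deliberately conservative: words shorter than five characters come back untouched, umlauts are folded, and at most one plural ending is trimmed. A worked example:

    var stemmer = new GermanMinimalStemmer();
    char[] buf = "jahren".ToCharArray();        // dative plural of "jahr"
    int newLen = stemmer.stem(buf, buf.Length);
    // len > 5 and the word ends in "en", so the switch drops two characters
    Console.WriteLine(new string(buf, 0, newLen)); // prints "jahr"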

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilter.cs
new file mode 100644
index 0000000..19fcbf7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilter.cs
@@ -0,0 +1,130 @@
+using System;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// Normalizes German characters according to the heuristics
+	/// of the <a href="http://snowball.tartarus.org/algorithms/german2/stemmer.html">
+	/// German2 snowball algorithm</a>.
+	/// It allows for the fact that ä, ö and ü are sometimes written as ae, oe and ue.
+	/// <para>
+	/// <ul>
+	///   <li> 'ß' is replaced by 'ss'
+	///   <li> 'ä', 'ö', 'ü' are replaced by 'a', 'o', 'u', respectively.
+	///   <li> 'ae' and 'oe' are replaced by 'a', and 'o', respectively.
+	///   <li> 'ue' is replaced by 'u', when not following a vowel or q.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// This is useful if you want this normalization without using
+	/// the German2 stemmer, or perhaps no stemming at all.
+	/// </para>
+	/// </summary>
+	public sealed class GermanNormalizationFilter : TokenFilter
+	{
+	  // FSM with 3 states:
+	  private const int N = 0; // ordinary state
+	  private const int V = 1; // stops 'u' from entering umlaut state
+	  private const int U = 2; // umlaut state, allows e-deletion
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  public GermanNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  int state = N;
+		  char[] buffer = termAtt.buffer();
+		  int length = termAtt.length();
+		  for (int i = 0; i < length; i++)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char c = buffer[i];
+			char c = buffer[i];
+			switch (c)
+			{
+			  case 'a':
+			  case 'o':
+				state = U;
+				break;
+			  case 'u':
+				state = (state == N) ? U : V;
+				break;
+			  case 'e':
+				if (state == U)
+				{
+				  length = StemmerUtil.delete(buffer, i--, length);
+				}
+				state = V;
+				break;
+			  case 'i':
+			  case 'q':
+			  case 'y':
+				state = V;
+				break;
+			  case 'ä':
+				buffer[i] = 'a';
+				state = V;
+				break;
+			  case 'ö':
+				buffer[i] = 'o';
+				state = V;
+				break;
+			  case 'ü':
+				buffer[i] = 'u';
+				state = V;
+				break;
+			  case 'ß':
+				buffer[i++] = 's';
+				buffer = termAtt.resizeBuffer(1 + length);
+				if (i < length)
+				{
+				  Array.Copy(buffer, i, buffer, i + 1, (length - i));
+				}
+				buffer[i] = 's';
+				length++;
+				state = N;
+				break;
+			  default:
+				state = N;
+			break;
+			}
+		  }
+		  termAtt.Length = length;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
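
The three-state machine exists for one distinction: in "ue" the 'e' is deleted only when the 'u' does not follow a vowel or 'q'. A hedged sketch of the resulting mappings (WhitespaceTokenizer and StringReader are assumed to exist in the port):

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT,
        new StringReader("mueller quelle fußball"));
    ts = new GermanNormalizationFilter(ts);
    // mueller -> muller   ("ue" after a consonant: the 'e' is deleted)
    // quelle  -> quelle   ("ue" after 'q': left alone)
    // fußball -> fussball ('ß' is expanded to "ss")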

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilterFactory.cs
new file mode 100644
index 0000000..0229746
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanNormalizationFilterFactory.cs
@@ -0,0 +1,65 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GermanNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_denorm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GermanNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class GermanNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new GermanNormalizationFilterFactory </summary>
+	  public GermanNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GermanNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs
new file mode 100644
index 0000000..203c990
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs
@@ -0,0 +1,96 @@
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that stems German words. 
+	/// <para>
+	/// It supports a table of words that should
+	/// not be stemmed at all. The stemmer used can be changed at runtime after the
+	/// filter object is created (as long as it is a <seealso cref="GermanStemmer"/>).
+	/// </para>
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter </seealso>
+	public sealed class GermanStemFilter : TokenFilter
+	{
+		/// <summary>
+		/// The stemmer applied to tokens in the input stream.
+		/// </summary>
+		private GermanStemmer stemmer = new GermanStemmer();
+
+		private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+		/// <summary>
+		/// Creates a <seealso cref="GermanStemFilter"/> instance </summary>
+		/// <param name="in"> the source <seealso cref="TokenStream"/>  </param>
+		public GermanStemFilter(TokenStream @in) : base(@in)
+		{
+		}
+
+		/// <returns>  Returns true for next token in the stream, or false at EOS </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+		  if (input.incrementToken())
+		  {
+			string term = termAtt.ToString();
+
+			if (!keywordAttr.Keyword)
+			{
+			  string s = stemmer.stem(term);
+			  // If not stemmed, don't waste the time adjusting the token.
+			  if ((s != null) && !s.Equals(term))
+			  {
+				termAtt.setEmpty().append(s);
+			  }
+			}
+			return true;
+		  }
+		  else
+		  {
+			return false;
+		  }
+		}
+
+		/// <summary>
+		/// Sets an alternative/custom <seealso cref="GermanStemmer"/> for this filter.
+		/// </summary>
+		public GermanStemmer Stemmer
+		{
+			set
+			{
+			  if (value != null)
+			  {
+				this.stemmer = value;
+			  }
+			}
+		}
+	}
+
+}
\ No newline at end of file
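
Unlike the light and minimal filters, GermanStemFilter lets callers swap the stemmer at runtime through the Stemmer property, matching the promise in the class summary. A hedged sketch; a custom subclass of GermanStemmer (overriding the virtual stem(string)) is hypothetical here:

    var filter = new GermanStemFilter(ts);
    filter.Stemmer = new GermanStemmer(); // or a custom subclass;
                                          // assigning null is silently ignored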

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilterFactory.cs
new file mode 100644
index 0000000..4f5c136
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GermanStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GermanStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GermanStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GermanStemFilterFactory </summary>
+	  public GermanStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override GermanStemFilter create(TokenStream @in)
+	  {
+		return new GermanStemFilter(@in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
new file mode 100644
index 0000000..9b63922
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
@@ -0,0 +1,308 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.de
+{
+
+	// This file is encoded in UTF-8
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A stemmer for German words. 
+	/// <para>
+	/// The algorithm is based on the report
+	/// "A Fast and Simple Stemming Algorithm for German Words" by J&ouml;rg
+	/// Caumanns (joerg.caumanns at isst.fhg.de).
+	/// </para>
+	/// </summary>
+	public class GermanStemmer
+	{
+		/// <summary>
+		/// Buffer for the terms while stemming them.
+		/// </summary>
+		private StringBuilder sb = new StringBuilder();
+
+		/// <summary>
+		/// Number of characters removed by <tt>substitute()</tt> while stemming.
+		/// </summary>
+		private int substCount = 0;
+
+		private static readonly CultureInfo locale = new CultureInfo("de-DE");
+
+		/// <summary>
+		/// Stems the given term to a unique <tt>discriminator</tt>.
+		/// </summary>
+		/// <param name="term">  The term that should be stemmed. </param>
+		/// <returns>      Discriminator for <tt>term</tt> </returns>
+		protected internal virtual string stem(string term)
+		{
+		  // Use lowercase for medium stemming.
+		  term = term.ToLower(locale);
+		  if (!isStemmable(term))
+		  {
+			return term;
+		  }
+		  // Reset the StringBuilder.
+		  sb.Remove(0, sb.Length);
+		  sb.Insert(0, term);
+		  // Stemming starts here...
+		  substitute(sb);
+		  strip(sb);
+		  optimize(sb);
+		  resubstitute(sb);
+		  removeParticleDenotion(sb);
+		  return sb.ToString();
+		}
+
+		/// <summary>
+		/// Checks if a term could be stemmed.
+		/// </summary>
+		/// <returns>  true if, and only if, the given term consists only of letters. </returns>
+		private bool isStemmable(string term)
+		{
+		  for (int c = 0; c < term.Length; c++)
+		  {
+			if (!char.IsLetter(term[c]))
+			{
+			  return false;
+			}
+		  }
+		  return true;
+		}
+
+		/// <summary>
+		/// Performs suffix stripping (stemming) on the current term. The stripping is reduced
+		/// to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and "nd",
+		/// from which all regular suffixes are built. The simplification causes
+		/// some overstemming, and way more irregular stems, but still provides unique
+		/// discriminators in most of those cases.
+		/// The algorithm is context free, except for the length restrictions.
+		/// </summary>
+		private void strip(StringBuilder buffer)
+		{
+		  bool doMore = true;
+		  while (doMore && buffer.Length > 3)
+		  {
+			if ((buffer.Length + substCount > 5) && StringHelperClass.SubstringSpecial(buffer, buffer.Length - 2, buffer.Length).Equals("nd"))
+			{
+			  buffer.Remove(buffer.Length - 2, 2);
+			}
+			else if ((buffer.Length + substCount > 4) && StringHelperClass.SubstringSpecial(buffer, buffer.Length - 2, buffer.Length).Equals("em"))
+			{
+				buffer.Remove(buffer.Length - 2, 2);
+			}
+			else if ((buffer.Length + substCount > 4) && StringHelperClass.SubstringSpecial(buffer, buffer.Length - 2, buffer.Length).Equals("er"))
+			{
+				buffer.Remove(buffer.Length - 2, 2);
+			}
+			else if (buffer[buffer.Length - 1] == 'e')
+			{
+			  buffer.Remove(buffer.Length - 1, 1);
+			}
+			else if (buffer[buffer.Length - 1] == 's')
+			{
+			  buffer.Remove(buffer.Length - 1, 1);
+			}
+			else if (buffer[buffer.Length - 1] == 'n')
+			{
+			  buffer.Remove(buffer.Length - 1, 1);
+			}
+			// "t" occurs only as suffix of verbs.
+			else if (buffer[buffer.Length - 1] == 't')
+			{
+			  buffer.Remove(buffer.Length - 1, 1);
+			}
+			else
+			{
+			  doMore = false;
+			}
+		  }
+		}
+
+		/// <summary>
+		/// Does some optimizations on the term. These optimizations are
+		/// contextual.
+		/// </summary>
+		private void optimize(StringBuilder buffer)
+		{
+		  // Additional step for female plurals of professions and inhabitants.
+		  if (buffer.Length > 5 && StringHelperClass.SubstringSpecial(buffer, buffer.Length - 5, buffer.Length).Equals("erin*"))
+		  {
+			buffer.Remove(buffer.Length - 1, 1);
+			strip(buffer);
+		  }
+		  // Additional step for irregular plural nouns like "Matrizen -> Matrix".
+		  // NOTE: this length constraint is probably not a great value, it's just to prevent an index-out-of-range on empty terms
+		  if (buffer.Length > 0 && buffer[buffer.Length - 1] == ('z'))
+		  {
+			buffer[buffer.Length - 1] = 'x';
+		  }
+		}
+
+		/// <summary>
+		/// Removes a particle denotation ("ge") from a term.
+		/// </summary>
+		private void removeParticleDenotion(StringBuilder buffer)
+		{
+		  if (buffer.Length > 4)
+		  {
+			for (int c = 0; c < buffer.Length - 3; c++)
+			{
+			  if (buffer.ToString(c, 4).Equals("gege"))
+			  {
+				buffer.Remove(c, 2);
+				return;
+			  }
+			}
+		  }
+		}
+
+		/// <summary>
+		/// Do some substitutions for the term to reduce overstemming:
+		/// 
+		/// - Substitute Umlauts with their corresponding vowel: äöü -> aou,
+		///   "ß" is substituted by "ss"
+		/// - Substitute a second char of a pair of equal characters with
+		///   an asterisk: ?? -> ?*
+		/// - Substitute some common character combinations with a token:
+		///   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
+		/// </summary>
+		private void substitute(StringBuilder buffer)
+		{
+		  substCount = 0;
+		  for (int c = 0; c < buffer.Length; c++)
+		  {
+			// Replace the second char of a pair of the equal characters with an asterisk
+			if (c > 0 && buffer[c] == buffer[c - 1])
+			{
+			  buffer[c] = '*';
+			}
+			// Substitute Umlauts.
+			else if (buffer[c] == 'ä')
+			{
+			  buffer[c] = 'a';
+			}
+			else if (buffer[c] == 'ö')
+			{
+			  buffer[c] = 'o';
+			}
+			else if (buffer[c] == 'ü')
+			{
+			  buffer[c] = 'u';
+			}
+			// Fix bug so that 'ß' at the end of a word is replaced.
+			else if (buffer[c] == 'ß')
+			{
+				buffer[c] = 's';
+				buffer.Insert(c + 1, 's');
+				substCount++;
+			}
+			// Take care that at least one character remains to the left of the current one
+			if (c < buffer.Length - 1)
+			{
+			  // Masking several common character combinations with a token
+			  if ((c < buffer.Length - 2) && buffer[c] == 's' && buffer[c + 1] == 'c' && buffer[c + 2] == 'h')
+			  {
+				buffer[c] = '$';
+				buffer.Remove(c + 1, 2);
+				substCount += 2;
+			  }
+			  else if (buffer[c] == 'c' && buffer[c + 1] == 'h')
+			  {
+				buffer[c] = '§';
+				buffer.Remove(c + 1, 1);
+				substCount++;
+			  }
+			  else if (buffer[c] == 'e' && buffer[c + 1] == 'i')
+			  {
+				buffer[c] = '%';
+				buffer.Remove(c + 1, 1);
+				substCount++;
+			  }
+			  else if (buffer[c] == 'i' && buffer[c + 1] == 'e')
+			  {
+				buffer[c] = '&';
+				buffer.Remove(c + 1, 1);
+				substCount++;
+			  }
+			  else if (buffer[c] == 'i' && buffer[c + 1] == 'g')
+			  {
+				buffer[c] = '#';
+				buffer.Remove(c + 1, 1);
+				substCount++;
+			  }
+			  else if (buffer[c] == 's' && buffer[c + 1] == 't')
+			  {
+				buffer[c] = '!';
+				buffer.Remove(c + 1, 1);
+				substCount++;
+			  }
+			}
+		  }
+		}
+
+		/// <summary>
+		/// Undoes the changes made by substitute(), i.e. character pairs and
+		/// character combinations. Umlauts remain as their corresponding vowel,
+		/// and "ß" remains as "ss".
+		/// </summary>
+		private void resubstitute(StringBuilder buffer)
+		{
+		  for (int c = 0; c < buffer.Length; c++)
+		  {
+			if (buffer[c] == '*')
+			{
+			  char x = buffer[c - 1];
+			  buffer[c] = x;
+			}
+			else if (buffer[c] == '$')
+			{
+			  buffer[c] = 's';
+			  buffer.Insert(c + 1, new char[]{'c', 'h'}, 0, 2);
+			}
+			else if (buffer[c] == '§')
+			{
+			  buffer[c] = 'c';
+			  buffer.Insert(c + 1, 'h');
+			}
+			else if (buffer[c] == '%')
+			{
+			  buffer[c] = 'e';
+			  buffer.Insert(c + 1, 'i');
+			}
+			else if (buffer[c] == '&')
+			{
+			  buffer[c] = 'i';
+			  buffer.Insert(c + 1, 'e');
+			}
+			else if (buffer[c] == '#')
+			{
+			  buffer[c] = 'i';
+			  buffer.Insert(c + 1, 'g');
+			}
+			else if (buffer[c] == '!')
+			{
+			  buffer[c] = 's';
+			  buffer.Insert(c + 1, 't');
+			}
+		  }
+		}
+
+	}
+
+}
\ No newline at end of file

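To make the substitute/strip/optimize/resubstitute pipeline above concrete,
a rough trace; stem() is protected internal, so a throwaway subclass exposes
it here, and the sample word is illustrative only, not a test fixture:

    internal class ExposedStemmer : GermanStemmer
    {
        public string Stem(string term) { return stem(term); }
    }

    // "Häuser" -> lowercase "häuser" -> umlaut substitution "hauser"
    //          -> strip "er" -> "haus" -> strip "s" -> "hau"
    string stem = new ExposedStemmer().Stem("Häuser");
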
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekAnalyzer.cs
new file mode 100644
index 0000000..9ad4f94
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+using System.IO;
+using Reader = System.IO.TextReader;
+
+namespace org.apache.lucene.analysis.el
+{
+
+	/// <summary>
+	/// Copyright 2005 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for the Greek language. 
+	/// <para>
+	/// Supports an external list of stopwords (words
+	/// that will not be indexed at all).
+	/// A default set of stopwords is used unless an alternative list is specified.
+	/// </para>
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating GreekAnalyzer:
+	/// <ul>
+	///   <li> As of 3.1, StandardFilter and GreekStemmer are used by default.
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class GreekAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Greek stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns a set of default Greek-stopwords </summary>
+	  /// <returns> a set of default Greek-stopwords  </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_SET = loadStopwordSet(false, typeof(GreekAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words. </summary>
+	  /// <param name="matchVersion"> Lucene compatibility version,
+	  ///   See <a href="#version">above</a> </param>
+	  public GreekAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. 
+	  /// <para>
+	  /// <b>NOTE:</b> The stopwords set should be pre-processed with the logic of 
+	  /// <seealso cref="GreekLowerCaseFilter"/> for best results.
+	  ///  
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene compatibility version,
+	  ///   See <a href="#version">above</a> </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public GreekAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="GreekLowerCaseFilter"/>, <seealso cref="StandardFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, and <seealso cref="GreekStemFilter"/> </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  result = new StandardFilter(matchVersion, result);
+		}
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  result = new GreekStemFilter(result);
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

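The chain built by createComponents above, written out step by step (a sketch
only: "reader" is any TextReader, LUCENE_48 stands in for the caller's
matchVersion, and the Java-flavored type names are kept as they appear in
this raw port):

    Tokenizer source = new StandardTokenizer(Version.LUCENE_48, reader);
    TokenStream result = new GreekLowerCaseFilter(Version.LUCENE_48, source);
    result = new StandardFilter(Version.LUCENE_48, result);  // 3.1+ only
    result = new StopFilter(Version.LUCENE_48, result, GreekAnalyzer.DefaultStopSet);
    result = new GreekStemFilter(result);                    // 3.1+ only
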

[30/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
new file mode 100644
index 0000000..cbbd16a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
@@ -0,0 +1,780 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+
+	/// <summary>
+	/// <h2>Ternary Search Tree.</h2>
+	/// 
+	/// <para>
+	/// A ternary search tree is a hybrid between a binary tree and a digital search
+	/// tree (trie). Keys are limited to strings. A data value of type char is stored
+	/// in each leaf node. It can be used as an index (or pointer) to the data.
+	/// Branches that only contain one key are compressed to one node by storing a
+	/// pointer to the trailer substring of the key. This class is intended to serve
+	/// as base class or helper class to implement Dictionary collections or the
+	/// like. Ternary trees have some nice properties as the following: the tree can
+	/// be traversed in sorted order, partial matches (wildcard) can be implemented,
+	/// retrieval of all keys within a given distance from the target, etc. The
+	/// storage requirements are higher than a binary tree but a lot less than a
+	/// trie. Performance is comparable with a hash table, sometimes it outperforms a
+	/// hash function (most of the time can determine a miss faster than a hash).
+	/// </para>
+	/// 
+	/// <para>
+	/// The main purpose of this java port is to serve as a base for implementing
+	/// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
+	/// requires from 5000 to 15000 hyphenation patterns which will be keys in this
+	/// tree. The strings patterns are usually small (from 2 to 5 characters), but
+	/// each char in the tree is stored in a node. Thus memory usage is the main
+	/// concern. We will sacrifice 'elegance' to keep memory requirements to the
+	/// concern. We will sacrifice 'elegance' to keep memory requirements to the
+	/// minimum. Using java's char type as pointer (yes, I know pointer is a
+	/// forbidden word in java) we can keep the size of the node to be just 8 bytes
+	/// (3 pointers and the data char). This gives room for about 65000 nodes. In my
+	/// tests the english patterns took 7694 nodes and the german patterns 10055
+	/// nodes, so I think we are safe.
+	/// </para>
+	/// 
+	/// <para>
+	/// All said, this is a map with strings as keys and char as value. Pretty
+	/// limited! It can be extended to a general map by using the string
+	/// representation of an object and using the char value as an index to an array
+	/// that contains the object values.
+	/// </para>
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+	/// </summary>
+
+	public class TernaryTree : ICloneable
+	{
+
+	  /// <summary>
+	  /// We use 4 arrays to represent a node. I guess I should have created a proper
+	  /// node class, but somehow Knuth's pascal code made me forget we now have a
+	  /// portable language with virtual memory management and automatic garbage
+	  /// collection! And now is kind of late, furthermore, if it ain't broken, don't
+	  /// fix it.
+	  /// </summary>
+
+	  /// <summary>
+	  /// Pointer to low branch and to rest of the key when it is stored directly in
+	  /// this node, we don't have unions in java!
+	  /// </summary>
+	  protected internal char[] lo;
+
+	  /// <summary>
+	  /// Pointer to high branch.
+	  /// </summary>
+	  protected internal char[] hi;
+
+	  /// <summary>
+	  /// Pointer to equal branch and to data when this node is a string terminator.
+	  /// </summary>
+	  protected internal char[] eq;
+
+	  /// <summary>
+	  /// <P>
+	  /// The character stored in this node: splitchar. Two special values are
+	  /// reserved:
+	  /// </P>
+	  /// <ul>
+	  /// <li>0x0000 as string terminator</li>
+	  /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
+	  /// </ul>
+	  /// <para>
+	  /// This shouldn't be a problem if we give the usual semantics to strings since
+	  /// 0xFFFF is guaranteed not to be a Unicode character.
+	  /// </para>
+	  /// </summary>
+	  protected internal char[] sc;
+
+	  /// <summary>
+	  /// This vector holds the trailing of the keys when the branch is compressed.
+	  /// </summary>
+	  protected internal CharVector kv;
+
+	  protected internal char root;
+
+	  protected internal char freenode;
+
+	  protected internal int length; // number of items in tree
+
+	  protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
+
+	  internal TernaryTree()
+	  {
+		init();
+	  }
+
+	  protected internal virtual void init()
+	  {
+		root = (char)0;
+		freenode = (char)1;
+		length = 0;
+		lo = new char[BLOCK_SIZE];
+		hi = new char[BLOCK_SIZE];
+		eq = new char[BLOCK_SIZE];
+		sc = new char[BLOCK_SIZE];
+		kv = new CharVector();
+	  }
+
+	  /// <summary>
+	  /// Branches are initially compressed, needing one node per key plus the size
+	  /// of the string key. They are decompressed as needed when another key with
+	  /// same prefix is inserted. This saves a lot of space, especially for long
+	  /// keys.
+	  /// </summary>
+	  public virtual void insert(string key, char val)
+	  {
+		// make sure we have enough room in the arrays
+		int len = key.Length + 1; // maximum number of nodes that may be generated
+		if (freenode + len > eq.Length)
+		{
+		  redimNodeArrays(eq.Length + BLOCK_SIZE);
+		}
+		char[] strkey = new char[len--];
+		key.CopyTo(0, strkey, 0, len);
+		strkey[len] = (char)0;
+		root = insert(root, strkey, 0, val);
+	  }
+
+	  public virtual void insert(char[] key, int start, char val)
+	  {
+		int len = strlen(key) + 1;
+		if (freenode + len > eq.Length)
+		{
+		  redimNodeArrays(eq.Length + BLOCK_SIZE);
+		}
+		root = insert(root, key, start, val);
+	  }
+
+	  /// <summary>
+	  /// The actual insertion function, recursive version.
+	  /// </summary>
+	  private char insert(char p, char[] key, int start, char val)
+	  {
+		int len = strlen(key, start);
+		if (p == 0)
+		{
+		  // this means there is no branch, this node will start a new branch.
+		  // Instead of doing that, we store the key somewhere else and create
+		  // only one node with a pointer to the key
+		  p = freenode++;
+		  eq[p] = val; // holds data
+		  length++;
+		  hi[p] = (char)0;
+		  if (len > 0)
+		  {
+			sc[p] = (char)0xFFFF; // indicates branch is compressed
+			lo[p] = (char) kv.alloc(len + 1); // use 'lo' to hold pointer to key
+			strcpy(kv.Array, lo[p], key, start);
+		  }
+		  else
+		  {
+			sc[p] = (char)0;
+			lo[p] = (char)0;
+		  }
+		  return p;
+		}
+
+		if (sc[p] == 0xFFFF)
+		{
+		  // branch is compressed: need to decompress
+		  // this will generate garbage in the external key array
+		  // but we can do some garbage collection later
+		  char pp = freenode++;
+		  lo[pp] = lo[p]; // previous pointer to key
+		  eq[pp] = eq[p]; // previous pointer to data
+		  lo[p] = (char)0;
+		  if (len > 0)
+		  {
+			sc[p] = kv.get(lo[pp]);
+			eq[p] = pp;
+			lo[pp]++;
+			if (kv.get(lo[pp]) == 0)
+			{
+			  // key completely decompressed leaving garbage in key array
+			  lo[pp] = (char)0;
+			  sc[pp] = (char)0;
+			  hi[pp] = (char)0;
+			}
+			else
+			{
+			  // we only got first char of key, rest is still there
+			  sc[pp] = (char)0xFFFF;
+			}
+		  }
+		  else
+		  {
+			// In this case we can save a node by swapping the new node
+			// with the compressed node
+			sc[pp] = (char)0xFFFF;
+			hi[p] = pp;
+			sc[p] = (char)0;
+			eq[p] = val;
+			length++;
+			return p;
+		  }
+		}
+		char s = key[start];
+		if (s < sc[p])
+		{
+		  lo[p] = insert(lo[p], key, start, val);
+		}
+		else if (s == sc[p])
+		{
+		  if (s != 0)
+		  {
+			eq[p] = insert(eq[p], key, start + 1, val);
+		  }
+		  else
+		  {
+			// key already in tree, overwrite data
+			eq[p] = val;
+		  }
+		}
+		else
+		{
+		  hi[p] = insert(hi[p], key, start, val);
+		}
+		return p;
+	  }
+
+	  /// <summary>
+	  /// Compares 2 null terminated char arrays
+	  /// </summary>
+	  public static int strcmp(char[] a, int startA, char[] b, int startB)
+	  {
+		for (; a[startA] == b[startB]; startA++, startB++)
+		{
+		  if (a[startA] == 0)
+		  {
+			return 0;
+		  }
+		}
+		return a[startA] - b[startB];
+	  }
+
+	  /// <summary>
+	  /// Compares a string with null terminated char array
+	  /// </summary>
+	  public static int strcmp(string str, char[] a, int start)
+	  {
+		int i, d, len = str.Length;
+		for (i = 0; i < len; i++)
+		{
+		  d = (int) str[i] - a[start + i];
+		  if (d != 0)
+		  {
+			return d;
+		  }
+		  if (a[start + i] == 0)
+		  {
+			return d;
+		  }
+		}
+		if (a[start + i] != 0)
+		{
+		  return -a[start + i];
+		}
+		return 0;
+
+	  }
+
+	  public static void strcpy(char[] dst, int di, char[] src, int si)
+	  {
+		while (src[si] != 0)
+		{
+		  dst[di++] = src[si++];
+		}
+		dst[di] = (char)0;
+	  }
+
+	  public static int strlen(char[] a, int start)
+	  {
+		int len = 0;
+		for (int i = start; i < a.Length && a[i] != 0; i++)
+		{
+		  len++;
+		}
+		return len;
+	  }
+
+	  public static int strlen(char[] a)
+	  {
+		return strlen(a, 0);
+	  }
+
+	  public virtual int find(string key)
+	  {
+		int len = key.Length;
+		char[] strkey = new char[len + 1];
+		key.CopyTo(0, strkey, 0, len);
+		strkey[len] = (char)0;
+
+		return find(strkey, 0);
+	  }
+
+	  public virtual int find(char[] key, int start)
+	  {
+		int d;
+		char p = root;
+		int i = start;
+		char c;
+
+		while (p != 0)
+		{
+		  if (sc[p] == 0xFFFF)
+		  {
+			if (strcmp(key, i, kv.Array, lo[p]) == 0)
+			{
+			  return eq[p];
+			}
+			else
+			{
+			  return -1;
+			}
+		  }
+		  c = key[i];
+		  d = c - sc[p];
+		  if (d == 0)
+		  {
+			if (c == 0)
+			{
+			  return eq[p];
+			}
+			i++;
+			p = eq[p];
+		  }
+		  else if (d < 0)
+		  {
+			p = lo[p];
+		  }
+		  else
+		  {
+			p = hi[p];
+		  }
+		}
+		return -1;
+	  }
+
+	  public virtual bool knows(string key)
+	  {
+		return (find(key) >= 0);
+	  }
+
+	  // redimension the arrays
+	  private void redimNodeArrays(int newsize)
+	  {
+		int len = newsize < lo.Length ? newsize : lo.Length;
+		char[] na = new char[newsize];
+		Array.Copy(lo, 0, na, 0, len);
+		lo = na;
+		na = new char[newsize];
+		Array.Copy(hi, 0, na, 0, len);
+		hi = na;
+		na = new char[newsize];
+		Array.Copy(eq, 0, na, 0, len);
+		eq = na;
+		na = new char[newsize];
+		Array.Copy(sc, 0, na, 0, len);
+		sc = na;
+	  }
+
+	  public virtual int size()
+	  {
+		return length;
+	  }
+
+	  public virtual TernaryTree clone()
+	  {
+		TernaryTree t = new TernaryTree();
+		t.lo = (char[]) this.lo.Clone();
+		t.hi = (char[]) this.hi.Clone();
+		t.eq = (char[]) this.eq.Clone();
+		t.sc = (char[]) this.sc.Clone();
+		t.kv = this.kv.clone();
+		t.root = this.root;
+		t.freenode = this.freenode;
+		t.length = this.length;
+
+		return t;
+	  }
+
+	  // explicit ICloneable implementation delegating to the typed clone()
+	  object ICloneable.Clone()
+	  {
+		return clone();
+	  }
+
+	  /// <summary>
+	  /// Recursively insert the median first and then the median of the lower and
+	  /// upper halves, and so on in order to get a balanced tree. The array of keys
+	  /// is assumed to be sorted in ascending order.
+	  /// </summary>
+	  protected internal virtual void insertBalanced(string[] k, char[] v, int offset, int n)
+	  {
+		int m;
+		if (n < 1)
+		{
+		  return;
+		}
+		m = n >> 1;
+
+		insert(k[m + offset], v[m + offset]);
+		insertBalanced(k, v, offset, m);
+
+		insertBalanced(k, v, offset + m + 1, n - m - 1);
+	  }
+
+	  /// <summary>
+	  /// Balance the tree for best search performance
+	  /// </summary>
+	  public virtual void balance()
+	  {
+		// System.out.print("Before root splitchar = ");
+		// System.out.println(sc[root]);
+
+		int i = 0, n = length;
+		string[] k = new string[n];
+		char[] v = new char[n];
+		Iterator iter = new Iterator(this);
+		while (iter.hasMoreElements())
+		{
+		  v[i] = iter.Value;
+		  k[i++] = iter.nextElement();
+		}
+		init();
+		insertBalanced(k, v, 0, n);
+
+		// With uniform letter distribution sc[root] should be around 'm'
+		// System.out.print("After root splitchar = ");
+		// System.out.println(sc[root]);
+	  }
+
+	  /// <summary>
+	  /// Each node stores a character (splitchar) which is part of some key(s). In a
+	  /// compressed branch (one that only contain a single string key) the trailer
+	  /// of the key which is not already in nodes is stored externally in the kv
+	  /// array. As items are inserted, key substrings decrease. Some substrings may
+	  /// completely disappear when the whole branch is totally decompressed. The
+	  /// tree is traversed to find the key substrings actually used. In addition,
+	  /// duplicate substrings are removed using a map (implemented with a
+	  /// TernaryTree!).
+	  /// 
+	  /// </summary>
+	  public virtual void trimToSize()
+	  {
+		// first balance the tree for best performance
+		balance();
+
+		// redimension the node arrays
+		redimNodeArrays(freenode);
+
+		// ok, compact kv array
+		CharVector kx = new CharVector();
+		kx.alloc(1);
+		TernaryTree map = new TernaryTree();
+		compact(kx, map, root);
+		kv = kx;
+		kv.trimToSize();
+	  }
+
+	  private void compact(CharVector kx, TernaryTree map, char p)
+	  {
+		int k;
+		if (p == 0)
+		{
+		  return;
+		}
+		if (sc[p] == 0xFFFF)
+		{
+		  k = map.find(kv.Array, lo[p]);
+		  if (k < 0)
+		  {
+			k = kx.alloc(strlen(kv.Array, lo[p]) + 1);
+			strcpy(kx.Array, k, kv.Array, lo[p]);
+			map.insert(kx.Array, k, (char) k);
+		  }
+		  lo[p] = (char) k;
+		}
+		else
+		{
+		  compact(kx, map, lo[p]);
+		  if (sc[p] != 0)
+		  {
+			compact(kx, map, eq[p]);
+		  }
+		  compact(kx, map, hi[p]);
+		}
+	  }
+
+	  public virtual Iterator keys()
+	  {
+		return new Iterator(this);
+	  }
+
+	  /// <summary>
+	  /// Java-style enumerator over the keys; this raw port does not implement
+	  /// IEnumerator&lt;string&gt; yet, so it is exposed as the concrete type.
+	  /// </summary>
+	  public class Iterator
+	  {
+		  private readonly TernaryTree outerInstance;
+
+
+		/// <summary>
+		/// current node index
+		/// </summary>
+		internal int cur;
+
+		/// <summary>
+		/// current key
+		/// </summary>
+		internal string curkey;
+
+		private class Item : ICloneable
+		{
+			private readonly TernaryTree.Iterator outerInstance;
+
+		  internal char parent;
+
+		  internal char child;
+
+		  public Item(TernaryTree.Iterator outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+			parent = (char)0;
+			child = (char)0;
+		  }
+
+		  public Item(TernaryTree.Iterator outerInstance, char p, char c)
+		  {
+			  this.outerInstance = outerInstance;
+			parent = p;
+			child = c;
+		  }
+
+		  public Item clone()
+		  {
+			return new Item(outerInstance, parent, child);
+		  }
+
+		  object ICloneable.Clone()
+		  {
+			return clone();
+		  }
+
+		}
+
+		/// <summary>
+		/// Node stack
+		/// </summary>
+		internal Stack<Item> ns;
+
+		/// <summary>
+		/// key stack implemented with a StringBuilder
+		/// </summary>
+		internal StringBuilder ks;
+
+		public Iterator(TernaryTree outerInstance)
+		{
+			this.outerInstance = outerInstance;
+		  cur = -1;
+		  ns = new Stack<Item>();
+		  ks = new StringBuilder();
+		  rewind();
+		}
+
+		public virtual void rewind()
+		{
+		  ns.Clear();
+		  ks.Length = 0;
+		  cur = outerInstance.root;
+		  run();
+		}
+
+		public virtual string nextElement()
+		{
+		  string res = curkey;
+		  cur = up();
+		  run();
+		  return res;
+		}
+
+		public virtual char Value
+		{
+			get
+			{
+			  if (cur >= 0)
+			  {
+				return outerInstance.eq[cur];
+			  }
+			  return 0;
+			}
+		}
+
+		public virtual bool hasMoreElements()
+		{
+		  return (cur != -1);
+		}
+
+		/// <summary>
+		/// traverse upwards
+		/// </summary>
+		internal virtual int up()
+		{
+		  Item i = new Item(this);
+		  int res = 0;
+
+		  if (ns.Count == 0)
+		  {
+			return -1;
+		  }
+
+		  if (cur != 0 && outerInstance.sc[cur] == 0)
+		  {
+			return outerInstance.lo[cur];
+		  }
+
+		  bool climb = true;
+
+		  while (climb)
+		  {
+			i = ns.Pop();
+			i.child++;
+			switch (i.child)
+			{
+			  case 1:
+				if (outerInstance.sc[i.parent] != 0)
+				{
+				  res = outerInstance.eq[i.parent];
+				  ns.Push(i.clone());
+				  ks.Append(outerInstance.sc[i.parent]);
+				}
+				else
+				{
+				  i.child++;
+				  ns.Push(i.clone());
+				  res = outerInstance.hi[i.parent];
+				}
+				climb = false;
+				break;
+
+			  case 2:
+				res = outerInstance.hi[i.parent];
+				ns.Push(i.clone());
+				if (ks.Length > 0)
+				{
+				  ks.Length = ks.Length - 1; // pop
+				}
+				climb = false;
+				break;
+
+			  default:
+				if (ns.Count == 0)
+				{
+				  return -1;
+				}
+				climb = true;
+				break;
+			}
+		  }
+		  return res;
+		}
+
+		/// <summary>
+		/// traverse the tree to find next key
+		/// </summary>
+		internal virtual int run()
+		{
+		  if (cur == -1)
+		  {
+			return -1;
+		  }
+
+		  bool leaf = false;
+		  while (true)
+		  {
+			// first go down on low branch until leaf or compressed branch
+			while (cur != 0)
+			{
+			  if (outerInstance.sc[cur] == 0xFFFF)
+			  {
+				leaf = true;
+				break;
+			  }
+			  ns.Push(new Item(this, (char) cur, '\u0000'));
+			  if (outerInstance.sc[cur] == 0)
+			  {
+				leaf = true;
+				break;
+			  }
+			  cur = outerInstance.lo[cur];
+			}
+			if (leaf)
+			{
+			  break;
+			}
+			// nothing found, go up one node and try again
+			cur = up();
+			if (cur == -1)
+			{
+			  return -1;
+			}
+		  }
+		  // The current node should be a data node and
+		  // the key should be in the key stack (at least partially)
+		  StringBuilder buf = new StringBuilder(ks.ToString());
+		  if (outerInstance.sc[cur] == 0xFFFF)
+		  {
+			int p = outerInstance.lo[cur];
+			while (outerInstance.kv.get(p) != 0)
+			{
+			  buf.Append(outerInstance.kv.get(p++));
+			}
+		  }
+		  curkey = buf.ToString();
+		  return 0;
+		}
+
+	  }
+
+	  public virtual void printStats(TextWriter @out)
+	  {
+		@out.WriteLine("Number of keys = " + Convert.ToString(length));
+		@out.WriteLine("Node count = " + Convert.ToString(freenode));
+		// System.out.println("Array length = " + Integer.toString(eq.length));
+		@out.WriteLine("Key Array length = " + Convert.ToString(kv.length()));
+
+		/*
+		 * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
+		 * System.out.print(kv.get(i)); else System.out.println("");
+		 * System.out.println("Keys:"); for(Enumeration enum = keys();
+		 * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
+		 */
+
+	  }
+	/*
+	  public static void main(String[] args) {
+	    TernaryTree tt = new TernaryTree();
+	    tt.insert("Carlos", 'C');
+	    tt.insert("Car", 'r');
+	    tt.insert("palos", 'l');
+	    tt.insert("pa", 'p');
+	    tt.trimToSize();
+	    System.out.println((char) tt.find("Car"));
+	    System.out.println((char) tt.find("Carlos"));
+	    System.out.println((char) tt.find("alto"));
+	    tt.printStats(System.out);
+	  }
+	  */
+
+	}
+
+}
\ No newline at end of file

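The commented-out main() above translates to roughly this C# usage:

    var tt = new TernaryTree();
    tt.insert("Carlos", 'C');
    tt.insert("Car", 'r');
    tt.insert("palos", 'l');
    tt.insert("pa", 'p');
    tt.trimToSize();                 // balances, then compacts node/key arrays
    int hit = tt.find("Car");        // 'r' as an int; -1 when the key is absent
    bool known = tt.knows("alto");   // false
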
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
new file mode 100644
index 0000000..1f1a42b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
@@ -0,0 +1,40 @@
+using System.IO;
+using org.apache.lucene.analysis.core;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// "Tokenizes" the entire stream as a single token. This is useful
+    /// for data like zip codes, ids, and some product names.
+    /// </summary>
+    public sealed class KeywordAnalyzer : Analyzer
+    {
+        public KeywordAnalyzer()
+        {
+        }
+
+        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            return new TokenStreamComponents(new KeywordTokenizer(reader));
+        }
+    }
+}
\ No newline at end of file

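A behaviour sketch, assuming the core Analyzer API shape carries over to the
port (field name and input are illustrative):

    var analyzer = new KeywordAnalyzer();
    using (TokenStream ts = analyzer.TokenStream("id", new StringReader("ABC-123 rev 7")))
    {
        ts.Reset();
        // exactly one token is emitted: "ABC-123 rev 7", whitespace and all
        while (ts.IncrementToken()) { }
    }
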
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
new file mode 100644
index 0000000..6d2cbde
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
@@ -0,0 +1,106 @@
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using Reader = System.IO.TextReader;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Emits the entire input as a single token.
+    /// </summary>
+    public sealed class KeywordTokenizer : Tokenizer
+    {
+        /// <summary>
+        /// Default read buffer size </summary>
+        public const int DEFAULT_BUFFER_SIZE = 256;
+
+        private bool done = false;
+        private int finalOffset;
+        // C# forbids instance-method calls in field initializers, so the
+        // attributes are registered in the constructors below
+        private readonly ICharTermAttribute termAtt;
+        private readonly IOffsetAttribute offsetAtt;
+
+        public KeywordTokenizer(TextReader input)
+            : this(input, DEFAULT_BUFFER_SIZE)
+        {
+        }
+
+        public KeywordTokenizer(TextReader input, int bufferSize)
+            : base(input)
+        {
+            if (bufferSize <= 0)
+            {
+                throw new System.ArgumentException("bufferSize must be > 0");
+            }
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            termAtt.ResizeBuffer(bufferSize);
+        }
+
+        public KeywordTokenizer(AttributeSource.AttributeFactory factory, Reader input, int bufferSize)
+            : base(factory, input)
+        {
+            if (bufferSize <= 0)
+            {
+                throw new System.ArgumentException("bufferSize must be > 0");
+            }
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            termAtt.ResizeBuffer(bufferSize);
+        }
+
+        public override bool IncrementToken()
+        {
+            if (!done)
+            {
+                ClearAttributes();
+                done = true;
+                int upto = 0;
+                char[] buffer = termAtt.Buffer();
+                while (true)
+                {
+                    int length = input.Read(buffer, upto, buffer.Length - upto);
+                    // TextReader.Read returns 0 at end of stream, not -1 as in Java
+                    if (length <= 0)
+                    {
+                        break;
+                    }
+                    upto += length;
+                    if (upto == buffer.Length)
+                    {
+                        buffer = termAtt.ResizeBuffer(1 + buffer.Length);
+                    }
+                }
+                termAtt.Length = upto;
+                finalOffset = CorrectOffset(upto);
+                offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
+                return true;
+            }
+            return false;
+        }
+
+        public override void End()
+        {
+            base.End();
+            // set final offset 
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            this.done = false;
+        }
+    }
+}
\ No newline at end of file

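Driving the tokenizer directly shows the IncrementToken contract above: true
exactly once, then false (a sketch; buffer growth is handled internally):

    var tok = new KeywordTokenizer(new StringReader("90210"));
    tok.Reset();
    bool first = tok.IncrementToken();   // true - whole input in the term attribute
    bool second = tok.IncrementToken();  // false - "done" is set, stream exhausted
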
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
new file mode 100644
index 0000000..8c3929f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
@@ -0,0 +1,53 @@
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Factory for <seealso cref="KeywordTokenizer"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class KeywordTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new KeywordTokenizerFactory </summary>
+	  public KeywordTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+	  {
+		return new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+	  }
+	}
+
+}
\ No newline at end of file

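The AttributeFactory overload of Create above can be driven like this (a
sketch; DEFAULT_ATTRIBUTE_FACTORY is assumed to carry over from the core
port):

    var factory = new KeywordTokenizerFactory(new Dictionary<string, string>());
    Tokenizer zip = factory.Create(
        AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
        new StringReader("90210"));
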
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
new file mode 100644
index 0000000..3a85d5d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
@@ -0,0 +1,84 @@
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// A LetterTokenizer is a tokenizer that divides text at non-letters. That is to
+	/// say, it defines tokens as maximal strings of adjacent letters, as defined by
+	/// the char.IsLetter() predicate.
+	/// <para>
+	/// Note: this does a decent job for most European languages, but does a terrible
+	/// job for some Asian languages, where words are not separated by spaces.
+	/// </para>
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="LetterTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+
+	public class LetterTokenizer : CharTokenizer
+	{
+
+	  /// <summary>
+	  /// Construct a new LetterTokenizer.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match - see <a href="#version">above</a> </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public LetterTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new LetterTokenizer using a given
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match - see <a href="#version">above</a> </param>
+	  /// <param name="factory">
+	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public LetterTokenizer(Version matchVersion, AttributeSource.AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Collects only characters which satisfy
+	  /// <seealso cref="Character#isLetter(int)"/>.
+	  /// </summary>
+	  protected internal override bool isTokenChar(int c)
+	  {
+		// c is an int code point; the cast is lossy for supplementary-plane characters
+		return char.IsLetter((char) c);
+	  }
+	}
+
+}
\ No newline at end of file

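A behaviour sketch for the summary above - maximal runs of letters, with
everything else acting as a boundary (LUCENE_48 stands in for the caller's
matchVersion):

    var lt = new LetterTokenizer(Version.LUCENE_48,
        new StringReader("3 quick brown-foxes!"));
    // yields the tokens "quick", "brown", "foxes"
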
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
new file mode 100644
index 0000000..8909bb3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LetterTokenizer"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.LetterTokenizerFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class LetterTokenizerFactory : TokenizerFactory
+    {
+
+        /// <summary>
+        /// Creates a new LetterTokenizerFactory </summary>
+        public LetterTokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            assureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+        {
+            return new LetterTokenizer(luceneMatchVersion, factory, input);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
new file mode 100644
index 0000000..097bc4b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Normalizes token text to lower case.
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating LowerCaseFilter:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are properly lowercased.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class LowerCaseFilter : TokenFilter
+	{
+	  private readonly CharacterUtils charUtils;
+	  // registered in the constructor; C# forbids instance-method calls in field initializers
+	  private readonly ICharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Create a new LowerCaseFilter, that normalizes token text to lower case.
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> TokenStream to filter </param>
+	  public LowerCaseFilter(Version matchVersion, TokenStream @in) : base(@in)
+	  {
+		charUtils = CharacterUtils.getInstance(matchVersion);
+		termAtt = AddAttribute<ICharTermAttribute>();
+	  }
+
+	  public override bool IncrementToken()
+	  {
+		if (input.IncrementToken())
+		{
+		  charUtils.ToLower(termAtt.Buffer(), 0, termAtt.Length);
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+}
\ No newline at end of file

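Typical placement in an analysis chain, mirroring the factory example in the
next file (a sketch; WhitespaceTokenizer is assumed ported, any tokenizer
works, and "reader" is any TextReader):

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48, reader);
    ts = new LowerCaseFilter(Version.LUCENE_48, ts);   // "QUICK" becomes "quick"
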
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
new file mode 100644
index 0000000..c2efbd1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
@@ -0,0 +1,62 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LowerCaseFilter"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class LowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+    {
+
+        /// <summary>
+        /// Creates a new LowerCaseFilterFactory </summary>
+        public LowerCaseFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            assureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new LowerCaseFilter(luceneMatchVersion, input);
+        }
+
+        public virtual AbstractAnalysisFactory MultiTermComponent
+        {
+            get
+            {
+                return this;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
new file mode 100644
index 0000000..659f9f3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
@@ -0,0 +1,84 @@
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// LowerCaseTokenizer performs the function of LetterTokenizer
+	/// and LowerCaseFilter together.  It divides text at non-letters and converts
+	/// them to lower case.  While it is functionally equivalent to the combination
+	/// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+	/// to doing the two tasks at once, hence this (redundant) implementation.
+	/// <para>
+	/// Note: this does a decent job for most European languages, but does a terrible
+	/// job for some Asian languages, where words are not separated by spaces.
+	/// </para>
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="LowerCaseTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class LowerCaseTokenizer : LetterTokenizer
+	{
+
+	  /// <summary>
+	  /// Construct a new LowerCaseTokenizer.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match - see <a href="#version">above</a>
+	  /// </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public LowerCaseTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new LowerCaseTokenizer using a given
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match; see <a href="#version">above</a> </param>
+	  /// <param name="factory">
+	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public LowerCaseTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Converts char to lower case
+	  /// <seealso cref="Character#toLowerCase(int)"/>.
+	  /// </summary>
+	  protected override int Normalize(int c)
+	  {
+		// char.ToLower accepts a single char, so BMP code points are cast down;
+		// supplementary code points are passed through unchanged.
+		return c <= char.MaxValue ? char.ToLower((char)c) : c;
+	  }
+	}
+}
\ No newline at end of file
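
A hedged consumption sketch (attribute and lifecycle names assumed to follow
the core port's PascalCase conventions):

    using (var reader = new StringReader("The Quick BROWN Fox"))
    {
        var tokenizer = new LowerCaseTokenizer(Version.LUCENE_CURRENT, reader);
        var term = tokenizer.AddAttribute<CharTermAttribute>();
        tokenizer.Reset();
        while (tokenizer.IncrementToken())
        {
            Console.WriteLine(term.ToString()); // "the", "quick", "brown", "fox"
        }
        tokenizer.End();
    }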

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
new file mode 100644
index 0000000..3d9b2e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
@@ -0,0 +1,63 @@
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory for <seealso cref="LowerCaseTokenizer"/>. 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.LowerCaseTokenizerFactory"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class LowerCaseTokenizerFactory : TokenizerFactory, MultiTermAwareComponent
+    {
+
+        /// <summary>
+        /// Creates a new LowerCaseTokenizerFactory </summary>
+        public LowerCaseTokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            assureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+            }
+        }
+
+        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+        {
+            return new LowerCaseTokenizer(luceneMatchVersion, factory, input);
+        }
+
+        public virtual AbstractAnalysisFactory MultiTermComponent
+        {
+            get
+            {
+                // Multi-term (e.g. wildcard) analysis cannot re-tokenize single terms,
+                // so an equivalent filter factory is handed back instead.
+                return new LowerCaseFilterFactory(new Dictionary<string, string>(OriginalArgs));
+            }
+        }
+    }
+}
\ No newline at end of file
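
The MultiTermComponent property above exists because a tokenizer cannot be run
over a single query term; wildcard and fuzzy analysis instead receive an
equivalent filter factory built from the same arguments. A sketch, with the
argument values illustrative only:

    var factory = new LowerCaseTokenizerFactory(
        new Dictionary<string, string> { { "luceneMatchVersion", "LUCENE_CURRENT" } });
    // Lowercases multi-term query text without re-tokenizing it.
    var multiTerm = (LowerCaseFilterFactory)factory.MultiTermComponent;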

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
new file mode 100644
index 0000000..56c9133
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
@@ -0,0 +1,58 @@
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// An <seealso cref="Analyzer"/> that filters <seealso cref="LetterTokenizer"/> 
+    ///  with <seealso cref="LowerCaseFilter"/> 
+    /// <para>
+    /// <a name="version">You must specify the required <seealso cref="Version"/> compatibility
+    /// when creating <seealso cref="CharTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="LowerCaseTokenizer"/> uses an int based API to normalize and
+    /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+    /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class SimpleAnalyzer : Analyzer
+    {
+
+        private readonly Version matchVersion;
+
+        /// <summary>
+        /// Creates a new <seealso cref="SimpleAnalyzer"/> </summary>
+        /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        public SimpleAnalyzer(Version matchVersion)
+        {
+            this.matchVersion = matchVersion;
+        }
+
+        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion, reader));
+        }
+    }
+}
\ No newline at end of file
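
An end-to-end sketch, assuming the core Analyzer exposes the Java line's
tokenStream(field, reader) entry point as TokenStream(...):

    var analyzer = new SimpleAnalyzer(Version.LUCENE_CURRENT);
    using (TokenStream ts = analyzer.TokenStream("body", new StringReader("A Simple, Test!")))
    {
        var term = ts.AddAttribute<CharTermAttribute>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            Console.WriteLine(term.ToString()); // "a", "simple", "test"
        }
        ts.End();
    }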

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
new file mode 100644
index 0000000..cc5a39e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
@@ -0,0 +1,104 @@
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Filters <seealso cref="LetterTokenizer"/> with <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>.
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating StopAnalyzer:
+	/// <ul>
+	///    <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+	///         supplementary characters in stopwords
+	///   <li> As of 2.9, position increments are preserved
+	/// </ul>
+	/// </para>
+	/// </summary>
+
+	public sealed class StopAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// An unmodifiable set containing some common English words that are not usually useful
+	  /// for searching.
+	  /// </summary>
+	  public static readonly CharArraySet ENGLISH_STOP_WORDS_SET;
+
+	  static StopAnalyzer()
+	  {
+		IList<string> stopWords = Arrays.AsList("a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with");
+		var stopSet = new CharArraySet(Version.LUCENE_CURRENT, stopWords, false);
+		ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer which removes words in
+	  ///  <seealso cref="#ENGLISH_STOP_WORDS_SET"/>. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  public StopAnalyzer(Version matchVersion) : this(matchVersion, ENGLISH_STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given set. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="stopWords"> Set of stop words  </param>
+	  public StopAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given file. </summary>
+	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="stopwordsFile"> File to load stop words from  </param>
+	  public StopAnalyzer(Version matchVersion, FileInfo stopwordsFile) : this(matchVersion, loadStopwordSet(stopwordsFile, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given reader. </summary>
+	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="stopwords"> Reader to load stop words from  </param>
+	  public StopAnalyzer(Version matchVersion, TextReader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="LowerCaseTokenizer"/> filtered with
+	  ///         <seealso cref="StopFilter"/> </returns>
+	  protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new LowerCaseTokenizer(matchVersion, reader);
+		return new Analyzer.TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
+	  }
+	}
+}
\ No newline at end of file
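
A short sketch of the custom-stopword constructor (word list illustrative):

    var stops = StopFilter.MakeStopSet(Version.LUCENE_CURRENT, "the", "and", "of");
    var analyzer = new StopAnalyzer(Version.LUCENE_CURRENT, stops);
    // "The Rise and Fall of Rome" now tokenizes to "rise", "fall", "rome".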

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
new file mode 100644
index 0000000..aeaf324
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
@@ -0,0 +1,129 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Removes stop words from a token stream.
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating StopFilter:
+	/// <ul>
+	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+	///         supplementary characters in stopwords and position
+	///         increments are preserved
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class StopFilter : FilteringTokenFilter
+	{
+
+	  private readonly CharArraySet stopWords;
+	  private readonly CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Constructs a filter which removes words from the input TokenStream that are
+	  /// named in the Set.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to enable correct Unicode 4.0 behavior in the stop
+	  ///          set if Version > 3.0.  See <a href="#version">above</a> for details. </param>
+	  /// <param name="in">
+	  ///          Input stream </param>
+	  /// <param name="stopWords">
+	  ///          A <seealso cref="CharArraySet"/> representing the stopwords. </param>
+	  /// <seealso cref= #makeStopSet(Version, java.lang.String...) </seealso>
+	  public StopFilter(Version matchVersion, TokenStream @in, CharArraySet stopWords) : base(matchVersion, @in)
+	  {
+		// C# field initializers cannot call instance methods, so the term
+		// attribute is acquired here rather than at the declaration site.
+		termAtt = AddAttribute<CharTermAttribute>();
+		this.stopWords = stopWords;
+	  }
+
+	  /// <summary>
+	  /// Builds a Set from an array of stop words,
+	  /// appropriate for passing into the StopFilter constructor.
+	  /// This permits this stopWords construction to be cached once when
+	  /// an Analyzer is constructed.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+	  /// <param name="stopWords"> An array of stopwords </param>
+	  /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+	  public static CharArraySet MakeStopSet(Version matchVersion, params string[] stopWords)
+	  {
+		return MakeStopSet(matchVersion, stopWords, false);
+	  }
+
+	  /// <summary>
+	  /// Builds a Set from an array of stop words,
+	  /// appropriate for passing into the StopFilter constructor.
+	  /// This permits this stopWords construction to be cached once when
+	  /// an Analyzer is constructed.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+	  /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+	  /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
+	  /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+	  public static CharArraySet MakeStopSet<T1>(Version matchVersion, IList<T1> stopWords)
+	  {
+		return MakeStopSet(matchVersion, stopWords, false);
+	  }
+
+	  /// <summary>
+	  /// Creates a stopword set from the given stopword array.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+	  /// <param name="stopWords"> An array of stopwords </param>
+	  /// <param name="ignoreCase"> If true, all words are lower cased first. </param>
+	  /// <returns> a Set containing the words </returns>
+	  public static CharArraySet MakeStopSet(Version matchVersion, string[] stopWords, bool ignoreCase)
+	  {
+		CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase);
+		stopSet.AddAll(Arrays.AsList(stopWords));
+		return stopSet;
+	  }
+
+	  /// <summary>
+	  /// Creates a stopword set from the given stopword list. </summary>
+	  /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+	  /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+	  /// <param name="ignoreCase"> if true, all words are lower cased first </param>
+	  /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
+	  public static CharArraySet MakeStopSet<T1>(Version matchVersion, IList<T1> stopWords, bool ignoreCase)
+	  {
+		CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase);
+		stopSet.AddAll(stopWords);
+		return stopSet;
+	  }
+
+	  /// <summary>
+	  /// Returns the next input Token whose term() is not a stop word.
+	  /// </summary>
+	  protected internal override bool Accept()
+	  {
+		return !stopWords.Contains(termAtt.Buffer(), 0, termAtt.Length());
+	  }
+
+	}
+
+}
\ No newline at end of file
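
A sketch of hand-wiring the filter over a lower-casing tokenizer (input text
and stopword list illustrative):

    var reader = new StringReader("to be or not to be");
    TokenStream chain = new LowerCaseTokenizer(Version.LUCENE_CURRENT, reader);
    chain = new StopFilter(Version.LUCENE_CURRENT, chain,
        StopFilter.MakeStopSet(Version.LUCENE_CURRENT, "to", "be", "or"));
    // Only "not" survives; its position increment reflects the removed terms.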

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
new file mode 100644
index 0000000..c74874d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
@@ -0,0 +1,162 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="StopFilter"/>.
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.StopFilterFactory" ignoreCase="true"
+	///             words="stopwords.txt" format="wordset" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// <para>
+	/// All attributes are optional:
+	/// </para>
+	/// <ul>
+	///  <li><code>ignoreCase</code> defaults to <code>false</code></li>
+	///  <li><code>words</code> should be the name of a stopwords file to parse, if not 
+	///      specified the factory will use <seealso cref="StopAnalyzer#ENGLISH_STOP_WORDS_SET"/>
+	///  </li>
+	///  <li><code>format</code> defines how the <code>words</code> file will be parsed, 
+	///      and defaults to <code>wordset</code>.  If <code>words</code> is not specified, 
+	///      then <code>format</code> must not be specified.
+	///  </li>
+	/// </ul>
+	/// <para>
+	/// The valid values for the <code>format</code> option are:
+	/// </para>
+	/// <ul>
+	///  <li><code>wordset</code> - This is the default format, which supports one word per 
+	///      line (including any intra-word whitespace) and allows whole line comments 
+	///      begining with the "#" character.  Blank lines are ignored.  See 
+	///      <seealso cref="WordlistLoader#getLines WordlistLoader.getLines"/> for details.
+	///  </li>
+	///  <li><code>snowball</code> - This format allows for multiple words specified on each 
+	///      line, and trailing comments may be specified using the vertical line ("&#124;"). 
+	///      Blank lines are ignored.  See 
+	///      <seealso cref="WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet"/> 
+	///      for details.
+	///  </li>
+	/// </ul>
+	/// </summary>
+	public class StopFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string FORMAT_WORDSET = "wordset";
+	  public const string FORMAT_SNOWBALL = "snowball";
+
+	  private CharArraySet stopWords;
+	  private readonly string stopWordFiles;
+	  private readonly string format;
+	  private readonly bool ignoreCase;
+	  private readonly bool enablePositionIncrements;
+
+	  /// <summary>
+	  /// Creates a new StopFilterFactory </summary>
+	  public StopFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		stopWordFiles = get(args, "words");
+		format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+		}
+	  }
+
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (stopWordFiles != null)
+		{
+		  if (FORMAT_WORDSET.Equals(format, StringComparison.CurrentCultureIgnoreCase))
+		  {
+			stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
+		  }
+		  else if (FORMAT_SNOWBALL.Equals(format, StringComparison.CurrentCultureIgnoreCase))
+		  {
+			stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
+		  }
+		  else
+		  {
+			throw new System.ArgumentException("Unknown 'format' specified for 'words' file: " + format);
+		  }
+		}
+		else
+		{
+		  if (null != format)
+		  {
+			throw new System.ArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
+		  }
+		  stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+		}
+	  }
+
+	  public virtual bool EnablePositionIncrements
+	  {
+		  get
+		  {
+			return enablePositionIncrements;
+		  }
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public virtual CharArraySet StopWords
+	  {
+		  get
+		  {
+			return stopWords;
+		  }
+	  }
+
+	  public override TokenStream Create(TokenStream input)
+	  {
+		StopFilter stopFilter = new StopFilter(luceneMatchVersion, input, stopWords);
+		stopFilter.EnablePositionIncrements = enablePositionIncrements;
+		return stopFilter;
+	  }
+	}
+
+}
\ No newline at end of file
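
A configuration sketch; the ResourceLoader instance is a placeholder for
whatever implementation the port ships for opening "words" files:

    var factory = new StopFilterFactory(new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_CURRENT" },
        { "words", "stopwords.txt" },
        { "format", "snowball" },
        { "ignoreCase", "true" }
    });
    factory.inform(someResourceLoader);            // parses and caches the stopword set
    TokenStream filtered = factory.Create(tokens); // wraps the incoming stream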

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
new file mode 100644
index 0000000..c546f3a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
@@ -0,0 +1,83 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Removes tokens whose types appear in a set of blocked types from a token stream.
+    /// </summary>
+    public sealed class TypeTokenFilter : FilteringTokenFilter
+    {
+
+        private readonly HashSet<string> stopTypes;
+        private readonly TypeAttribute typeAttribute;
+        private readonly bool useWhiteList;
+
+        /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
+        [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+        public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
+            : base(version, enablePositionIncrements, input)
+        {
+            // Attribute lookup moved into the constructors: C# field initializers
+            // cannot reference instance methods such as AddAttribute.
+            typeAttribute = AddAttribute<TypeAttribute>();
+            this.stopTypes = stopTypes;
+            this.useWhiteList = useWhiteList;
+        }
+
+        /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
+        [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+        public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes)
+            : this(version, enablePositionIncrements, input, stopTypes, false)
+        {
+        }
+
+        /// <summary>
+        /// Create a new <seealso cref="TypeTokenFilter"/>. </summary>
+        /// <param name="version">      the Lucene match version </param>
+        /// <param name="input">        the <seealso cref="TokenStream"/> to consume </param>
+        /// <param name="stopTypes">    the types to filter </param>
+        /// <param name="useWhiteList"> if true, then tokens whose type is in stopTypes will
+        ///                     be kept, otherwise they will be filtered out </param>
+        public TypeTokenFilter(Version version, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
+            : base(version, input)
+        {
+            typeAttribute = AddAttribute<TypeAttribute>();
+            this.stopTypes = stopTypes;
+            this.useWhiteList = useWhiteList;
+        }
+
+        /// <summary>
+        /// Create a new <seealso cref="TypeTokenFilter"/> that filters tokens out
+        /// (useWhiteList=false). </summary>
+        /// <seealso cref= #TypeTokenFilter(Version, TokenStream, Set, boolean) </seealso>
+        public TypeTokenFilter(Version version, TokenStream input, HashSet<string> stopTypes)
+            : this(version, input, stopTypes, false)
+        {
+        }
+
+        /// <summary>
+        /// By default, accepts the token if its type is not a stop type. When
+        /// useWhiteList is true, accepts the token only if its type is contained
+        /// in stopTypes.
+        /// </summary>
+        protected internal override bool Accept()
+        {
+            return useWhiteList == stopTypes.Contains(typeAttribute.Type);
+        }
+    }
+}
\ No newline at end of file
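
A blacklist/whitelist sketch (the "<NUM>" type string follows StandardTokenizer's
conventions in the Java line; the input stream is a placeholder):

    var stopTypes = new HashSet<string> { "<NUM>" };
    // Drops numeric tokens; pass useWhiteList: true to instead keep ONLY them.
    TokenStream chain = new TypeTokenFilter(Version.LUCENE_CURRENT, someTokenStream, stopTypes);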

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
new file mode 100644
index 0000000..42e82d2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
@@ -0,0 +1,94 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Factory class for <seealso cref="TypeTokenFilter"/>.
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="chars" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt"
+    ///                   useWhitelist="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class TypeTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+    {
+        private readonly bool useWhitelist;
+        private readonly bool enablePositionIncrements;
+        private readonly string stopTypesFiles;
+        private HashSet<string> stopTypes;
+
+        /// <summary>
+        /// Creates a new TypeTokenFilterFactory </summary>
+        public TypeTokenFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            stopTypesFiles = require(args, "types");
+            enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+            useWhitelist = getBoolean(args, "useWhitelist", false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+            }
+        }
+
+        public virtual void inform(ResourceLoader loader)
+        {
+            IList<string> files = splitFileNames(stopTypesFiles);
+            if (files.Count > 0)
+            {
+                stopTypes = new HashSet<string>();
+                foreach (string file in files)
+                {
+                    IList<string> typesLines = getLines(loader, file.Trim());
+                    stopTypes.AddAll(typesLines);
+                }
+            }
+        }
+
+        public virtual bool EnablePositionIncrements
+        {
+            get
+            {
+                return enablePositionIncrements;
+            }
+        }
+
+        public virtual HashSet<string> StopTypes
+        {
+            get
+            {
+                return stopTypes;
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            TokenStream filter = new TypeTokenFilter(luceneMatchVersion, enablePositionIncrements, input, stopTypes, useWhitelist);
+            return filter;
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
new file mode 100644
index 0000000..d5b7f10
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -0,0 +1,71 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Normalizes token text to UPPER CASE.
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating UpperCaseFilter
+	/// 
+	/// </para>
+	/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
+	/// upper case character represents more than one lower case character. Use this filter
+	/// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilter"/> for 
+	/// general search matching
+	/// </para>
+	/// </summary>
+	public sealed class UpperCaseFilter : TokenFilter
+	{
+	  private readonly CharacterUtils charUtils;
+	  private readonly CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Create a new UpperCaseFilter, that normalizes token text to upper case.
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> TokenStream to filter </param>
+	  public UpperCaseFilter(Version matchVersion, TokenStream @in) : base(@in)
+	  {
+		// Attribute acquisition happens here: C# field initializers cannot
+		// call instance methods such as AddAttribute.
+		termAtt = AddAttribute<CharTermAttribute>();
+		charUtils = CharacterUtils.getInstance(matchVersion);
+	  }
+
+	  public override bool IncrementToken()
+	  {
+		if (input.IncrementToken())
+		{
+		  charUtils.ToUpper(termAtt.Buffer(), 0, termAtt.Length());
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file
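
A wiring sketch (input text illustrative):

    var reader = new StringReader("acme Corp");
    TokenStream chain = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    chain = new UpperCaseFilter(Version.LUCENE_CURRENT, chain); // "ACME", "CORP"
    // Per the class doc, prefer LowerCaseFilter for general search matching.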

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
new file mode 100644
index 0000000..df3580f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
@@ -0,0 +1,74 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+
+	/// <summary>
+	/// Factory for <seealso cref="UpperCaseFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_uppercase" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.UpperCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
+	/// upper case character represents more than one lower case character. Use this filter
+	/// when you require uppercase tokens.  Use the <seealso cref="LowerCaseFilterFactory"/> for 
+	/// general search matching
+	/// </para>
+	/// </summary>
+	public class UpperCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new UpperCaseFilterFactory </summary>
+	  public UpperCaseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+		}
+	  }
+
+	  public override TokenStream Create(TokenStream input)
+	  {
+		// C# (pre-9) does not allow covariant return types on overrides, so this
+		// returns TokenStream rather than UpperCaseFilter as in the Java original.
+		return new UpperCaseFilter(luceneMatchVersion, input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
new file mode 100644
index 0000000..7e77c8d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
@@ -0,0 +1,58 @@
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.core;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// An Analyzer that uses <seealso cref="WhitespaceTokenizer"/>.
+    /// <para>
+    /// <a name="version">You must specify the required <seealso cref="Version"/> compatibility
+    /// when creating <seealso cref="CharTokenizer"/>:
+    /// <ul>
+    /// <li>As of 3.1, <seealso cref="WhitespaceTokenizer"/> uses an int based API to normalize and
+    /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+    /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// </ul>
+    /// </para>
+    /// </summary>
+    public sealed class WhitespaceAnalyzer : Analyzer
+    {
+
+        private readonly Version matchVersion;
+
+        /// <summary>
+        /// Creates a new <seealso cref="WhitespaceAnalyzer"/> </summary>
+        /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+        public WhitespaceAnalyzer(Version matchVersion)
+        {
+            this.matchVersion = matchVersion;
+        }
+
+        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion, reader));
+        }
+    }
+}
\ No newline at end of file
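
The practical contrast with SimpleAnalyzer above, as a sketch:

    var analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    // "A Simple, Test!" tokenizes to "A", "Simple,", "Test!" -- case and
    // punctuation are preserved, unlike SimpleAnalyzer's "a", "simple", "test".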

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
new file mode 100644
index 0000000..1ee9e69
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
@@ -0,0 +1,75 @@
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
+	/// Adjacent sequences of non-whitespace characters form tokens.
+	/// <a name="version"/>
+	/// <para>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="WhitespaceTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class WhitespaceTokenizer : CharTokenizer
+	{
+
+	  /// <summary>
+	  /// Construct a new WhitespaceTokenizer. </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match; see <a href="#version">above</a>
+	  /// </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public WhitespaceTokenizer(Version matchVersion, TextReader @in) : base(matchVersion, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new WhitespaceTokenizer using a given
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to match; see <a href="#version">above</a> </param>
+	  /// <param name="factory">
+	  ///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, TextReader @in) : base(matchVersion, factory, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Collects only characters which do not satisfy
+	  /// <seealso cref="Character#isWhitespace(int)"/>.
+	  /// </summary>
+	  protected internal override bool IsTokenChar(int c)
+	  {
+		// CharTokenizer's API is code point (int) based as of 3.1; supplementary
+		// code points are never whitespace, so they are always token chars.
+		return c > char.MaxValue || !char.IsWhiteSpace((char)c);
+	  }
+	}
+}
\ No newline at end of file
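
Since WhitespaceTokenizer only overrides IsTokenChar, the CharTokenizer base
class supplies the whole scanning loop; a custom tokenizer in the same style
needs just one predicate. A sketch (class name hypothetical):

    public sealed class CommaTokenizer : CharTokenizer
    {
        public CommaTokenizer(Version matchVersion, TextReader @in)
            : base(matchVersion, @in)
        {
        }

        // Everything except ',' belongs to a token.
        protected internal override bool IsTokenChar(int c)
        {
            return c != ',';
        }
    }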


[09/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
new file mode 100644
index 0000000..06c5e10
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
@@ -0,0 +1,182 @@
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A ShingleAnalyzerWrapper wraps a <seealso cref="ShingleFilter"/> around another <seealso cref="Analyzer"/>.
+	/// <para>
+	/// A shingle is another name for a token based n-gram.
+	/// </para>
+	/// </summary>
+	public sealed class ShingleAnalyzerWrapper : AnalyzerWrapper
+	{
+
+	  private readonly Analyzer @delegate;
+	  private readonly int maxShingleSize;
+	  private readonly int minShingleSize;
+	  private readonly string tokenSeparator;
+	  private readonly bool outputUnigrams;
+	  private readonly bool outputUnigramsIfNoShingles;
+	  private readonly string fillerToken;
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize) : this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+	  {
+	  }
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize) : this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, false, ShingleFilter.DEFAULT_FILLER_TOKEN)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new ShingleAnalyzerWrapper
+	  /// </summary>
+	  /// <param name="delegate"> Analyzer whose TokenStream is to be filtered </param>
+	  /// <param name="minShingleSize"> Min shingle (token ngram) size </param>
+	  /// <param name="maxShingleSize"> Max shingle size </param>
+	  /// <param name="tokenSeparator"> Used to separate input stream tokens in output shingles </param>
+	  /// <param name="outputUnigrams"> Whether or not the filter shall pass the original
+	  ///        tokens to the output stream </param>
+	  /// <param name="outputUnigramsIfNoShingles"> Overrides the behavior of outputUnigrams==false for those
+	  ///        times when no shingles are available (because there are fewer than
+	  ///        minShingleSize tokens in the input stream).
+	  ///        Note that if outputUnigrams==true, then unigrams are always output,
+	  ///        regardless of whether any shingles are available. </param>
+	  /// <param name="fillerToken"> filler token to use when positionIncrement is more than 1 </param>
+	  public ShingleAnalyzerWrapper(Analyzer @delegate, int minShingleSize, int maxShingleSize, string tokenSeparator, bool outputUnigrams, bool outputUnigramsIfNoShingles, string fillerToken) : base(@delegate.ReuseStrategy)
+	  {
+		this.@delegate = @delegate;
+
+		if (maxShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Max shingle size must be >= 2");
+		}
+		this.maxShingleSize = maxShingleSize;
+
+		if (minShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Min shingle size must be >= 2");
+		}
+		if (minShingleSize > maxShingleSize)
+		{
+		  throw new System.ArgumentException("Min shingle size must be <= max shingle size");
+		}
+		this.minShingleSize = minShingleSize;
+
+		this.tokenSeparator = (tokenSeparator == null ? "" : tokenSeparator);
+		this.outputUnigrams = outputUnigrams;
+		this.outputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
+		this.fillerToken = fillerToken;
+	  }
+
+	  /// <summary>
+	  /// Wraps <seealso cref="StandardAnalyzer"/>. 
+	  /// </summary>
+	  public ShingleAnalyzerWrapper(Version matchVersion) : this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Wraps <seealso cref="StandardAnalyzer"/>. 
+	  /// </summary>
+	  public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) : this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize)
+	  {
+	  }
+
+	  /// <summary>
+	  /// The max shingle (token ngram) size
+	  /// </summary>
+	  /// <returns> The max shingle (token ngram) size </returns>
+	  public int MaxShingleSize
+	  {
+		  get
+		  {
+			return maxShingleSize;
+		  }
+	  }
+
+	  /// <summary>
+	  /// The min shingle (token ngram) size
+	  /// </summary>
+	  /// <returns> The min shingle (token ngram) size </returns>
+	  public int MinShingleSize
+	  {
+		  get
+		  {
+			return minShingleSize;
+		  }
+	  }
+
+	  public string TokenSeparator
+	  {
+		  get
+		  {
+			return tokenSeparator;
+		  }
+	  }
+
+	  public bool OutputUnigrams
+	  {
+		  get
+		  {
+			return outputUnigrams;
+		  }
+	  }
+
+	  public bool OutputUnigramsIfNoShingles
+	  {
+		  get
+		  {
+			return outputUnigramsIfNoShingles;
+		  }
+	  }
+
+	  public string FillerToken
+	  {
+		  get
+		  {
+			return fillerToken;
+		  }
+	  }
+
+	  public override Analyzer GetWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+
+	  protected internal override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		ShingleFilter filter = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize);
+		filter.MinShingleSize = minShingleSize;
+		filter.MaxShingleSize = maxShingleSize;
+		filter.TokenSeparator = tokenSeparator;
+		filter.OutputUnigrams = outputUnigrams;
+		filter.OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
+		filter.FillerToken = fillerToken;
+		return new TokenStreamComponents(components.Tokenizer, filter);
+	  }
+	}
+
+}
\ No newline at end of file
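
A wrapping sketch (parameter values illustrative):

    var inner = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
    var shingled = new ShingleAnalyzerWrapper(inner, 2, 3, " ",
        outputUnigrams: true, outputUnigramsIfNoShingles: false, fillerToken: "_");
    // "please divide this" -> "please", "please divide", "please divide this",
    //                         "divide", "divide this", "this"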

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
new file mode 100644
index 0000000..9bdc341
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
@@ -0,0 +1,724 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+
+	/// <summary>
+	/// <para>A ShingleFilter constructs shingles (token n-grams) from a token stream.
+	/// In other words, it creates combinations of tokens as a single token.
+	/// 
+	/// </para>
+	/// <para>For example, the sentence "please divide this sentence into shingles"
+	/// might be tokenized into shingles "please divide", "divide this",
+	/// "this sentence", "sentence into", and "into shingles".
+	/// 
+	/// </para>
+	/// <para>This filter handles position increments > 1 by inserting filler tokens
+	/// (tokens with termtext "_"). It does not handle a position increment of 0.
+	/// </para>
+	/// </summary>
+	public sealed class ShingleFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// filler token for when positionIncrement is more than 1
+	  /// </summary>
+	  public const string DEFAULT_FILLER_TOKEN = "_";
+
+	  /// <summary>
+	  /// default maximum shingle size is 2.
+	  /// </summary>
+	  public const int DEFAULT_MAX_SHINGLE_SIZE = 2;
+
+	  /// <summary>
+	  /// default minimum shingle size is 2.
+	  /// </summary>
+	  public const int DEFAULT_MIN_SHINGLE_SIZE = 2;
+
+	  /// <summary>
+	  /// default token type attribute value is "shingle" 
+	  /// </summary>
+	  public const string DEFAULT_TOKEN_TYPE = "shingle";
+
+	  /// <summary>
+	  /// The default string to use when joining adjacent tokens to form a shingle
+	  /// </summary>
+	  public const string DEFAULT_TOKEN_SEPARATOR = " ";
+
+	  /// <summary>
+	  /// The sequence of input stream tokens (or filler tokens, if necessary)
+	  /// that will be composed to form output shingles.
+	  /// </summary>
+	  private LinkedList<InputWindowToken> inputWindow = new LinkedList<InputWindowToken>();
+
+	  /// <summary>
+	  /// The number of input tokens in the next output token.  This is the "n" in
+	  /// "token n-grams".
+	  /// </summary>
+	  private CircularSequence gramSize;
+
+	  /// <summary>
+	  /// Shingle and unigram text is composed here.
+	  /// </summary>
+	  private StringBuilder gramBuilder = new StringBuilder();
+
+	  /// <summary>
+	  /// The token type attribute value to use - default is "shingle"
+	  /// </summary>
+	  private string tokenType = DEFAULT_TOKEN_TYPE;
+
+	  /// <summary>
+	  /// The string to use when joining adjacent tokens to form a shingle
+	  /// </summary>
+	  private string tokenSeparator = DEFAULT_TOKEN_SEPARATOR;
+
+	  /// <summary>
+	  /// The string to insert for each position at which there is no token
+	  /// (i.e., when position increment is greater than one).
+	  /// </summary>
+	  private char[] fillerToken = DEFAULT_FILLER_TOKEN.ToCharArray();
+
+	  /// <summary>
+	  /// By default, we output unigrams (individual tokens) as well as shingles
+	  /// (token n-grams).
+	  /// </summary>
+	  private bool outputUnigrams = true;
+
+	  /// <summary>
+	  /// By default, we don't override behavior of outputUnigrams.
+	  /// </summary>
+	  private bool outputUnigramsIfNoShingles = false;
+
+	  /// <summary>
+	  /// maximum shingle size (number of tokens)
+	  /// </summary>
+	  private int maxShingleSize;
+
+	  /// <summary>
+	  /// minimum shingle size (number of tokens)
+	  /// </summary>
+	  private int minShingleSize;
+
+	  /// <summary>
+	  /// The remaining number of filler tokens to be inserted into the input stream
+	  /// from which shingles are composed, to handle position increments greater
+	  /// than one.
+	  /// </summary>
+	  private int numFillerTokensToInsert;
+
+	  /// <summary>
+	  /// When the next input stream token has a position increment greater than
+	  /// one, it is stored in this field until sufficient filler tokens have been
+	  /// inserted to account for the position increment. 
+	  /// </summary>
+	  private AttributeSource nextInputStreamToken;
+
+	  /// <summary>
+	  /// Whether or not there is a next input stream token.
+	  /// </summary>
+	  private bool isNextInputStreamToken = false;
+
+	  /// <summary>
+	  /// Whether at least one unigram or shingle has been output at the current 
+	  /// position.
+	  /// </summary>
+	  private bool isOutputHere = false;
+
+	  /// <summary>
+	  /// true if no shingles have been output yet (for outputUnigramsIfNoShingles).
+	  /// </summary>
+	  internal bool noShingleOutput = true;
+
+	  /// <summary>
+	  /// Holds the State after input.end() was called, so we can
+	  /// restore it in our end() impl.
+	  /// </summary>
+	  private State endState;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+
+	  /// <summary>
+	  /// Constructs a ShingleFilter with the specified shingle size from the
+	  /// <seealso cref="TokenStream"/> <code>input</code>
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="minShingleSize"> minimum shingle size produced by the filter. </param>
+	  /// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
+	  public ShingleFilter(TokenStream input, int minShingleSize, int maxShingleSize) : base(input)
+	  {
+		MaxShingleSize = maxShingleSize;
+		MinShingleSize = minShingleSize;
+	  }
+
+	  /// <summary>
+	  /// Constructs a ShingleFilter with the specified shingle size from the
+	  /// <seealso cref="TokenStream"/> <code>input</code>
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
+	  public ShingleFilter(TokenStream input, int maxShingleSize) : this(input, DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a ShingleFilter with default shingle size: 2.
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  public ShingleFilter(TokenStream input) : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a ShingleFilter with the specified token type for shingle tokens
+	  /// and the default shingle size: 2
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="tokenType"> token type for shingle tokens </param>
+	  public ShingleFilter(TokenStream input, string tokenType) : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+		TokenType = tokenType;
+	  }
+
+	  /// <summary>
+	  /// Set the type of the shingle tokens produced by this filter.
+	  /// (default: "shingle")
+	  /// </summary>
+	  /// <param name="tokenType"> token type for shingle tokens </param>
+	  public string TokenType
+	  {
+		  set
+		  {
+			this.tokenType = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Shall the output stream contain the input tokens (unigrams) as well as
+	  /// shingles? (default: true.)
+	  /// </summary>
+	  /// <param name="outputUnigrams"> Whether or not the output stream shall contain
+	  /// the input tokens (unigrams) </param>
+	  public bool OutputUnigrams
+	  {
+		  set
+		  {
+			this.outputUnigrams = value;
+			gramSize = new CircularSequence(this);
+		  }
+	  }
+
+	  /// <summary>
+	  /// <para>Shall we override the behavior of outputUnigrams==false for those
+	  /// times when no shingles are available (because there are fewer than
+	  /// minShingleSize tokens in the input stream)? (default: false.)
+	  /// </para>
+	  /// <para>Note that if outputUnigrams==true, then unigrams are always output,
+	  /// regardless of whether any shingles are available.
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="outputUnigramsIfNoShingles"> Whether or not to output a single
+	  /// unigram when no shingles are available. </param>
+	  public bool OutputUnigramsIfNoShingles
+	  {
+		  set
+		  {
+			this.outputUnigramsIfNoShingles = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set the max shingle size (default: 2)
+	  /// </summary>
+	  /// <param name="maxShingleSize"> max size of output shingles </param>
+	  public int MaxShingleSize
+	  {
+		  set
+		  {
+			if (value < 2)
+			{
+			  throw new System.ArgumentException("Max shingle size must be >= 2");
+			}
+			this.maxShingleSize = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// <para>Set the min shingle size (default: 2).
+	  /// </para>
+	  /// <para>This method requires that the passed in minShingleSize is not greater
+	  /// than maxShingleSize, so make sure that maxShingleSize is set before
+	  /// calling this method.
+	  /// </para>
+	  /// <para>The unigram output option is independent of the min shingle size.
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="minShingleSize"> min size of output shingles </param>
+	  public int MinShingleSize
+	  {
+		  set
+		  {
+			if (value < 2)
+			{
+			  throw new System.ArgumentException("Min shingle size must be >= 2");
+			}
+			if (value > maxShingleSize)
+			{
+			  throw new System.ArgumentException("Min shingle size must be <= max shingle size");
+			}
+			this.minShingleSize = value;
+			gramSize = new CircularSequence(this);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Sets the string to use when joining adjacent tokens to form a shingle </summary>
+	  /// <param name="tokenSeparator"> used to separate input stream tokens in output shingles </param>
+	  public string TokenSeparator
+	  {
+		  set
+		  {
+			this.tokenSeparator = null == value ? "" : value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Sets the string to insert for each position at which there is no token
+	  /// (i.e., when position increment is greater than one).
+	  /// </summary>
+	  /// <param name="fillerToken"> string to insert at each position where there is no token </param>
+	  public string FillerToken
+	  {
+		  set
+		  {
+			this.fillerToken = null == value ? new char[0] : value.ToCharArray();
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		bool tokenAvailable = false;
+		int builtGramSize = 0;
+		if (gramSize.atMinValue() || inputWindow.Count < gramSize.Value)
+		{
+		  shiftInputWindow();
+		  gramBuilder.Length = 0;
+		}
+		else
+		{
+		  builtGramSize = gramSize.PreviousValue;
+		}
+		if (inputWindow.Count >= gramSize.Value)
+		{
+		  bool isAllFiller = true;
+		  InputWindowToken nextToken = null;
+		  IEnumerator<InputWindowToken> iter = inputWindow.GetEnumerator();
+		  // walk the input window, growing the gram until it reaches the target size
+		  for (int gramNum = 1; iter.MoveNext() && builtGramSize < gramSize.Value; ++gramNum)
+		  {
+			nextToken = iter.Current;
+			if (builtGramSize < gramNum)
+			{
+			  if (builtGramSize > 0)
+			  {
+				gramBuilder.Append(tokenSeparator);
+			  }
+			  gramBuilder.Append(nextToken.termAtt.buffer(), 0, nextToken.termAtt.length());
+			  ++builtGramSize;
+			}
+			if (isAllFiller && nextToken.isFiller)
+			{
+			  if (gramNum == gramSize.Value)
+			  {
+				gramSize.advance();
+			  }
+			}
+			else
+			{
+			  isAllFiller = false;
+			}
+		  }
+		  if (!isAllFiller && builtGramSize == gramSize.Value)
+		  {
+			inputWindow.First.Value.attSource.copyTo(this);
+			posIncrAtt.PositionIncrement = isOutputHere ? 0 : 1;
+			termAtt.setEmpty().append(gramBuilder);
+			if (gramSize.Value > 1)
+			{
+			  typeAtt.Type = tokenType;
+			  noShingleOutput = false;
+			}
+			offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
+			posLenAtt.PositionLength = builtGramSize;
+			isOutputHere = true;
+			gramSize.advance();
+			tokenAvailable = true;
+		  }
+		}
+		return tokenAvailable;
+	  }
+
+	  private bool exhausted;
+
+	  /// <summary>
+	  /// <para>Get the next token from the input stream.
+	  /// </para>
+	  /// <para>If the next token has <code>positionIncrement > 1</code>,
+	  /// <code>positionIncrement - 1</code> <seealso cref="#fillerToken"/>s are
+	  /// inserted first.
+	  /// </para>
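+	  /// <para>For example, if the next token has <code>positionIncrement</code> 3 and
+	  /// <code>maxShingleSize</code> is at least 3, two filler tokens are returned
+	  /// before the token itself.
+	  /// </para>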
+	  /// </summary>
+	  /// <param name="target"> Where to put the new token; if null, a new instance is created. </param>
+	  /// <returns> On success, the populated token; null otherwise </returns>
+	  /// <exception cref="IOException"> if the input stream has a problem </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private InputWindowToken getNextToken(InputWindowToken target) throws java.io.IOException
+	  private InputWindowToken getNextToken(InputWindowToken target)
+	  {
+		InputWindowToken newTarget = target;
+		if (numFillerTokensToInsert > 0)
+		{
+		  if (null == target)
+		  {
+			newTarget = new InputWindowToken(this, nextInputStreamToken.cloneAttributes());
+		  }
+		  else
+		  {
+			nextInputStreamToken.copyTo(target.attSource);
+		  }
+		  // A filler token occupies no space
+		  newTarget.offsetAtt.setOffset(newTarget.offsetAtt.startOffset(), newTarget.offsetAtt.startOffset());
+		  newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.Length);
+		  newTarget.isFiller = true;
+		  --numFillerTokensToInsert;
+		}
+		else if (isNextInputStreamToken)
+		{
+		  if (null == target)
+		  {
+			newTarget = new InputWindowToken(this, nextInputStreamToken.cloneAttributes());
+		  }
+		  else
+		  {
+			nextInputStreamToken.copyTo(target.attSource);
+		  }
+		  isNextInputStreamToken = false;
+		  newTarget.isFiller = false;
+		}
+		else if (!exhausted)
+		{
+		  if (input.incrementToken())
+		  {
+			if (null == target)
+			{
+			  newTarget = new InputWindowToken(this, cloneAttributes());
+			}
+			else
+			{
+			  this.copyTo(target.attSource);
+			}
+			if (posIncrAtt.PositionIncrement > 1)
+			{
+			  // Each output shingle must contain at least one input token, 
+			  // so no more than (maxShingleSize - 1) filler tokens will be inserted.
+			  numFillerTokensToInsert = Math.Min(posIncrAtt.PositionIncrement - 1, maxShingleSize - 1);
+			  // Save the current token as the next input stream token
+			  if (null == nextInputStreamToken)
+			  {
+				nextInputStreamToken = cloneAttributes();
+			  }
+			  else
+			  {
+				this.copyTo(nextInputStreamToken);
+			  }
+			  isNextInputStreamToken = true;
+			  // A filler token occupies no space
+			  newTarget.offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
+			  newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.Length);
+			  newTarget.isFiller = true;
+			  --numFillerTokensToInsert;
+			}
+			else
+			{
+			  newTarget.isFiller = false;
+			}
+		  }
+		  else
+		  {
+			exhausted = true;
+			input.end();
+			endState = captureState();
+			numFillerTokensToInsert = Math.Min(posIncrAtt.PositionIncrement, maxShingleSize - 1);
+			if (numFillerTokensToInsert > 0)
+			{
+			  nextInputStreamToken = new AttributeSource(AttributeFactory);
+			  nextInputStreamToken.addAttribute(typeof(CharTermAttribute));
+			  OffsetAttribute newOffsetAtt = nextInputStreamToken.addAttribute(typeof(OffsetAttribute));
+			  newOffsetAtt.setOffset(offsetAtt.endOffset(), offsetAtt.endOffset());
+			  // Recurse/loop just once:
+			  return getNextToken(target);
+			}
+			else
+			{
+			  newTarget = null;
+			}
+		  }
+		}
+		else
+		{
+		  newTarget = null;
+		}
+		return newTarget;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void end() throws java.io.IOException
+	  public override void end()
+	  {
+		if (!exhausted)
+		{
+		  base.end();
+		}
+		else
+		{
+		  restoreState(endState);
+		}
+	  }
+
+	  /// <summary>
+	  /// <para>Fills <seealso cref="#inputWindow"/> with input stream tokens, if available, 
+	  /// shifting to the right if the window was previously full.
+	  /// </para>
+	  /// <para>Resets <seealso cref="#gramSize"/> to its minimum value.
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <exception cref="IOException"> if there's a problem getting the next token </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void shiftInputWindow() throws java.io.IOException
+	  private void shiftInputWindow()
+	  {
+		InputWindowToken firstToken = null;
+		if (inputWindow.Count > 0)
+		{
+		  firstToken = inputWindow.RemoveFirst();
+		}
+		while (inputWindow.Count < maxShingleSize)
+		{
+		  if (null != firstToken) // recycle the firstToken, if available
+		  {
+			if (null != getNextToken(firstToken))
+			{
+			  inputWindow.AddLast(firstToken); // the firstToken becomes the last
+			  firstToken = null;
+			}
+			else
+			{
+			  break; // end of input stream
+			}
+		  }
+		  else
+		  {
+			InputWindowToken nextToken = getNextToken(null);
+			if (null != nextToken)
+			{
+			  inputWindow.AddLast(nextToken);
+			}
+			else
+			{
+			  break; // end of input stream
+			}
+		  }
+		}
+		if (outputUnigramsIfNoShingles && noShingleOutput && gramSize.minValue > 1 && inputWindow.Count < minShingleSize)
+		{
+		  gramSize.minValue = 1;
+		}
+		gramSize.reset();
+		isOutputHere = false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		gramSize.reset();
+		inputWindow.Clear();
+		nextInputStreamToken = null;
+		isNextInputStreamToken = false;
+		numFillerTokensToInsert = 0;
+		isOutputHere = false;
+		noShingleOutput = true;
+		exhausted = false;
+		endState = null;
+		if (outputUnigramsIfNoShingles && !outputUnigrams)
+		{
+		  // Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
+		  gramSize.minValue = minShingleSize;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// <para>An instance of this class is used to maintain the number of input
+	  /// stream tokens that will be used to compose the next unigram or shingle:
+	  /// <seealso cref="#gramSize"/>.
+	  /// </para>
+	  /// <para><code>gramSize</code> will take on values from the circular sequence
+	  /// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+	  /// </para>
+	  /// <para>1 is included in the circular sequence only if 
+	  /// <seealso cref="#outputUnigrams"/> = true.
+	  /// </para>
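+	  /// <para>For example, with <seealso cref="#minShingleSize"/> = 2, <seealso cref="#maxShingleSize"/> = 3
+	  /// and <seealso cref="#outputUnigrams"/> = true, the sequence cycles 1, 2, 3, 1, 2, 3, ...;
+	  /// with <seealso cref="#outputUnigrams"/> = false it cycles 2, 3, 2, 3, ...
+	  /// </para>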
+	  /// </summary>
+	  private class CircularSequence
+	  {
+		  private readonly ShingleFilter outerInstance;
+
+		internal int value;
+		internal int previousValue;
+		internal int minValue;
+
+		public CircularSequence(ShingleFilter outerInstance)
+		{
+			this.outerInstance = outerInstance;
+		  minValue = outerInstance.outputUnigrams ? 1 : outerInstance.minShingleSize;
+		  reset();
+		}
+
+		/// <returns> the current value. </returns>
+		/// <seealso cref= #advance() </seealso>
+		public virtual int Value
+		{
+			get
+			{
+			  return value;
+			}
+		}
+
+		/// <summary>
+		/// <para>Increments this circular number's value to the next member in the
+		/// circular sequence
+		/// <code>gramSize</code> will take on values from the circular sequence
+		/// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+		/// </para>
+		/// <para>1 is included in the circular sequence only if 
+		/// <seealso cref="#outputUnigrams"/> = true.
+		/// </para>
+		/// </summary>
+		public virtual void advance()
+		{
+		  previousValue = value;
+		  if (value == 1)
+		  {
+			value = outerInstance.minShingleSize;
+		  }
+		  else if (value == outerInstance.maxShingleSize)
+		  {
+			reset();
+		  }
+		  else
+		  {
+			++value;
+		  }
+		}
+
+		/// <summary>
+		/// <para>Sets this circular number's value to the first member of the 
+		/// circular sequence
+		/// </para>
+		/// <para><code>gramSize</code> will take on values from the circular sequence
+		/// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+		/// </para>
+		/// <para>1 is included in the circular sequence only if 
+		/// <seealso cref="#outputUnigrams"/> = true.
+		/// </para>
+		/// </summary>
+		public virtual void reset()
+		{
+		  previousValue = value = minValue;
+		}
+
+		/// <summary>
+		/// <para>Returns true if the current value is the first member of the circular
+		/// sequence.
+		/// </para>
+		/// <para>If <seealso cref="#outputUnigrams"/> = true, the first member of the circular
+		/// sequence will be 1; otherwise, it will be <seealso cref="#minShingleSize"/>.
+		/// 
+		/// </para>
+		/// </summary>
+		/// <returns> true if the current value is the first member of the circular
+		///  sequence; false otherwise </returns>
+		public virtual bool atMinValue()
+		{
+		  return value == minValue;
+		}
+
+		/// <returns> the value this instance had before the last advance() call </returns>
+		public virtual int PreviousValue
+		{
+			get
+			{
+			  return previousValue;
+			}
+		}
+	  }
+
+	  private class InputWindowToken
+	  {
+		  private readonly ShingleFilter outerInstance;
+
+		internal readonly AttributeSource attSource;
+		internal readonly CharTermAttribute termAtt;
+		internal readonly OffsetAttribute offsetAtt;
+		internal bool isFiller = false;
+
+		public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
+		{
+			this.outerInstance = outerInstance;
+		  this.attSource = attSource;
+		  this.termAtt = attSource.getAttribute(typeof(CharTermAttribute));
+		  this.offsetAtt = attSource.getAttribute(typeof(OffsetAttribute));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
new file mode 100644
index 0000000..429e9ce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
@@ -0,0 +1,86 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ShingleFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
+	///             outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="_"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
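+	/// <para>Programmatic use, as a sketch (the args dictionary keys mirror the attributes above):
+	/// <pre class="prettyprint">
+	/// var args = new Dictionary&lt;string, string&gt; { { "minShingleSize", "2" }, { "maxShingleSize", "3" } };
+	/// var factory = new ShingleFilterFactory(args);
+	/// ShingleFilter shingled = factory.create(tokenStream);
+	/// </pre></para>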
+	/// </summary>
+	public class ShingleFilterFactory : TokenFilterFactory
+	{
+	  private readonly int minShingleSize;
+	  private readonly int maxShingleSize;
+	  private readonly bool outputUnigrams;
+	  private readonly bool outputUnigramsIfNoShingles;
+	  private readonly string tokenSeparator;
+	  private readonly string fillerToken;
+
+	  /// <summary>
+	  /// Creates a new ShingleFilterFactory </summary>
+	  public ShingleFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxShingleSize = getInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+		if (maxShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Invalid maxShingleSize (" + maxShingleSize + ") - must be at least 2");
+		}
+		minShingleSize = getInt(args, "minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
+		if (minShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be at least 2");
+		}
+		if (minShingleSize > maxShingleSize)
+		{
+		  throw new System.ArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be no greater than maxShingleSize (" + maxShingleSize + ")");
+		}
+		outputUnigrams = getBoolean(args, "outputUnigrams", true);
+		outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
+		tokenSeparator = get(args, "tokenSeparator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
+		fillerToken = get(args, "fillerToken", ShingleFilter.DEFAULT_FILLER_TOKEN);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ShingleFilter create(TokenStream input)
+	  {
+		ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);
+		r.OutputUnigrams = outputUnigrams;
+		r.OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
+		r.TokenSeparator = tokenSeparator;
+		r.FillerToken = fillerToken;
+		return r;
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
new file mode 100644
index 0000000..a04fd51
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
@@ -0,0 +1,79 @@
+using System;
+using System.Globalization;
+
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Attempts to parse the <seealso cref="CharTermAttribute#buffer()"/> as a date via
+	/// <seealso cref="DateTime.TryParse(string, IFormatProvider, DateTimeStyles, out DateTime)"/>.
+	/// If the value parses as a date, the token is added to the sink.
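+	/// <para>A wiring sketch; the tee and its input stream are assumptions, per <seealso cref="TeeSinkTokenFilter"/>:
+	/// <pre class="prettyprint">
+	/// TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
+	/// TeeSinkTokenFilter.SinkTokenStream dateSink = tee.newSinkTokenStream(new DateRecognizerSinkFilter());
+	/// </pre></para>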
+	/// </summary>
+	public class DateRecognizerSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  public const string DATE_TYPE = "date";
+
+	  protected internal IFormatProvider culture;
+	  protected internal CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Uses <seealso cref="CultureInfo.InvariantCulture"/> for parsing, the closest .NET
+	  /// analog of the original DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT).
+	  /// </summary>
+	  public DateRecognizerSinkFilter() : this(CultureInfo.InvariantCulture)
+	  {
+	  }
+
+	  public DateRecognizerSinkFilter(IFormatProvider culture)
+	  {
+		this.culture = culture;
+	  }
+
+	  public override bool accept(AttributeSource source)
+	  {
+		if (termAtt == null)
+		{
+		  termAtt = source.addAttribute(typeof(CharTermAttribute));
+		}
+		// We don't care about the date itself, only whether the term parses as one;
+		// DateTime.TryParse avoids the per-token exception cost of the original parse/catch.
+		DateTime date;
+		return DateTime.TryParse(termAtt.ToString(), culture, DateTimeStyles.None, out date);
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
new file mode 100644
index 0000000..f6857d9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
@@ -0,0 +1,300 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// This TokenFilter provides the ability to set aside attribute states
+	/// that have already been analyzed.  This is useful in situations where multiple fields share
+	/// many common analysis steps and then go their separate ways.
+	/// <p/>
+	/// It is also useful for doing things like entity extraction or proper noun analysis as
+	/// part of the analysis workflow and saving off those tokens for use in another field.
+	/// 
+	/// <pre class="prettyprint">
+	/// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
+	/// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+	/// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+	/// 
+	/// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
+	/// source2.addSinkTokenStream(sink1);
+	/// source2.addSinkTokenStream(sink2);
+	/// 
+	/// TokenStream final1 = new LowerCaseFilter(version, source1);
+	/// TokenStream final2 = source2;
+	/// TokenStream final3 = new EntityDetect(sink1);
+	/// TokenStream final4 = new URLDetect(sink2);
+	/// 
+	/// d.add(new TextField("f1", final1, Field.Store.NO));
+	/// d.add(new TextField("f2", final2, Field.Store.NO));
+	/// d.add(new TextField("f3", final3, Field.Store.NO));
+	/// d.add(new TextField("f4", final4, Field.Store.NO));
+	/// </pre>
+	/// In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
+	/// <code>reader1</code> and <code>reader2</code> after the whitespace tokenizer,
+	/// and we can further wrap any of these in extra analysis; more "sources" can be inserted if desired.
+	/// It is important that tees are consumed before sinks: in the example above, the tee field names
+	/// must be lexicographically less than the sink field names, so the tees are processed first.
+	/// If you are not sure which stream is consumed first, you can simply add another sink and then
+	/// pass all tokens to the sinks at once using <seealso cref="#consumeAllTokens"/>; this TokenFilter
+	/// is exhausted after that. To do so, change the example above to:
+	/// <pre class="prettyprint">
+	/// ...
+	/// TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
+	/// TokenStream final2 = source2.newSinkTokenStream();
+	/// sink1.consumeAllTokens();
+	/// sink2.consumeAllTokens();
+	/// ...
+	/// </pre>
+	/// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+	/// <para>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+	/// </para>
+	/// </summary>
+	public sealed class TeeSinkTokenFilter : TokenFilter
+	{
+	  private readonly IList<WeakReference<SinkTokenStream>> sinks = new List<WeakReference<SinkTokenStream>>();
+
+	  /// <summary>
+	  /// Instantiates a new TeeSinkTokenFilter.
+	  /// </summary>
+	  public TeeSinkTokenFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream.
+	  /// </summary>
+	  public SinkTokenStream newSinkTokenStream()
+	  {
+		return newSinkTokenStream(ACCEPT_ALL_FILTER);
+	  }
+
+	  /// <summary>
+	  /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream
+	  /// that pass the supplied filter. </summary>
+	  /// <seealso cref= SinkFilter </seealso>
+	  public SinkTokenStream newSinkTokenStream(SinkFilter filter)
+	  {
+		SinkTokenStream sink = new SinkTokenStream(this.cloneAttributes(), filter);
+		this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+		return sink;
+	  }
+
+	  /// <summary>
+	  /// Adds a <seealso cref="SinkTokenStream"/> created by another <code>TeeSinkTokenFilter</code>
+	  /// to this one. The supplied stream will also receive all consumed tokens.
+	  /// This method can be used to pass tokens from two different tees to one sink.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public void addSinkTokenStream(final SinkTokenStream sink)
+	  public void addSinkTokenStream(SinkTokenStream sink)
+	  {
+		// check that sink has correct factory
+		if (!this.AttributeFactory.Equals(sink.AttributeFactory))
+		{
+		  throw new System.ArgumentException("The supplied sink is not compatible to this tee");
+		}
+		// add eventually missing attribute impls to the existing sink
+		for (IEnumerator<AttributeImpl> it = this.cloneAttributes().AttributeImplsIterator; it.MoveNext();)
+		{
+		  sink.addAttributeImpl(it.Current);
+		}
+		this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+	  }
+
+	  /// <summary>
+	  /// <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
+	  /// as it is itself consumed. To be sure that all tokens from the input
+	  /// stream are passed to the sinks, you can call this method.
+	  /// This instance is exhausted after that, but all sinks are immediately available.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void consumeAllTokens() throws java.io.IOException
+	  public void consumeAllTokens()
+	  {
+		while (incrementToken())
+		{
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  // capture state lazily - maybe no SinkFilter accepts this state
+		  AttributeSource.State state = null;
+		  foreach (WeakReference<SinkTokenStream> @ref in sinks)
+		  {
+			SinkTokenStream sink;
+			if (@ref.TryGetTarget(out sink)) // skip sinks that have been reclaimed by the GC
+			{
+			  if (sink.accept(this))
+			  {
+				if (state == null)
+				{
+				  state = this.captureState();
+				}
+				sink.addState(state);
+			  }
+			}
+		  }
+		  return true;
+		}
+
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		AttributeSource.State finalState = captureState();
+		foreach (WeakReference<SinkTokenStream> @ref in sinks)
+		{
+		  SinkTokenStream sink;
+		  if (@ref.TryGetTarget(out sink))
+		  {
+			sink.FinalState = finalState;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// A filter that decides which <seealso cref="AttributeSource"/> states to store in the sink.
+	  /// </summary>
+	  public abstract class SinkFilter
+	  {
+		/// <summary>
+		/// Returns true, iff the current state of the passed-in <seealso cref="AttributeSource"/> shall be stored
+		/// in the sink. 
+		/// </summary>
+		public abstract bool accept(AttributeSource source);
+
+		/// <summary>
+		/// Called by <seealso cref="SinkTokenStream#reset()"/>. This method does nothing by default
+		/// and can optionally be overridden.
+		/// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void reset() throws java.io.IOException
+		public virtual void reset()
+		{
+		  // nothing to do; can be overridden
+		}
+	  }
+
+	  /// <summary>
+	  /// TokenStream output from a tee with optional filtering.
+	  /// </summary>
+	  public sealed class SinkTokenStream : TokenStream
+	  {
+		internal readonly IList<AttributeSource.State> cachedStates = new List<AttributeSource.State>();
+		internal AttributeSource.State finalState;
+		internal IEnumerator<AttributeSource.State> it = null;
+		internal SinkFilter filter;
+
+		internal SinkTokenStream(AttributeSource source, SinkFilter filter) : base(source)
+		{
+		  this.filter = filter;
+		}
+
+		internal bool accept(AttributeSource source)
+		{
+		  return filter.accept(source);
+		}
+
+		internal void addState(AttributeSource.State state)
+		{
+		  if (it != null)
+		  {
+			throw new System.InvalidOperationException("The tee must be consumed before sinks are consumed.");
+		  }
+		  cachedStates.Add(state);
+		}
+
+		internal AttributeSource.State FinalState
+		{
+			set
+			{
+			  this.finalState = value;
+			}
+		}
+
+		public override bool incrementToken()
+		{
+		  // lazy init the iterator
+		  if (it == null)
+		  {
+			it = cachedStates.GetEnumerator();
+		  }
+
+		  // MoveNext returns false once the cached states are exhausted
+		  if (!it.MoveNext())
+		  {
+			return false;
+		  }
+
+		  restoreState(it.Current);
+		  return true;
+		}
+
+		public override void end()
+		{
+		  if (finalState != null)
+		  {
+			restoreState(finalState);
+		  }
+		}
+
+		public override void reset()
+		{
+		  it = cachedStates.GetEnumerator();
+		}
+	  }
+
+	  private static readonly SinkFilter ACCEPT_ALL_FILTER = new SinkFilterAnonymousInnerClassHelper();
+
+	  private class SinkFilterAnonymousInnerClassHelper : SinkFilter
+	  {
+		  public SinkFilterAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  public override bool accept(AttributeSource source)
+		  {
+			return true;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
new file mode 100644
index 0000000..568fea6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
@@ -0,0 +1,73 @@
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Counts tokens as they pass and saves to the sink those whose zero-based count falls in
+	/// the half-open range [lower, upper): lower is inclusive, upper is exclusive.
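+	/// <para>A sketch (counting restarts at zero on <code>reset()</code>; the tee is assumed):
+	/// <pre class="prettyprint">
+	/// // keep only the tokens at counts 1 and 2 in the sink
+	/// TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(new TokenRangeSinkFilter(1, 3));
+	/// </pre></para>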
+	/// </summary>
+	public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  private int lower;
+	  private int upper;
+	  private int count;
+
+	  public TokenRangeSinkFilter(int lower, int upper)
+	  {
+		if (lower < 1)
+		{
+		  throw new System.ArgumentException("lower must be greater than zero");
+		}
+		if (lower > upper)
+		{
+		  throw new System.ArgumentException("lower must not be greater than upper");
+		}
+		this.lower = lower;
+		this.upper = upper;
+	  }
+
+
+	  public override bool accept(AttributeSource source)
+	  {
+		try
+		{
+		  if (count >= lower && count < upper)
+		  {
+			return true;
+		  }
+		  return false;
+		}
+		finally
+		{
+		  count++;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		count = 0;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
new file mode 100644
index 0000000..f844a1c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
@@ -0,0 +1,50 @@
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Adds a token to the sink if it has a specific type.
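+	/// <para>A sketch routing only "shingle"-typed tokens to a sink (the tee is assumed):
+	/// <pre class="prettyprint">
+	/// TeeSinkTokenFilter.SinkTokenStream shingleSink = tee.newSinkTokenStream(new TokenTypeSinkFilter("shingle"));
+	/// </pre></para>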
+	/// </summary>
+	public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  private string typeToMatch;
+	  private TypeAttribute typeAtt;
+
+	  public TokenTypeSinkFilter(string typeToMatch)
+	  {
+		this.typeToMatch = typeToMatch;
+	  }
+
+	  public override bool accept(AttributeSource source)
+	  {
+		if (typeAtt == null)
+		{
+		  typeAtt = source.addAttribute(typeof(TypeAttribute));
+		}
+
+		// accept the token only if its type matches the configured type
+		return (typeToMatch.Equals(typeAtt.type()));
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
new file mode 100644
index 0000000..1ce0ffd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
@@ -0,0 +1,102 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using EnglishPossessiveFilter = org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+	using org.apache.lucene.analysis.standard;
+	using TurkishLowerCaseFilter = org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/> and <seealso cref="SnowballFilter"/>.
+	/// 
+	/// Available stemmers are listed in org.tartarus.snowball.ext.  The name of a
+	/// stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+	/// <seealso cref="org.tartarus.snowball.ext.EnglishStemmer"/> is named "English".
+	/// 
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>, with the following addition:
+	/// <ul>
+	///   <li> As of 3.1, uses <seealso cref="TurkishLowerCaseFilter"/> for Turkish language.
+	/// </ul>
+	/// </para> </summary>
+	/// @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead. 
+	/// This analyzer will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use the language-specific analyzer in modules/analysis instead.")]
+	public sealed class SnowballAnalyzer : Analyzer
+	{
+	  private string name;
+	  private CharArraySet stopSet;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Builds the named analyzer with no stop words. </summary>
+	  public SnowballAnalyzer(Version matchVersion, string name)
+	  {
+		this.name = name;
+		this.matchVersion = matchVersion;
+	  }
+
+	  /// <summary>
+	  /// Builds the named analyzer with the given stop words. </summary>
+	  public SnowballAnalyzer(Version matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name)
+	  {
+		stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopWords));
+	  }
+
+	  /// <summary>
+	  /// Constructs a <seealso cref="StandardTokenizer"/> filtered by a
+	  /// <seealso cref="StandardFilter"/>, a <seealso cref="LowerCaseFilter"/>, a <seealso cref="StopFilter"/>,
+	  /// and a <seealso cref="SnowballFilter"/>.
+	  /// </summary>
+	  public override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, tokenizer);
+		// remove the possessive 's for english stemmers
+		if (matchVersion.onOrAfter(Version.LUCENE_31) && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
+		{
+		  result = new EnglishPossessiveFilter(result);
+		}
+		// Use a special lowercase filter for turkish, the stemmer expects it.
+		if (matchVersion.onOrAfter(Version.LUCENE_31) && name.Equals("Turkish"))
+		{
+		  result = new TurkishLowerCaseFilter(result);
+		}
+		else
+		{
+		  result = new LowerCaseFilter(matchVersion, result);
+		}
+		if (stopSet != null)
+		{
+		  result = new StopFilter(matchVersion, result, stopSet);
+		}
+		result = new SnowballFilter(result, name);
+		return new TokenStreamComponents(tokenizer, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
new file mode 100644
index 0000000..58a8361
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
@@ -0,0 +1,129 @@
+using System;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TurkishLowerCaseFilter = org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
+	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
+
+	/// <summary>
+	/// A filter that stems words using a Snowball-generated stemmer.
+	/// 
+	/// Available stemmers are listed in <seealso cref="org.tartarus.snowball.ext"/>.
+	/// <para><b>NOTE</b>: SnowballFilter expects lowercased text.
+	/// <ul>
+	///  <li>For the Turkish language, see <seealso cref="TurkishLowerCaseFilter"/>.
+	///  <li>For other languages, see <seealso cref="LowerCaseFilter"/>.
+	/// </ul>
+	/// </para>
+	/// 
+	/// <para>
+	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	/// certain terms from being passed to the stemmer
+	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	/// in a previous <seealso cref="TokenStream"/>.
+	/// 
+	/// Note: For including the original term as well as the stemmed version, see
+	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	/// </para>
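+	/// <para>A usage sketch (the tokenizer, reader and version below are illustrative):
+	/// <pre class="prettyprint">
+	/// TokenStream ts = new LowerCaseFilter(version, new StandardTokenizer(version, reader));
+	/// ts = new SnowballFilter(ts, "English");
+	/// </pre>
+	/// </para>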
+	/// 
+	/// 
+	/// </summary>
+	public sealed class SnowballFilter : TokenFilter
+	{
+
+	  private readonly SnowballProgram stemmer;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public SnowballFilter(TokenStream input, SnowballProgram stemmer) : base(input)
+	  {
+		this.stemmer = stemmer;
+	  }
+
+	  /// <summary>
+	  /// Construct the named stemming filter.
+	  /// 
+	  /// Available stemmers are listed in <seealso cref="org.tartarus.snowball.ext"/>.
+	  /// The name of a stemmer is the part of the class name before "Stemmer",
+	  /// e.g., the stemmer in <seealso cref="org.tartarus.snowball.ext.EnglishStemmer"/> is named "English".
+	  /// </summary>
+	  /// <param name="in"> the input tokens to stem </param>
+	  /// <param name="name"> the name of a stemmer </param>
+	  public SnowballFilter(TokenStream @in, string name) : base(@in)
+	  {
+		// Loading by type name is frowned upon in place of the ResourceLoader, but in this case
+		// the factory will use the other constructor so that the program is already loaded.
+		try
+		{
+		  Type stemClass = Type.GetType("org.tartarus.snowball.ext." + name + "Stemmer");
+		  stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
+		}
+		catch (Exception e)
+		{
+		  throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns the next input Token, after being stemmed </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			char[] termBuffer = termAtt.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int length = termAtt.length();
+			int length = termAtt.length();
+			stemmer.setCurrent(termBuffer, length);
+			stemmer.stem();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char finalTerm[] = stemmer.getCurrentBuffer();
+			char[] finalTerm = stemmer.CurrentBuffer;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newLength = stemmer.getCurrentBufferLength();
+			int newLength = stemmer.CurrentBufferLength;
+			if (finalTerm != termBuffer)
+			{
+			  termAtt.copyBuffer(finalTerm, 0, newLength);
+			}
+			else
+			{
+			  termAtt.Length = newLength;
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
new file mode 100644
index 0000000..310391e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
@@ -0,0 +1,101 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
+
+	/// <summary>
+	/// Factory for <seealso cref="SnowballFilter"/>, with configurable language
+	/// <para>
+	/// Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </para>
+	/// </summary>
+	public class SnowballPorterFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string PROTECTED_TOKENS = "protected";
+
+	  private readonly string language;
+	  private readonly string wordFiles;
+	  private Type stemClass;
+	  private CharArraySet protectedWords = null;
+
+	  /// <summary>
+	  /// Creates a new SnowballPorterFilterFactory </summary>
+	  public SnowballPorterFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		language = get(args, "language", "English");
+		wordFiles = get(args, PROTECTED_TOKENS);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		string className = "org.tartarus.snowball.ext." + language + "Stemmer";
+		stemClass = loader.newInstance(className, typeof(SnowballProgram)).GetType();
+
+		if (wordFiles != null)
+		{
+		  protectedWords = getWordSet(loader, wordFiles, false);
+		}
+	  }
+
+	  public override TokenFilter create(TokenStream input)
+	  {
+		SnowballProgram program;
+		try
+		{
+		  program = stemClass.newInstance();
+		}
+		catch (Exception e)
+		{
+		  throw new Exception("Error instantiating stemmer for language " + language + " from class " + stemClass, e);
+		}
+
+		if (protectedWords != null)
+		{
+		  input = new SetKeywordMarkerFilter(input, protectedWords);
+		}
+		return new SnowballFilter(input, program);
+	  }
+	}
+
+
+}
\ No newline at end of file
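
A sketch of driving this factory from code rather than from a Solr schema.
The dictionary keys mirror the ones the constructor consumes ("language" and
PROTECTED_TOKENS); `loader` and `tokenStream` stand in for a ResourceLoader
and an input stream supplied by the host, and are assumptions here, not part
of the commit:

	var args = new Dictionary<string, string>
	{
	  { "language", "English" },
	  { "protected", "protectedkeyword.txt" }
	};
	var factory = new SnowballPorterFilterFactory(args);
	// The constructor removes the keys it understands; any leftover entry
	// would have thrown "Unknown parameters: ...".
	factory.inform(loader); // resolves the XxxStemmer class + protected words
	TokenFilter stemmed = factory.create(tokenStream);

Note the two-phase initialization: create() is only safe after inform() has
run, since that is where stemClass and protectedWords are populated.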

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
new file mode 100644
index 0000000..f2387f1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -0,0 +1,161 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using LowerCaseFilter = LowerCaseFilter;
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>, using a list of
+	/// English stop words.
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ClassicAnalyzer:
+	/// <ul>
+	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+	///         supplementary characters in stopwords
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments
+	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+	/// </ul>
+	/// 
+	/// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
+	/// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
+	/// as specified by UAX#29.
+	/// </para>
+	/// </summary>
+	public sealed class ClassicAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Default maximum allowed token length </summary>
+	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// An unmodifiable set containing some common English words that are usually not
+	  /// useful for searching. 
+	  /// </summary>
+	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. </summary>
+	  /// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+	  /// <param name="stopWords"> stop words  </param>
+	  public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words
+	  /// (<seealso cref="STOP_WORDS_SET"/>). </summary>
+	  /// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+	  public ClassicAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given reader. </summary>
+	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
+	  /// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+	  /// <param name="stopwords"> Reader to read stop words from  </param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public ClassicAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
+	  public ClassicAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Set maximum allowed token length.  If a token is seen
+	  /// that exceeds this length then it is discarded.  This
+	  /// setting only takes effect the next time tokenStream
+	  /// is called.
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
+		ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
+		src.MaxTokenLength = maxTokenLength;
+		TokenStream tok = new ClassicFilter(src);
+		tok = new LowerCaseFilter(matchVersion, tok);
+		tok = new StopFilter(matchVersion, tok, stopwords);
+		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+	  }
+
+	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+	  {
+		  private readonly ClassicAnalyzer outerInstance;
+
+		  private Reader reader;
+		  private org.apache.lucene.analysis.standard.ClassicTokenizer src;
+
+		  public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, org.apache.lucene.analysis.standard.ClassicTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.reader = reader;
+			  this.src = src;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+		  protected internal override Reader Reader
+		  {
+			  set
+			  {
+				src.MaxTokenLength = outerInstance.maxTokenLength;
+				base.Reader = value;
+			  }
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
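
A sketch of using the analyzer directly, assuming the Version constants from
this port (LUCENE_48 is illustrative):

	var analyzer = new ClassicAnalyzer(Version.LUCENE_48); // default stop words
	analyzer.MaxTokenLength = 100; // longer tokens are silently discarded
	// createComponents wires the chain:
	// ClassicTokenizer -> ClassicFilter -> LowerCaseFilter -> StopFilter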

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
new file mode 100644
index 0000000..9ee4b32
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
@@ -0,0 +1,92 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+	/// <summary>
+	/// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
+
+	public class ClassicFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// Construct filtering <i>in</i>. </summary>
+	  public ClassicFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+
+	  // this filter uses the type attribute
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Returns the next token in the stream, or null at EOS.
+	  /// <para>Removes <tt>'s</tt> from the end of words.
+	  /// </para>
+	  /// <para>Removes dots from acronyms.
+	  /// </para>
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		char[] buffer = termAtt.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int bufferLength = termAtt.length();
+		int bufferLength = termAtt.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String type = typeAtt.type();
+		string type = typeAtt.type();
+
+		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+		{
+		  // Strip last 2 characters off
+		  termAtt.Length = bufferLength - 2;
+		} // remove dots
+		else if (type == ACRONYM_TYPE)
+		{
+		  int upto = 0;
+		  for (int i = 0; i < bufferLength; i++)
+		  {
+			char c = buffer[i];
+			if (c != '.')
+			{
+			  buffer[upto++] = c;
+			}
+		  }
+		  termAtt.Length = upto;
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
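
Concretely, the two normalizations in incrementToken only fire for the token
types named above; every other type passes through untouched. Illustrative
before/after pairs (the types are ClassicTokenizer.TOKEN_TYPES entries):

	// "Bob's"   <APOSTROPHE> -> "Bob"     (trailing 's dropped: Length - 2)
	// "I.B.M."  <ACRONYM>    -> "IBM"     (dots compacted out of the buffer)
	// "rocket"  <ALPHANUM>   -> "rocket"  (type not matched: unchanged)

The dot removal is a two-pointer compaction over the term buffer, so no new
char[] is allocated per token.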

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
new file mode 100644
index 0000000..2107ccc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ClassicFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ClassicFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ClassicFilterFactory </summary>
+	  public ClassicFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenFilter create(TokenStream input)
+	  {
+		return new ClassicFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
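
This factory takes no parameters at all, so the only args dictionary the
constructor accepts is an empty one; a brief sketch (`tokenStream` is an
assumed existing stream, not part of the commit):

	var factory = new ClassicFilterFactory(new Dictionary<string, string>());
	TokenFilter filtered = factory.create(tokenStream); // wraps in ClassicFilter
	// new ClassicFilterFactory(new Dictionary<string, string> { { "x", "1" } })
	// would throw: Unknown parameters: ...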

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
new file mode 100644
index 0000000..a41f48d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.standard
+{
+
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A grammar-based tokenizer constructed with JFlex
+	/// 
+	/// <para> This should be a good tokenizer for most European-language documents:
+	/// 
+	/// <ul>
+	///   <li>Splits words at punctuation characters, removing punctuation. However, a 
+	///     dot that's not followed by whitespace is considered part of a token.
+	///   <li>Splits words at hyphens, unless there's a number in the token, in which case
+	///     the whole token is interpreted as a product number and is not split.
+	///   <li>Recognizes email addresses and internet hostnames as one token.
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para>Many applications have specific tokenizer needs.  If this tokenizer does
+	/// not suit your application, please consider copying this source code
+	/// directory to your project and maintaining your own grammar-based tokenizer.
+	/// 
+	/// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
+	/// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
+	/// as specified by UAX#29.
+	/// </para>
+	/// </summary>
+
+	public sealed class ClassicTokenizer : Tokenizer
+	{
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner </summary>
+	  private StandardTokenizerInterface scanner;
+
+	  public const int ALPHANUM = 0;
+	  public const int APOSTROPHE = 1;
+	  public const int ACRONYM = 2;
+	  public const int COMPANY = 3;
+	  public const int EMAIL = 4;
+	  public const int HOST = 5;
+	  public const int NUM = 6;
+	  public const int CJ = 7;
+
+	  public const int ACRONYM_DEP = 8;
+
+	  /// <summary>
+	  /// String token types that correspond to token type int constants </summary>
+	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
+
+	  private int skippedPositions;
+
+	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// Set the max allowed token length.  Any token longer
+	  ///  than this is skipped. 
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			if (value < 1)
+			{
+			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
+			}
+			this.maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
+	  /// the <code>input</code> to the newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The input reader
+	  /// 
+	  /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
+	  public ClassicTokenizer(Version matchVersion, Reader input) : base(input)
+	  {
+		init(matchVersion);
+	  }
+
+	  /// <summary>
+	  /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
+	  /// </summary>
+	  public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
+	  {
+		init(matchVersion);
+	  }
+
+	  private void init(Version matchVersion)
+	  {
+		this.scanner = new ClassicTokenizerImpl(input);
+	  }
+
+	  // this tokenizer generates three attributes:
+	  // term offset, positionIncrement and type
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+	  /*
+	   * (non-Javadoc)
+	   *
+	   * @see org.apache.lucene.analysis.TokenStream#next()
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		skippedPositions = 0;
+
+		while (true)
+		{
+		  int tokenType = scanner.NextToken;
+
+		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+		  {
+			return false;
+		  }
+
+		  if (scanner.yylength() <= maxTokenLength)
+		  {
+			posIncrAtt.PositionIncrement = skippedPositions + 1;
+			scanner.getText(termAtt);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = scanner.yychar();
+			int start = scanner.yychar();
+			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+
+			if (tokenType == ClassicTokenizer.ACRONYM_DEP)
+			{
+			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
+			  termAtt.Length = termAtt.length() - 1; // remove extra '.'
+			}
+			else
+			{
+			  typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
+			}
+			return true;
+		  }
+		  else
+		  {
+			// When we skip a too-long term, we still increment the
+			// position increment
+			skippedPositions++;
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		offsetAtt.setOffset(finalOffset, finalOffset);
+		// adjust any skipped tokens
+		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		skippedPositions = 0;
+	  }
+	}
+
+}
\ No newline at end of file
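
Illustrative expected output under the splitting rules documented above
(types come from ClassicTokenizer.TOKEN_TYPES; the sample text is ours, not
part of the commit):

	// "wi-fi-4000 ships to bob@example.com"
	//   -> "wi-fi-4000"       <NUM>      (hyphenated token containing a digit
	//                                     is kept whole as a product number)
	//   -> "ships"            <ALPHANUM>
	//   -> "to"               <ALPHANUM>
	//   -> "bob@example.com"  <EMAIL>    (email kept as a single token)

When a token exceeds MaxTokenLength it is skipped entirely, and the next
emitted token carries the gap: incrementToken() sets PositionIncrement to
skippedPositions + 1, and end() folds any trailing skips into the final
position increment.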


[24/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
new file mode 100644
index 0000000..c504008
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by KStem
+	/// </summary>
+	internal class KStemData7
+	{
+		private KStemData7()
+		{
+		}
+	   internal static string[] data = new string[] {"rupee","rupture","rural","ruritanian","ruse", "rush","rushes","rushlight","rusk","russet", "rust","rustic","rusticate","rustication","rustle", "rustler","rustless","rustling","rustproof","rusty", "rut","ruthless","rutting","rye","sabbatarian", "sabbath","sabbatical","saber","sable","sabot", "sabotage","saboteur","sabra","sabre","sac", "saccharin","saccharine","sacerdotal","sacerdotalism","sachet", "sack","sackbut","sackcloth","sacral","sacrament", "sacramental","sacred","sacrifice","sacrificial","sacrilege", "sacrilegious","sacristan","sacristy","sacroiliac","sacrosanct", "sad","sadden","saddle","saddlebag","saddler", "saddlery","sadducee","sadhu","sadism","sadly", "sadomasochism","safari","safe","safebreaker","safeguard", "safekeeping","safety","saffron","sag","saga", "sagacious","sagacity","sagebrush","sago","sahib", "said","sail","sailcloth","sailing","sailor", "sailplane","saint","sainted","saintly","saith", "sake","saki","salaa
 m","salable","salacious", "salacity","salad","salamander","salami","salaried", "salary","sale","saleable","saleroom","sales", "salesclerk","salesgirl","saleslady","salesman","salesmanship", "salient","saliferous","salify","saline","salinometer", "saliva","salivary","salivate","sallow","sally", "salmon","salmonella","salon","saloon","salsify", "salt","saltcellar","saltire","saltlick","saltpan", "saltpeter","saltpetre","salts","saltshaker","saltwater", "salty","salubrious","salutary","salutation","salute", "salvage","salvation","salvationist","salve","salvedge", "salver","salvia","salvo","samaritan","samaritans", "samba","same","sameness","samovar","sampan", "sample","sampler","samurai","sanatorium","sanctify", "sanctimonious","sanction","sanctities","sanctity","sanctuary", "sanctum","sanctus","sand","sandal","sandalwood", "sandbag","sandbank","sandbar","sandblast","sandbox", "sandboy","sandcastle","sander","sandglass","sandman", "sandpaper","sandpiper","sandpit","sands","sandshoe", "
 sandstone","sandstorm","sandwich","sandy","sane", "sang","sangfroid","sangria","sanguinary","sanguine", "sanitary","sanitation","sanitorium","sanity","sank", "sans","sanskrit","sap","sapience","sapient", "sapless","sapling","sapper","sapphic","sapphire", "sappy","sapwood","saraband","sarabande","sarcasm", "sarcastic","sarcophagus","sardine","sardonic","sarge", "sari","sarky","sarong","sarsaparilla","sartorial", "sash","sashay","sass","sassafras","sassy", "sat","satan","satanic","satanism","satchel", "sate","sateen","satellite","satiable","satiate", "satiety","satin","satinwood","satiny","satire", "satirical","satirise","satirize","satisfaction","satisfactory", "satisfy","satisfying","satrap","satsuma","saturate", "saturation","saturday","saturn","saturnalia","saturnine", "satyr","sauce","saucepan","saucer","saucy", "sauerkraut","sauna","saunter","saurian","sausage", "sauterne","sauternes","savage","savagery","savanna", "savannah","savant","save","saveloy","saver", "saving","savings"
 ,"savior","saviour","savor", "savory","savour","savoury","savoy","savvy", "saw","sawbones","sawbuck","sawdust","sawhorse", "sawmill","sawpit","sawyer","saxifrage","saxon", "saxophone","saxophonist","say","saying","scab", "scabbard","scabby","scabies","scabious","scabrous", "scads","scaffold","scaffolding","scalar","scalawag", "scald","scalding","scale","scalene","scallion", "scallop","scallywag","scalp","scalpel","scaly", "scamp","scamper","scampi","scan","scandal", "scandalise","scandalize","scandalmonger","scandalous","scandinavian", "scanner","scansion","scant","scanty","scapegoat", "scapegrace","scapula","scar","scarab","scarce", "scarcely","scarcity","scare","scarecrow","scared", "scaremonger","scarf","scarify","scarlet","scarp", "scarper","scary","scat","scathing","scatology", "scatter","scatterbrain","scatterbrained","scattered","scatty", "scavenge","scavenger","scenario","scenarist","scene", "scenery","sceneshifter","scenic","scent","scepter", "sceptic","sceptical","sceptici
 sm","sceptre","schedule", "schema","schematic","schematize","scheme","scherzo", "schism","schismatic","schist","schizoid","schizophrenia", "schizophrenic","schmaltz","schmalz","schnapps","schnitzel", "schnorkel","scholar","scholarly","scholarship","scholastic", "scholasticism","school","schoolboy","schoolhouse","schooling", "schoolman","schoolmarm","schoolmaster","schoolmastering","schoolmate", "schoolwork","schooner","schwa","sciatic","sciatica", "science","scientific","scientist","scientology","scimitar", "scintilla","scintillate","scion","scissor","scissors", "sclerosis","scoff","scold","scollop","sconce", "scone","scoop","scoot","scooter","scope", "scorbutic","scorch","scorcher","scorching","score", "scoreboard","scorebook","scorecard","scorekeeper","scoreless", "scorer","scorn","scorpio","scorpion","scotch", "scoundrel","scoundrelly","scour","scourer","scourge", "scout","scoutmaster","scow","scowl","scrabble", "scrag","scraggly","scraggy","scram","scramble", "scrap","scrapbook"
 ,"scrape","scraper","scrapings", "scrappy","scraps","scratch","scratchpad","scratchy", "scrawl","scrawny","scream","screamingly","scree", "screech","screed","screen","screening","screenplay", "screw","screwball","screwdriver","screwy","scribble", "scribbler","scribe","scrimmage","scrimp","scrimshank", "scrimshaw","scrip","script","scripted","scriptural", "scripture","scriptwriter","scrivener","scrofula","scrofulous", "scroll","scrollwork","scrooge","scrotum","scrounge", "scrub","scrubber","scrubby","scruff","scruffy", "scrum","scrumcap","scrumhalf","scrummage","scrumptious", "scrumpy","scrunch","scruple","scrupulous","scrutineer", "scrutinise","scrutinize","scrutiny","scuba","scud", "scuff","scuffle","scull","scullery","scullion", "sculptor","sculptural","sculpture","scum","scupper", "scurf","scurrility","scurrilous","scurry","scurvy", "scut","scutcheon","scuttle","scylla","scythe", "sea","seabed","seabird","seaboard","seaborne", "seafaring","seafood","seafront","seagirt","seagoing"
 , "seagull","seahorse","seakale","seal","sealer", "sealing","sealskin","sealyham","seam","seaman", "seamanlike","seamanship","seamstress","seamy","seaplane", "seaport","sear","search","searching","searchlight", "searing","seascape","seashell","seashore","seasick", "seaside","season","seasonable","seasonal","seasoning", "seat","seating","seawall","seaward","seawards", "seawater","seaway","seaweed","seaworthy","sec", "secateurs","secede","secession","seclude","secluded", "seclusion","seclusive","second","secondary","seconds", "secrecy","secret","secretarial","secretariat","secretary", "secrete","secretion","secretive","sect","sectarian", "section","sectional","sectionalism","sector","secular", "secularise","secularism","secularize","secure","security", "sedan","sedate","sedation","sedative","sedentary", "sedge","sediment","sedimentary","sedimentation","sedition", "seditious","seduce","seduction","seductive","sedulous", "see","seed","seedbed","seedcake","seedling", "seedsman","seedy","
 seeing","seek","seem", "seeming","seemingly","seemly","seen","seep", "seepage","seer","seersucker","seesaw","seethe", "segment","segmentation","segregate","segregated","segregation", "seigneur","seine","seismic","seismograph","seismology", "seize","seizure","seldom","select","selection", "selective","selector","selenium","self","selfish", "selfless","selfsame","sell","seller","sellotape", "selvage","selves","semantic","semantics","semaphore", "semblance","semeiology","semen","semester","semibreve", "semicircle","semicolon","semiconductor","semidetached","semifinal", "semifinalist","seminal","seminar","seminarist","seminary", "semiology","semiprecious","semiquaver","semitic","semitone", "semitropical","semivowel","semiweekly","semolina","sempstress", "sen","senate","senator","senatorial","send", "sender","senescence","senescent","seneschal","senile", "senility","senior","seniority","senna","sensation", "sensational","sensationalism","sense","senseless","senses", "sensibility","sensib
 le","sensitise","sensitive","sensitivity", "sensitize","sensor","sensory","sensual","sensualist", "sensuality","sensuous","sent","sentence","sententious", "sentient","sentiment","sentimental","sentimentalise","sentimentalism", "sentimentality","sentimentalize","sentinel","sentry","sepal", "separable","separate","separation","separatism","separator", "sepia","sepoy","sepsis","september","septet", "septic","septicaemia","septicemia","septuagenarian","septuagesima", "septuagint","sepulcher","sepulchral","sepulchre","sequel", "sequence","sequencing","sequent","sequential","sequester", "sequestrate","sequestration","sequin","sequoia","seraglio", "seraph","seraphic","sere","serenade","serendipity", "serene","serf","serfdom","serge","sergeant", "serial","serialise","serialize","seriatim","sericulture", "series","serif","seriocomic","serious","seriously", "sermon","sermonise","sermonize","serous","serpent", "serpentine","serrated","serried","serum","serval", "servant","serve","server","serv
 ery","service", "serviceable","serviceman","serviette","servile","serving", "servitor","servitude","servomechanism","servomotor","sesame", "session","sessions","set","setback","setscrew", "setsquare","sett","settee","setter","setting", "settle","settled","settlement","settler","seven", "seventeen","seventy","sever","several","severally", "severance","severity","sew","sewage","sewer", "sewerage","sewing","sex","sexagenarian","sexagesima", "sexism","sexist","sexless","sextant","sextet", "sexton","sextuplet","sexual","sexuality","sexy", "sforzando","sgt","shabby","shack","shackle", "shad","shade","shades","shading","shadow", "shadowbox","shadowy","shady","shaft","shag", "shagged","shaggy","shagreen","shah","shake", "shakedown","shaker","shakes","shako","shaky", "shale","shall","shallop","shallot","shallow", "shallows","shalom","shalt","sham","shaman", "shamble","shambles","shame","shamefaced","shameful", "shameless","shammy","shampoo","shamrock","shandy", "shanghai","shank","shantung",
 "shanty","shantytown", "shape","shaped","shapely","shard","share", "sharecropper","shareholder","shares","shark","sharkskin", "sharp","sharpen","sharpener","sharper","sharpshooter", "shatter","shave","shaver","shaving","shawl", "shay","she","sheaf","shear","shears", "sheath","sheathe","sheathing","shebang","shebeen", "shed","sheen","sheep","sheepdip","sheepdog", "sheepfold","sheepish","sheepskin","sheer","sheet", "sheeting","sheik","sheikdom","sheikh","sheikhdom", "sheila","shekels","shelduck","shelf","shell", "shellac","shellacking","shellfish","shellshock","shelter", "sheltered","shelve","shelves","shelving","shenanigan", "shepherd","shepherdess","sheraton","sherbet","sherd", "sheriff","sherpa","sherry","shew","shh", "shibboleth","shield","shift","shiftless","shifty", "shilling","shimmer","shin","shinbone","shindig", "shindy","shine","shiner","shingle","shingles", "shining","shinny","shinto","shiny","ship", "shipboard","shipbroker","shipbuilding","shipmate","shipment", "shipper","
 shipping","shipshape","shipwreck","shipwright", "shipyard","shire","shires","shirk","shirring", "shirt","shirtfront","shirting","shirtsleeve","shirttail", "shirtwaist","shirtwaister","shirty","shit","shits", "shitty","shiver","shivers","shivery","shoal", "shock","shocker","shockheaded","shocking","shockproof", "shod","shoddy","shoe","shoeblack","shoehorn", "shoelace","shoemaker","shoeshine","shoestring","shone", "shoo","shook","shoot","shop","shopkeeper", "shoplift","shopsoiled","shopworn","shore","shorn", "short","shortage","shortbread","shortcake","shortcoming", "shorten","shortening","shortfall","shorthand","shorthanded", "shorthorn","shortie","shortly","shorts","shortsighted", "shorty","shot","shotgun","should","shoulder", "shouldst","shout","shouting","shove","shovel", "shovelboard","show","showboat","showcase","showdown", "shower","showery","showgirl","showing","showman", "showmanship","shown","showpiece","showplace","showroom", "showy","shrank","shrapnel","shred","shredder", 
 "shrew","shrewd","shrewish","shriek","shrift", "shrike","shrill","shrimp","shrine","shrink", "shrinkage","shrive","shrivel","shroud","shrub", "shrubbery","shrug","shuck","shucks","shudder", "shuffle","shuffleboard","shufty","shun","shunt", "shunter","shush","shut","shutdown","shutter", "shuttle","shuttlecock","shy","shyster","sibilant", "sibling","sibyl","sibylline","sic","sick", "sickbay","sickbed","sicken","sickening","sickle", "sickly","sickness","sickroom","side","sidearm", "sideboard","sideboards","sidecar","sidekick","sidelight", "sideline","sidelong","sidereal","sidesaddle","sideshow", "sideslip","sidesman","sidesplitting","sidestep","sidestroke", "sideswipe","sidetrack","sidewalk","sideward","sidewards", "sideways","siding","sidle","siege","sienna", "sierra","siesta","sieve","sift","sifter", "sigh","sight","sighted","sightless","sightly", "sightscreen","sightsee","sightseer","sign","signal", "signaler","signalise","signalize","signaller","signally", "signalman","signatory","
 signature","signer","signet", "significance","significant","signification","signify","signor", "signora","signorina","signpost","signposted","silage", "silence","silencer","silent","silhouette","silica", "silicate","silicon","silicone","silicosis","silk", "silken","silkworm","silky","sill","sillabub", "silly","silo","silt","silvan","silver", "silverfish","silverside","silversmith","silverware","silvery", "simian","similar","similarity","similarly","simile", "similitude","simmer","simony","simper","simple", "simpleton","simplicity","simplify","simply","simulacrum", "simulate","simulated","simulation","simulator","simultaneous", "sin","since","sincere","sincerely","sincerity", "sinecure","sinew","sinewy","sinful","sing", "singe","singhalese","singing","single","singleness", "singles","singlestick","singlet","singleton","singly", "singsong","singular","singularly","sinhalese","sinister", "sink","sinker","sinless","sinner","sinology", "sinuous","sinus","sip","siphon","sir", "sire","sire
 n","sirloin","sirocco","sirrah", "sis","sisal","sissy","sister","sisterhood", "sisterly","sit","sitar","site","sitter", "sitting","situated","situation","six","sixpence", "sixteen","sixty","sizable","size","sizeable", "sizzle","sizzler","skate","skateboard","skedaddle", "skeet","skein","skeleton","skeptic","skeptical", "skepticism","sketch","sketchpad","sketchy","skew", "skewbald","skewer","ski","skibob","skid", "skidlid","skidpan","skiff","skiffle","skilful", "skill","skilled","skillet","skillful","skim", "skimmer","skimp","skimpy","skin","skinflint", "skinful","skinhead","skinny","skint","skip", "skipper","skirl","skirmish","skirt","skit", "skitter","skittish","skittle","skittles","skive", "skivvy","skua","skulduggery","skulk","skull", "skullcap","skullduggery","skunk","sky","skydiving", "skyhook","skyjack","skylark","skylight","skyline", "skyrocket","skyscraper","skywriting","slab","slack", "slacken","slacker","slacks","slag","slagheap", "slain","slake","slalom","slam","slander",
  "slanderous","slang","slangy","slant","slantwise", "slap","slapdash","slaphappy","slapstick","slash", "slat","slate","slattern","slaty","slaughter", "slaughterhouse","slave","slaver","slavery","slavic", "slavish","slay","sleazy","sled","sledge", "sledgehammer","sleek","sleep","sleeper","sleepless", "sleepwalker","sleepy","sleepyhead","sleet","sleeve", "sleigh","slender","slenderise","slenderize","slept", "sleuth","slew","slewed","slice","slick", "slicker","slide","slight","slightly","slim", "slimy","sling","slingshot","slink","slip", "slipcover","slipknot","slipover","slipper","slippery", "slippy","slips","slipshod","slipstream","slipway", "slit","slither","slithery","sliver","slivovitz", "slob","slobber","sloe","slog","slogan", "sloop","slop","slope","sloppy","slosh", "sloshed","slot","sloth","slothful","slouch", "slough","sloven","slovenly","slow","slowcoach", "slowworm","sludge","slue","slug","sluggard", "sluggish","sluice","sluiceway","slum","slumber", "slumberous","slummy","sl
 ump","slung","slunk", "slur","slurp","slurry","slush","slut", "sly","smack","smacker","small","smallholder", "smallholding","smallpox","smalls","smarmy","smart", "smarten","smash","smashed","smasher","smashing", "smattering","smear","smell","smelly","smelt", "smile","smirch","smirk","smite","smith", "smithereens","smithy","smitten","smock","smocking", "smog","smoke","smoker","smokescreen","smokestack", "smoking","smoky","smolder","smooch","smooth", "smoothie","smoothy","smorgasbord","smote","smother", "smoulder","smudge","smug","smuggle","smut", "smutty","snack","snaffle","snag","snail", "snake","snakebite","snaky","snap","snapdragon", "snapper","snappish","snappy","snapshot","snare", "snarl","snatch","snazzy","sneak","sneaker", "sneaking","sneaky","sneer","sneeze","snick", "snicker","snide","sniff","sniffle","sniffles", "sniffy","snifter","snigger","snip","snippet", "snips","snitch","snivel","snob","snobbery", "snobbish","snog","snood","snook","snooker", "snoop","snooper","snoot","
 snooty","snooze", "snore","snorkel","snort","snorter","snot", "snotty","snout","snow","snowball","snowberry", "snowbound","snowdrift","snowdrop","snowfall","snowfield", "snowflake","snowline","snowman","snowplough","snowplow", "snowshoe","snowstorm","snowy","snr","snub", "snuff","snuffer","snuffle","snug","snuggle", "soak","soaked","soaking","soap","soapbox", "soapstone","soapsuds","soapy","soar","sob", "sober","sobriety","sobriquet","soccer","sociable", "social","socialise","socialism","socialist","socialite", "socialize","society","sociology","sock","socket", "sod","soda","sodden","sodium","sodomite", "sodomy","soever","sofa","soft","softball", "soften","softhearted","softie","software","softwood", "softy","soggy","soigne","soignee","soil", "sojourn","sol","solace","solar","solarium", "sold","solder","soldier","soldierly","soldiery", "sole","solecism","solely","solemn","solemnise", "solemnity","solemnize","solicit","solicitor","solicitous", "solicitude","solid","solidarity","solid
 ify","solidity", "solidus","soliloquise","soliloquize","soliloquy","solipsism", "solitaire","solitary","solitude","solo","soloist", "solstice","soluble","solution","solve","solvency", "solvent","somber","sombre","sombrero","some", "somebody","someday","somehow","somersault","something", "sometime","sometimes","someway","somewhat","somewhere", "somnambulism","somnolent","son","sonar","sonata", "song","songbird","songbook","songster","sonic", "sonnet","sonny","sonority","sonorous","sonsy", "soon","soot","soothe","soothsayer","sop", "sophism","sophisticate","sophisticated","sophistication","sophistry", "sophomore","soporific","sopping","soppy","soprano", "sorbet","sorcerer","sorcery","sordid","sore", "sorehead","sorely","sorghum","sorority","sorrel", "sorrow","sorry","sort","sortie","sos", "sot","sottish","sou","soubrette","soubriquet", "sough","sought","soul","soulful","soulless", "sound","soundings","soundproof","soundtrack","soup", "sour","source","sourdough","sourpuss","sousaphone"
 , "souse","soused","south","southbound","southeast", "southeaster","southeasterly","southeastern","southeastward","southeastwards", "southerly","southern","southerner","southernmost","southpaw", "southward","southwards","southwest","southwester","southwesterly", "southwestern","southwestward","southwestwards","souvenir","sovereign", "sovereignty","soviet","sow","sox","soy", "soybean","sozzled","spa","space","spacecraft", "spaceship","spacesuit","spacing","spacious","spade", "spadework","spaghetti","spake","spam","span", "spangle","spaniel","spank","spanking","spanner", "spar","spare","spareribs","sparing","spark", "sparkle","sparkler","sparks","sparrow","sparse", "spartan","spasm","spasmodic","spastic","spat", "spatchcock","spate","spatial","spatter","spatula", "spavin","spawn","spay","speak","speakeasy", "speaker","speakership","spear","spearhead","spearmint", "spec","special","specialise","specialised","specialist", "speciality","specialize","specialized","specially","specie", "sp
 ecies","specific","specifically","specification","specifics", "specify","specimen","specious","speck","speckle", "spectacle","spectacled","spectacles","spectacular","spectator", "specter","spectral","spectre","spectroscope","spectrum", "speculate","speculation","speculative","speech","speechify", "speechless","speed","speedboat","speeding","speedometer", "speedway","speedwell","speedy","spelaeology","speleology", "spell","spellbind","spelling","spend","spender", "spendthrift","spent","sperm","spermaceti","spermatozoa", "spew","sphagnum","sphere","spherical","spheroid", "sphincter","sphinx","spice","spicy","spider", "spidery","spiel","spigot","spike","spikenard", "spiky","spill","spillover","spillway","spin", "spinach","spinal","spindle","spindly","spine", "spineless","spinet","spinnaker","spinner","spinney", "spinster","spiny","spiral","spire","spirit", "spirited","spiritless","spirits","spiritual","spiritualise", "spiritualism","spirituality","spiritualize","spirituous","spirt", "s
 pit","spite","spitfire","spittle","spittoon", "spiv","splash","splashy","splat","splatter", "splay","splayfoot","spleen","splendid","splendiferous", "splendor","splendour","splenetic","splice","splicer", "splint","splinter","split","splits","splitting", "splotch","splurge","splutter","spoil","spoilage", "spoils","spoilsport","spoke","spoken","spokeshave", "spokesman","spoliation","spondee","sponge","spongy", "sponsor","spontaneous","spoof","spook","spooky", "spool","spoon","spoonerism","spoonful","spoor", "sporadic","spore","sporran","sport","sporting", "sportive","sports","sportsman","sportsmanlike","sportsmanship", "sporty","spot","spotless","spotlight","spotted", "spotter","spotty","spouse","spout","sprain", "sprang","sprat","sprawl","spray","sprayer", "spread","spree","sprig","sprigged","sprightly", "spring","springboard","springbok","springtime","springy", "sprinkle","sprinkler","sprinkling","sprint","sprite", "sprocket","sprout","spruce","sprung","spry", "spud","spume","spun",
 "spunk","spur", "spurious","spurn","spurt","sputter","sputum", "spy","spyglass","squab","squabble","squad", "squadron","squalid","squall","squalor","squander", "square","squash","squashy","squat","squatter", "squaw","squawk","squeak","squeaky","squeal", "squeamish","squeegee","squeeze","squeezer","squelch", "squib","squid","squidgy","squiffy","squiggle", "squint","squirarchy","squire","squirearchy","squirm", "squirrel","squirt","squirter","sri","srn", "ssh","stab","stabbing","stabilise","stabiliser", "stability","stabilize","stabilizer","stable","stabling", "staccato","stack","stadium","staff","stag", "stage","stagecoach","stager","stagestruck","stagger", "staggering","staggers","staging","stagnant","stagnate", "stagy","staid","stain","stainless","stair", "staircase","stairs","stairwell","stake","stakeholder", "stakes","stalactite","stalagmite","stale","stalemate", "stalk","stall","stallholder","stallion","stalls", "stalwart","stamen","stamina","stammer","stamp", "stampede","stance"
 ,"stanch","stanchion","stand", "standard","standardise","standardize","standby","standing", "standoffish","standpipe","standpoint","standstill","stank", "stanza","staple","stapler","star","starboard", "starch","starchy","stardom","stardust","stare", "starfish","stargazer","stargazing","staring","stark", "starkers","starlet","starlight","starling","starlit", "starry","stars","start","starter","starters", "startle","starvation","starve","starveling","stash", "state","statecraft","statehood","stateless","stately", "statement","stateroom","states","stateside","statesman", "static","statics","station","stationary","stationer", "stationery","stationmaster","statistic","statistician","statistics", "statuary","statue","statuesque","statuette","stature", "status","statute","statutory","staunch","stave", "staves","stay","stayer","stays","std", "stead","steadfast","steady","steak","steal", "stealth","stealthy","steam","steamboat","steamer", "steamroller","steamship","steed","steel","steelworke
 r", "steelworks","steely","steelyard","steenbok","steep", "steepen","steeple","steeplechase","steeplejack","steer", "steerage","steerageway","steersman","stein","steinbok", "stele","stellar","stem","stench","stencil", "stenographer","stenography","stentorian","step","stepbrother", "stepchild","stepladder","stepparent","steps","stepsister", "stereo","stereoscope","stereoscopic","stereotype","sterile", "sterilise","sterility","sterilize","sterling","stern", "sternum","steroid","stertorous","stet","stethoscope", "stetson","stevedore","stew","steward","stewardess", "stewardship","stewed","stick","sticker","stickleback", "stickler","stickpin","sticks","sticky","stiff", "stiffen","stiffener","stiffening","stifle","stigma", "stigmata","stigmatise","stigmatize","stile","stiletto", "still","stillbirth","stillborn","stillroom","stilly", "stilt","stilted","stilton","stimulant","stimulate", "stimulus","sting","stinger","stingo","stingray", "stingy","stink","stinking","stint","stipend", "stipend
 iary","stipple","stipulate","stipulation","stir", "stirrer","stirring","stirrup","stitch","stoat", "stock","stockade","stockbreeder","stockbroker","stockcar", "stockfish","stockholder","stockily","stockinet","stockinette", "stocking","stockist","stockjobber","stockman","stockpile", "stockpot","stockroom","stocks","stocktaking","stocky", "stockyard","stodge","stodgy","stoic","stoical", "stoicism","stoke","stokehold","stoker","stole", "stolen","stolid","stomach","stomachache","stomachful", "stomp","stone","stonebreaker","stonecutter","stoned", "stoneless","stonemason","stonewall","stoneware","stonework", "stony","stood","stooge","stool","stoolpigeon", "stoop","stop","stopcock","stopgap","stopover", "stoppage","stopper","stopping","stopwatch","storage", "store","storehouse","storekeeper","storeroom","stores", "storey","storied","stork","storm","stormbound", "stormy","story","storybook","storyteller","stoup", "stout","stouthearted","stove","stovepipe","stow", "stowage","stowaway","strad
 dle","stradivarius","strafe", "straggle","straggly","straight","straightaway","straightedge", "straighten","straightforward","straightway","strain","strained", "strainer","strait","straitened","straitjacket","straitlaced", "straits","strand","stranded","strange","stranger", "strangle","stranglehold","strangulate","strangulation","strap", "straphanging","strapless","strapping","strata","stratagem", "strategic","strategist","strategy","stratification","stratify", "stratosphere","stratum","straw","strawberry","strawboard", "stray","streak","streaker","streaky","stream", "streamer","streamline","streamlined","street","streetcar", "streetwalker","strength","strengthen","strenuous","streptococcus", "streptomycin","stress","stretch","stretcher","stretchy", "strew","strewth","striated","striation","stricken", "strict","stricture","stride","stridency","strident", "stridulate","strife","strike","strikebound","strikebreaker", "strikebreaking","striker","striking","string","stringency", "string
 ent","strings","stringy","strip","stripe", "striped","stripling","stripper","striptease","stripy", "strive","strode","stroke","stroll","stroller", "strolling","strong","strongarm","strongbox","stronghold", "strontium","strop","strophe","stroppy","strove", "struck","structural","structure","strudel","struggle", "strum","strumpet","strung","strut","strychnine", "stub","stubble","stubborn","stubby","stucco", "stuck","stud","studbook","student","studied", "studio","studious","study","stuff","stuffing", "stuffy","stultify","stumble","stump","stumper", "stumpy","stun","stung","stunk","stunner", "stunning","stunt","stupefaction","stupefy","stupendous", "stupid","stupidity","stupor","sturdy","sturgeon", "stutter","sty","stye","stygian","style", "stylise","stylish","stylist","stylistic","stylistics", "stylize","stylus","stymie","styptic","suasion", "suave","sub","subaltern","subatomic","subcommittee", "subconscious","subcontinent","subcontract","subcontractor","subcutaneous", "subdivide","su
 bdue","subdued","subedit","subeditor", "subheading","subhuman","subject","subjection","subjective", "subjoin","subjugate","subjunctive","sublease","sublet", "sublieutenant","sublimate","sublime","subliminal","submarine", "submariner","submerge","submergence","submersible","submission", "submissive","submit","subnormal","suborbital","subordinate", "suborn","subplot","subpoena","subscribe","subscriber", "subscription","subsequent","subservience","subservient","subside", "subsidence","subsidiary","subsidise","subsidize","subsidy", "subsist","subsistence","subsoil","subsonic","substance", "substandard","substantial","substantially","substantiate","substantival", "substantive","substation","substitute","substratum","substructure", "subsume","subtenant","subtend","subterfuge","subterranean", "subtitle","subtitles","subtle","subtlety","subtopia", "subtract","subtraction","subtropical","suburb","suburban", "suburbanite","suburbia","suburbs","subvention","subversive", "subvert","subway","suc
 ceed","success","successful", "succession","successive","successor","succinct","succor", "succour","succubus","succulence","succulent","succumb", "such","suchlike","suck","sucker","suckle", "suckling","sucrose","suction","sudden","suds", "sue","suet","suffer","sufferable","sufferance", "sufferer","suffering","suffice","sufficiency","sufficient", "suffix","suffocate","suffragan","suffrage","suffragette", "suffuse","sugar","sugarcane","sugarcoated","sugarloaf", "sugary","suggest","suggestible","suggestion","suggestive", "suicidal","suicide","suit","suitability","suitable", "suitcase","suiting","suitor","sulfate","sulfide", "sulfur","sulfuret","sulfurous","sulk","sulks", "sulky","sullen","sully","sulphate","sulphide", "sulphur","sulphuret","sulphurous","sultan","sultana", "sultanate","sultry","sum","sumac","sumach", "summarise","summarize","summary","summat","summation", "summer","summerhouse","summertime","summery","summit", "summon","summons","sump","sumptuary","sumptuous", "sun","su
 nbaked","sunbathe","sunbeam","sunblind", "sunbonnet","sunburn","sunburnt","sundae","sunday", "sundeck","sunder","sundew","sundial","sundown", "sundowner","sundrenched","sundries","sundry","sunfish", "sunflower","sung","sunglasses","sunk","sunken", "sunlamp","sunless","sunlight","sunlit","sunny", "sunray","sunrise","sunroof","sunset","sunshade", "sunshine","sunspot","sunstroke","suntan","suntrap", "sup","super","superabundance","superabundant","superannuate", "superannuated","superannuation","superb","supercharged","supercharger", "supercilious","superconductivity","superduper","superego","superficial", "superficies","superfine","superfluity","superfluous","superhuman", "superimpose","superintend","superintendent","superior","superlative", "superlatively","superman","supermarket","supernal","supernatural", "supernova","supernumerary","superscription","supersede","supersession", "supersonic","superstar","superstition","superstitious","superstructure", "supertax","supervene","supervise
 ","supervisory","supine", "supper","supplant","supple","supplement","supplementary", "suppliant","supplicant","supplicate","supplier","supplies", "supply","support","supportable","supporter","supportive", "suppose","supposed","supposedly","supposing","supposition", "suppository","suppress","suppression","suppressive","suppressor", "suppurate","supranational","supremacist","supremacy","supreme", "surcharge","surcoat","surd","sure","surefire", "surefooted","surely","surety","surf","surface", "surfboard","surfboat","surfeit","surfer","surge", "surgeon","surgery","surgical","surly","surmise", "surmount","surname","surpass","surpassing","surplice", "surplus","surprise","surprising","surreal","surrealism", "surrealist","surrealistic","surrender","surreptitious","surrey", "surrogate","surround","surrounding","surroundings","surtax", "surveillance","survey","surveyor","survival","survive", "survivor","susceptibilities","susceptibility","susceptible","suspect", "suspend","suspender","suspend
 ers","suspense","suspension", "suspicion","suspicious","sustain","sustenance","suttee", "suture","suzerain","suzerainty","svelte","swab", "swaddle","swag","swagger","swain","swallow", "swallowtailed","swam","swami","swamp","swampy", "swan","swank","swanky","swansdown","swansong", "swap","sward","swarf","swarm","swarthy", "swashbuckler","swashbuckling","swastika","swat","swatch", "swath","swathe","swatter","sway","swayback", "swear","swearword","sweat","sweatband","sweated", "sweater","sweatshirt","sweatshop","sweaty","swede", "sweep","sweeper","sweeping","sweepings","sweepstake", "sweepstakes","sweet","sweetbread","sweetbriar","sweetbrier", "sweeten","sweetener","sweetening","sweetheart","sweetie", "sweetish","sweetmeat","sweets","swell","swelling", "swelter","sweltering","swept","swerve","swift", "swig","swill","swim","swimming","swimmingly", "swindle","swine","swineherd","swing","swingeing", "swinger","swinging","swinish","swipe","swirl", "swish","switch","switchback","switchblade
 ","switchboard", "switchgear","switchman","swivel","swiz","swizzle", "swollen","swoon","swoop","swop","sword", "swordfish","swordplay","swordsman","swordsmanship","swordstick", "swore","sworn","swot","swum","swung", "sybarite","sybaritic","sycamore","sycophant","sycophantic", "sylabub","syllabary","syllabic","syllabify","syllable", "syllabub","syllabus","syllogism","syllogistic","sylph", "sylphlike","sylvan","symbiosis","symbol","symbolic", "symbolise","symbolism","symbolist","symbolize","symmetrical", "symmetry","sympathetic","sympathies","sympathise","sympathize", "sympathy","symphonic","symphony","symposium","symptom", "symptomatic","synagogue","sync","synch","synchonise", "synchromesh","synchronize","synchrotron","syncopate","syncope", "syndic","syndicalism","syndicate","syndrome","synod", "synonym","synonymous","synopsis","synoptic","syntactic", "syntax","synthesis","synthesise","synthesiser","synthesize", "synthesizer","synthetic","syphilis","syphilitic","syphon", "syringe","s
 yrup","syrupy","system","systematic", "systematise","systematize","systemic","tab","tabard", "tabasco","tabby","tabernacle","table","tableau", "tablecloth","tableland","tablemat","tablespoon","tablespoonful", "tablet","tableware","tabloid","taboo","tabor", "tabular","tabulate","tabulator","tacit","taciturn", "tack","tackiness","tackle","tacky","tact", "tactic","tactical","tactician","tactics","tactile", "tactual","tadpole","taffeta","taffrail","taffy", "tag","tail","tailback","tailboard","tailcoat", "taillight","tailor","tailpiece","tails","tailspin", "tailwind","taint","take","takeaway","takeoff", "takeover","taking","takings","talc","tale", "talebearer","talent","talented","talisman","talk", "talkative","talker","talkie","talks","tall", "tallboy","tallow","tally","tallyho","tallyman", "talmud","talon","tamale","tamarind","tamarisk", "tambour","tambourine","tame","tammany","tamp", "tamper","tampon","tan","tandem","tang", "tangent","tangential","tangerine","tangible","tangle", "tang
 o","tank","tankard","tanker","tanner", "tannery","tannin","tanning","tannoy","tansy", "tantalise","tantalize","tantalus","tantamount","tantrum", "taoism","tap","tape","taper","tapestry", "tapeworm","tapioca","tapir","tappet","taproom", "taproot","taps","tar","tarantella","tarantula", "tarboosh","tardy","target","tariff","tarmac", "tarn","tarnish","taro","tarot","tarpaulin", "tarragon","tarry","tarsal","tarsus","tart", "tartan","tartar","task","taskmaster","tassel", "taste","tasteful","tasteless","taster","tasty", "tat","tatas","tatter","tattered","tatters", "tatting","tattle","tattoo","tattooist","tatty", "taught","taunt","taurus","taut","tautological", "tautology","tavern","tawdry","tawny","tawse", "tax","taxation","taxi","taxidermist","taxidermy", "taximeter","taxonomy","tea","teabag","teacake", "teach","teacher","teaching","teacup","teacupful", "teagarden","teahouse","teak","teakettle","teal", "tealeaf","team","teamster","teamwork","teapot", "tear","tearaway","teardrop","tearful"
 ,"teargas", "tearjerker","tearless","tearoom","tease","teasel", "teaser","teaspoon","teaspoonful","teat","teatime", "teazle","tech","technical","technicality","technician", "technique","technocracy","technocrat","technological","technologist", "technology","techy","tedious","tedium","tee", "teem","teeming","teenage","teenager","teens", "teenybopper","teeter","teeth","teethe","teetotal", "teetotaler","teetotaller","teflon","tegument","tele", "telecast","telecommunications","telegram","telegraph","telegrapher", "telegraphese","telegraphic","telemarketing","telemeter","telemetry", "teleology","telepathic","telepathist","telepathy","telephone", "telephonist","telephony","telephotograph","telephotography","teleprinter", "teleprompter","telescope","telescopic","televise","television", "televisual","telex","telfer","tell","teller", "telling","telltale","telly","telpher","telstar", "temerity","temp","temper","tempera","temperament", "temperamental","temperance","temperate","temperature","te
 mpest", "tempestuous","template","temple","templet","tempo", "temporal","temporary","temporise","temporize","tempt", "temptation","ten","tenable","tenacious","tenacity", "tenancy","tenant","tenantry","tench","tend", "tendency","tendentious","tender","tenderfoot","tenderhearted", "tenderise","tenderize","tenderloin","tendon","tendril", "tenement","tenet","tenner","tennis","tenon"};
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
new file mode 100644
index 0000000..dd39504
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by Kstem
+	/// </summary>
+	internal class KStemData8
+	{
+		private KStemData8()
+		{
+		}
+	   internal static string[] data = new string[] {"tenor","tenpin","tense","tensile","tension", "tent","tentacle","tentative","tenterhooks","tenuity", "tenuous","tenure","tepee","tepid","tequila", "tercentenary","tercentennial","term","termagant","terminable", "terminal","terminate","termination","terminology","terminus", "termite","terms","tern","terpsichorean","terrace", "terracotta","terrain","terrapin","terrestrial","terrible", "terribly","terrier","terrific","terrifically","terrify", "territorial","territory","terror","terrorise","terrorism", "terrorize","terrycloth","terse","tertian","tertiary", "terylene","tessellated","test","testament","testamentary", "testate","testator","tester","testicle","testify", "testimonial","testimony","testis","testy","tetanus", "tetchy","tether","teutonic","text","textbook", "textile","textual","texture","thalidomide","than", "thane","thank","thankful","thankless","thanks", "thanksgiving","thankyou","that","thatch","thaw", "the","theater","theate
 rgoer","theatre","theatregoer", "theatrical","theatricals","thee","theft","thegn", "their","theirs","theism","them","theme", "themselves","then","thence","thenceforth","theocracy", "theocratic","theodolite","theologian","theology","theorem", "theoretical","theoretically","theorise","theorist","theorize", "theory","theosophy","therapeutic","therapeutics","therapist", "therapy","there","thereabouts","thereafter","thereby", "therefore","therein","thereinafter","thereof","thereon", "thereto","thereunder","thereupon","therm","thermal", "thermionic","thermionics","thermodynamics","thermometer","thermonuclear", "thermoplastic","thermos","thermosetting","thermostat","thesaurus", "these","thesis","thespian","thews","they", "thick","thicken","thickener","thicket","thickheaded", "thickness","thickset","thief","thieve","thieving", "thievish","thigh","thimble","thimbleful","thin", "thine","thing","thingamajig","thingamujig","things", "think","thinkable","thinking","thinner","third", "thirst","th
 irsty","thirteen","thirty","this", "thistle","thistledown","thither","thole","thong", "thorax","thorn","thorny","thorough","thoroughbred", "thoroughfare","thoroughgoing","those","thou","though", "thought","thoughtful","thoughtless","thousand","thraldom", "thrall","thralldom","thrash","thrashing","thread", "threadbare","threadlike","threat","threaten","three", "threepence","threnody","thresh","thresher","threshold", "threw","thrice","thrift","thrifty","thrill", "thriller","thrive","throat","throaty","throb", "throes","thrombosis","throne","throng","throstle", "throttle","through","throughout","throughput","throughway", "throw","throwaway","throwback","thru","thrum", "thrush","thrust","thruster","thruway","thud", "thug","thuggery","thumb","thumbnail","thumbscrew", "thumbtack","thump","thumping","thunder","thunderbolt", "thunderclap","thundercloud","thundering","thunderous","thunderstorm", "thunderstruck","thundery","thurible","thursday","thus", "thwack","thwart","thy","thyme","thyroid
 ", "thyself","tiara","tibia","tic","tick", "ticker","tickertape","ticket","ticking","tickle", "tickler","ticklish","tidal","tidbit","tiddler", "tiddley","tiddleywinks","tiddly","tiddlywinks","tide", "tidemark","tidewater","tideway","tidings","tidy", "tie","tiebreaker","tiepin","tier","tiff", "tiffin","tig","tiger","tigerish","tight", "tighten","tightfisted","tightrope","tights","tightwad", "tigress","tike","tilde","tile","till", "tillage","tiller","tilt","timber","timbered", "timberline","timbre","timbrel","time","timekeeper", "timeless","timely","timepiece","timer","times", "timesaving","timeserver","timeserving","timetable","timework", "timeworn","timid","timing","timorous","timothy", "timpani","timpanist","tin","tincture","tinder", "tinderbox","tinfoil","ting","tingaling","tinge", "tingle","tinker","tinkle","tinny","tinplate", "tinsel","tint","tintack","tintinnabulation","tiny", "tip","tippet","tipple","tipstaff","tipster", "tipsy","tiptoe","tirade","tire","tired", "tireless","ti
 resome","tiro","tissue","tit", "titan","titanic","titanium","titbit","titfer", "tithe","titillate","titivate","title","titled", "titleholder","titmouse","titter","tittivate","tittle", "titty","titular","tizzy","tnt","toad", "toadstool","toady","toast","toaster","toastmaster", "tobacco","tobacconist","toboggan","toccata","tocsin", "tod","today","toddle","toddler","toddy", "toe","toehold","toenail","toff","toffee", "toffy","tog","toga","together","togetherness", "toggle","togs","toil","toilet","toiletries", "toiletry","toils","tokay","token","told", "tolerable","tolerably","tolerance","tolerant","tolerate", "toleration","toll","tollgate","tollhouse","tomahawk", "tomato","tomb","tombola","tomboy","tombstone", "tomcat","tome","tomfoolery","tommyrot","tomorrow", "tomtit","ton","tonal","tonality","tone", "toneless","tong","tongs","tongue","tonic", "tonight","tonnage","tonne","tonsil","tonsilitis", "tonsillitis","tonsorial","tonsure","tontine","too", "took","tool","toot","tooth","toothache
 ", "toothbrush","toothcomb","toothpaste","toothpick","toothsome", "toothy","tootle","toots","tootsie","top", "topaz","topcoat","topdressing","topee","topgallant", "topi","topiary","topic","topical","topicality", "topknot","topless","topmast","topmost","topographer", "topographical","topography","topper","topping","topple", "tops","topsail","topside","topsoil","topspin", "toque","tor","torch","torchlight","tore", "toreador","torment","tormentor","torn","tornado", "torpedo","torpid","torpor","torque","torrent", "torrential","torrid","torsion","torso","tort", "tortilla","tortoise","tortoiseshell","tortuous","torture", "tory","toss","tot","total","totalisator", "totalitarian","totalitarianism","totality","totalizator","tote", "totem","totter","tottery","toucan","touch", "touchdown","touched","touching","touchline","touchstone", "touchy","tough","toughen","toupee","tour", "tourism","tourist","tournament","tourney","tourniquet", "tousle","tout","tow","towards","towel", "toweling","towelli
 ng","tower","towering","towline", "town","townscape","township","townsman","townspeople", "towpath","toxaemia","toxemia","toxic","toxicologist", "toxicology","toxin","toy","toyshop","trace", "tracer","tracery","trachea","trachoma","tracing", "track","trackless","tracksuit","tract","tractable", "traction","tractor","trad","trade","trademark", "trader","trades","tradesman","tradespeople","tradition", "traditional","traditionalism","traduce","traffic","trafficator", "trafficker","tragedian","tragedienne","tragedy","tragic", "tragicomedy","trail","trailer","train","trainbearer", "trainee","training","trainman","traipse","trait", "traitor","traitorous","trajectory","tram","tramline", "trammel","trammels","tramp","trample","trampoline", "trance","tranny","tranquil","tranquiliser","tranquillise", "tranquillize","tranquillizer","transact","transaction","transactions", "transalpine","transatlantic","transcend","transcendence","transcendent", "transcendental","transcendentalism","transcontine
 ntal","transcribe","transcript", "transcription","transept","transfer","transference","transfiguration", "transfigure","transfix","transform","transformation","transformer", "transfuse","transgress","tranship","transience","transient", "transistor","transistorise","transistorize","transit","transition", "transitive","translate","translator","transliterate","translucence", "translucent","transmigration","transmission","transmit","transmitter", "transmogrify","transmute","transoceanic","transom","transparency", "transparent","transpiration","transpire","transplant","transpolar", "transport","transportation","transporter","transpose","transship", "transubstantiation","transverse","transvestism","transvestite","trap", "trapdoor","trapeze","trapezium","trapezoid","trapper", "trappings","trappist","trapse","trapshooting","trash", "trashcan","trashy","trauma","traumatic","travail", "travel","traveled","traveler","travelled","traveller", "travelog","travelogue","travels","travelsick","trave
 rse", "travesty","trawl","trawler","tray","treacherous", "treachery","treacle","treacly","tread","treadle", "treadmill","treason","treasonable","treasure","treasurer", "treasury","treat","treatise","treatment","treaty", "treble","tree","trefoil","trek","trellis", "tremble","tremendous","tremolo","tremor","tremulous", "trench","trenchant","trencher","trencherman","trend", "trendsetter","trendy","trepan","trephine","trepidation", "trespass","tresses","trestle","trews","triad", "trial","triangle","triangular","tribal","tribalism", "tribe","tribesman","tribulation","tribunal","tribune", "tributary","tribute","trice","triceps","trichinosis", "trick","trickery","trickle","trickster","tricky", "tricolor","tricolour","tricycle","trident","triennial", "trier","trifle","trifler","trifling","trigger", "trigonometry","trike","trilateral","trilby","trilingual", "trill","trillion","trilobite","trilogy","trim", "trimaran","trimester","trimmer","trimming","trinitrotoluene", "trinity","trinket","tri
 o","trip","tripartite", "triple","triplet","triplex","triplicate","tripod", "tripos","tripper","tripping","triptych","tripwire", "trireme","trisect","trite","triumph","triumphal", "triumphant","triumvir","triumvirate","trivet","trivia", "trivial","trivialise","triviality","trivialize","trochaic", "trochee","trod","trodden","troglodyte","troika", "trojan","troll","trolley","trolleybus","trollop", "trombone","trombonist","troop","trooper","troops", "troopship","trope","trophy","tropic","tropical", "tropics","trot","troth","trotskyist","trotter", "troubadour","trouble","troublemaker","troubleshooter","troublesome", "trough","trounce","troupe","trouper","trouser", "trousers","trousseau","trout","trove","trowel", "truancy","truant","truce","truck","trucking", "truckle","truculence","truculent","trudge","true", "trueborn","truehearted","truelove","truffle","trug", "truism","truly","trump","trumpery","trumpet", "trumps","truncate","truncheon","trundle","trunk", "trunks","truss","trust","tr
 ustee","trusteeship", "trustful","trustworthy","trusty","truth","truthful", "try","tryst","tsar","tsarina","tsp", "tub","tuba","tubby","tube","tubeless", "tuber","tubercular","tuberculosis","tubful","tubing", "tubular","tuck","tucker","tuckerbag","tuesday", "tuft","tug","tugboat","tuition","tulip", "tulle","tumble","tumbledown","tumbler","tumbleweed", "tumbrel","tumbril","tumescent","tumid","tummy", "tumor","tumour","tumult","tumultuous","tumulus", "tun","tuna","tundra","tune","tuneful", "tuneless","tuner","tungsten","tunic","tunnel", "tunny","tup","tuppence","tuppenny","turban", "turbid","turbine","turbojet","turboprop","turbot", "turbulence","turbulent","turd","tureen","turf", "turgid","turkey","turmeric","turmoil","turn", "turnabout","turncoat","turncock","turner","turning", "turnip","turnkey","turnout","turnover","turnpike", "turnstile","turntable","turpentine","turpitude","turquoise", "turret","turtle","turtledove","turtleneck","tush", "tusk","tusker","tussle","tussock","tut", 
 "tutelage","tutelary","tutor","tutorial","tutu", "tuxedo","twaddle","twain","twang","twat", "tweak","twee","tweed","tweeds","tweedy", "tweet","tweeter","tweezers","twelfth","twelve", "twelvemonth","twenty","twerp","twice","twiddle", "twig","twilight","twill","twin","twinge", "twinkle","twinkling","twirl","twirp","twist", "twister","twit","twitch","twitter","twixt", "two","twofaced","twopence","twopenny","twosome", "tycoon","tyke","tympanum","type","typecast", "typeface","typescript","typesetter","typewriter","typewritten", "typhoid","typhoon","typhus","typical","typically", "typify","typist","typographer","typographic","typography", "tyrannical","tyrannise","tyrannize","tyrannosaurus","tyranny", "tyrant","tyre","tyro","tzar","tzarina", "ubiquitous","ucca","udder","ufo","ugh", "ugly","uhf","ukulele","ulcer","ulcerate", "ulcerous","ullage","ulna","ult","ulterior", "ultimate","ultimately","ultimatum","ultimo","ultramarine", "ultrasonic","ultraviolet","umber","umbrage","umbrella", "umla
 ut","umpire","umpteen","unabashed","unabated", "unable","unabridged","unaccompanied","unaccountable","unaccustomed", "unadopted","unadulterated","unadvised","unaffected","unalloyed", "unanimous","unannounced","unanswerable","unapproachable","unarmed", "unasked","unassuming","unattached","unattended","unavailing", "unawares","unbalance","unbar","unbearable","unbearably", "unbeknown","unbelief","unbelievable","unbeliever","unbelieving", "unbend","unbending","unbidden","unbind","unblushing", "unborn","unbosom","unbounded","unbowed","unbridled", "unbuckle","unburden","unbuttoned","uncanny","unceremonious", "uncertain","uncertainty","uncharitable","uncharted","unchecked", "unchristian","unclad","uncle","unclean","unclouded", "uncolored","uncoloured","uncomfortable","uncommitted","uncommonly", "uncompromising","unconcerned","unconditional","unconscionable","unconscious", "unconsidered","uncork","uncouple","uncouth","uncover", "uncritical","uncrowned","uncrushable","unction","unctuous", "u
 ncut","undaunted","undeceive","undecided","undeclared", "undeniable","under","underact","underarm","underbelly", "underbrush","undercarriage","undercharge","underclothes","undercoat", "undercover","undercurrent","undercut","underdog","underdone", "underestimate","underfelt","underfloor","underfoot","undergarment", "undergo","undergraduate","underground","undergrowth","underhand", "underhanded","underhung","underlay","underlie","underline", "underling","underlying","undermanned","undermentioned","undermine", "underneath","undernourish","underpants","underpass","underpin", "underplay","underprivileged","underproof","underquote","underrate", "underscore","undersecretary","undersell","undersexed","undershirt", "underside","undersigned","undersized","underslung","understaffed", "understand","understanding","understate","understatement","understudy", "undertake","undertaker","undertaking","undertone","undertow", "underwater","underwear","underweight","underwent","underworld", "underwrite"
 ,"underwriter","undesirable","undeveloped","undies", "undischarged","undistinguished","undivided","undo","undoing", "undomesticated","undone","undoubted","undress","undressed", "undue","undulate","undulation","unduly","undying", "unearth","unearthly","unease","uneasy","uneconomic", "uneducated","unemployed","unemployment","unenlightened","unenviable", "unequal","unequaled","unequalled","unequivocal","unerring", "unesco","uneven","uneventful","unexampled","unexceptionable", "unfailing","unfaithful","unfaltering","unfathomable","unfathomed", "unfavorable","unfavourable","unfeeling","unfettered","unfit", "unflagging","unflappable","unflinching","unfold","unforeseen", "unforgettable","unfortunate","unfortunately","unfounded","unfrequented", "unfrock","unfurl","ungainly","ungenerous","ungodly", "ungovernable","ungracious","ungrateful","ungrudging","unguarded", "unguent","unhallowed","unhand","unhappily","unhappy", "unhealthy","unheard","unhinge","unholy","unhook", "unhorse","unicef","uni
 corn","unidentified","unification", "uniform","uniformed","unify","unilateral","unimpeachable", "uninformed","uninhabitable","uninhibited","uninterested","uninterrupted", "union","unionise","unionism","unionist","unionize", "unique","unisex","unison","unit","unitarian", "unite","united","unity","universal","universally", "universe","university","unkempt","unkind","unkindly", "unknowing","unknown","unlawful","unlearn","unleash", "unleavened","unless","unlettered","unlike","unlikely", "unload","unlock","unloose","unloosen","unmade", "unmannerly","unmarried","unmask","unmatched","unmeasured", "unmentionable","unmentionables","unmindful","unmistakable","unmitigated", "unmoved","unnatural","unnecessary","unnerve","unnumbered", "uno","unobtrusive","unofficial","unorthodox","unpack", "unparalleled","unparliamentary","unperson","unpick","unplaced", "unplayable","unpleasant","unplumbed","unpracticed","unpractised", "unprecedented","unprejudiced","unpretentious","unprincipled","unprintable", 
 "unprofessional","unprompted","unprovoked","unqualified","unquestionable", "unquestioning","unquiet","unquote","unravel","unreadable", "unreal","unreasonable","unreasoning","unrelenting","unrelieved", "unremitting","unrequited","unreserved","unrest","unrestrained", "unrip","unrivaled","unrivalled","unroll","unruffled", "unruly","unsaddle","unsaid","unsavory","unsavoury", "unsay","unscathed","unschooled","unscramble","unscrew", "unscripted","unscrupulous","unseat","unseeing","unseemly", "unseen","unserviceable","unsettle","unsettled","unsex", "unsexed","unshakable","unshakeable","unshod","unsightly", "unskilled","unsociable","unsocial","unsophisticated","unsound", "unsparing","unspeakable","unspotted","unstop","unstrung", "unstuck","unstudied","unsullied","unsung","unswerving", "untangle","untapped","untenable","unthinkable","unthinking", "untie","until","untimely","untinged","untiring", "unto","untold","untouchable","untoward","untruth", "untruthful","untutored","unused","unusual","
 unusually", "unutterable","unvarnished","unveil","unversed","unvoiced", "unwarranted","unwed","unwell","unwieldy","unwind", "unwitting","unwonted","unzip","upbeat","upbraid", "upbringing","upcoming","update","upend","upgrade", "upheaval","uphill","uphold","upholster","upholsterer", "upholstery","upkeep","upland","uplift","upon", "upper","uppercut","uppermost","uppish","uppity", "upright","uprising","uproar","uproarious","uproot", "upset","upshot","upstage","upstairs","upstanding", "upstart","upstream","upsurge","upswing","uptake", "uptight","uptown","upturn","upturned","upward", "upwards","uranium","uranus","urban","urbane", "urbanise","urbanize","urchin","urge","urgent", "uric","urinal","urinary","urinate","urine", "urn","usage","use","useful","usefulness", "useless","user","usher","usherette","ussr", "usual","usually","usurer","usurious","usurp", "usury","utensil","uterine","uterus","utilise", "utilitarian","utilitarianism","utility","utilize","utmost", "utopia","utopian","utter",
 "utterance","utterly", "uvula","uvular","uxorious","vac","vacancy", "vacant","vacate","vacation","vaccinate","vaccination", "vaccine","vacillate","vacuity","vacuous","vacuum", "vagabond","vagary","vagina","vaginal","vagrancy", "vagrant","vague","vain","vainglorious","vainglory", "valance","vale","valediction","valedictory","valency", "valentine","valerian","valet","valetudinarian","valiant", "valiantly","valid","validate","valise","valley", "valor","valour","valse","valuable","valuation", "value","valuer","valve","valvular","vamoose", "vamp","vampire","van","vanadium","vandal", "vandalise","vandalism","vandalize","vane","vanguard", "vanilla","vanish","vanity","vanquish","vantagepoint", "vapid","vapidity","vapor","vaporise","vaporize", "vaporous","vapors","vapour","vapours","variability", "variable","variance","variant","variation","varicolored", "varicoloured","varicose","varied","variegated","variegation", "variety","variform","variorum","various","variously", "varlet","varmint","v
 arnish","varsity","vary", "vascular","vase","vasectomy","vaseline","vassal", "vassalage","vast","vastly","vastness","vat", "vatican","vaudeville","vault","vaulted","vaulting", "vaunt","veal","vector","veer","veg", "vegan","vegetable","vegetarian","vegetarianism","vegetate", "vegetation","vehement","vehicle","vehicular","veil", "veiled","vein","veined","veining","velar", "velarize","veld","veldt","vellum","velocipede", "velocity","velour","velours","velvet","velveteen", "velvety","venal","vend","vendee","vender", "vendetta","vendor","veneer","venerable","venerate", "venereal","vengeance","vengeful","venial","venison", "venom","venomous","venous","vent","ventilate", "ventilation","ventilator","ventricle","ventriloquism","ventriloquist", "venture","venturer","venturesome","venue","veracious", "veracity","veranda","verandah","verb","verbal", "verbalise","verbalize","verbally","verbatim","verbena", "verbiage","verbose","verbosity","verdant","verdict", "verdigris","verdure","verge","verge
 r","verify", "verily","verisimilitude","veritable","verity","vermicelli", "vermiculite","vermiform","vermifuge","vermilion","vermin", "verminous","vermouth","vernacular","vernal","veronal", "veronica","verruca","versatile","verse","versed", "versification","versify","version","verso","versus", "vertebra","vertebrate","vertex","vertical","vertiginous", "vertigo","verve","very","vesicle","vesicular", "vesper","vespers","vessel","vest","vestibule", "vestige","vestigial","vestment","vestry","vestryman", "vesture","vet","vetch","veteran","veterinary", "veto","vex","vexation","vexatious","vhf", "via","viable","viaduct","vial","viands", "vibes","vibrancy","vibrant","vibraphone","vibrate", "vibration","vibrato","vibrator","vicar","vicarage", "vicarious","vice","vicelike","viceregal","vicereine", "viceroy","vicinity","vicious","vicissitudes","victim", "victimise","victimize","victor","victorian","victorious", "victory","victual","victualer","victualler","victuals", "vicuaa","vicuana","vide",
 "videlicet","video", "videotape","vie","view","viewer","viewfinder", "viewless","viewpoint","vigil","vigilance","vigilant", "vigilante","vignette","vigor","vigorous","vigour", "viking","vile","vilification","vilify","villa", "village","villager","villain","villainies","villainous", "villainy","villein","villeinage","villenage","vim", "vinaigrette","vindicate","vindication","vindictive","vine", "vinegar","vinegary","vinery","vineyard","vino", "vinous","vintage","vintner","vinyl","viol", "viola","violate","violence","violent","violet", "violin","violoncello","vip","viper","virago", "virgin","virginal","virginals","virginia","virginity", "virgo","virgule","virile","virility","virologist", "virology","virtu","virtual","virtually","virtue", "virtuosity","virtuoso","virtuous","virulence","virulent", "virus","visa","visage","viscera","visceral", "viscosity","viscount","viscountcy","viscountess","viscous", "vise","visibility","visible","visibly","vision", "visionary","visit","visitant","vis
 itation","visiting", "visitor","visor","vista","visual","visualise", "visualize","visually","vital","vitalise","vitality", "vitalize","vitally","vitals","vitamin","vitiate", "viticulture","vitreous","vitrify","vitriol","vitriolic", "vituperate","vituperation","vituperative","vivace","vivacious", "vivarium","vivid","viviparous","vivisect","vivisection", "vivisectionist","vixen","vixenish","vizier","vocab", "vocabulary","vocal","vocalise","vocalist","vocalize", "vocation","vocational","vocative","vociferate","vociferation", "vociferous","vodka","vogue","voice","voiceless", "void","voile","vol","volatile","volcanic", "volcano","vole","volition","volitional","volley", "volleyball","volt","voltage","voluble","volume", "volumes","voluminous","voluntary","volunteer","voluptuary", "voluptuous","volute","vomit","voodoo","voracious", "vortex","votary","vote","voter","votive", "vouch","voucher","vouchsafe","vow","vowel", "voyage","voyager","voyages","voyeur","vtol", "vulcanise","vulcanite","vu
 lcanize","vulgar","vulgarian", "vulgarise","vulgarism","vulgarity","vulgarize","vulgate", "vulnerable","vulpine","vulture","vulva","wac", "wack","wacky","wad","wadding","waddle", "wade","wader","wadge","wadi","wady", "wafer","waffle","waft","wag","wage", "wager","wages","waggery","waggish","waggle", "waggon","waggoner","waggonette","wagon","wagoner", "wagonette","wagtail","waif","wail","wain", "wainscot","waist","waistband","waistcoat","waistline", "wait","waiter","waits","waive","waiver", "wake","wakeful","waken","waking","walk", "walkabout","walkaway","walker","walking","walkout", "walkover","wall","walla","wallaby","wallah", "wallet","wallflower","wallop","walloping","wallow", "wallpaper","walnut","walrus","waltz","wampum", "wan","wand","wander","wanderer","wandering", "wanderings","wanderlust","wane","wangle","wank", "wanker","want","wanting","wanton","wants", "wapiti","war","warble","warbler","ward", "warden","warder","wardrobe","wardroom","warehouse", "wares","warfare","warhea
 d","warhorse","warily", "warlike","warlock","warlord","warm","warmonger", "warmth","warn","warning","warp","warpath", "warrant","warrantee","warrantor","warranty","warren", "warrior","warship","wart","warthog","wartime", "wary","was","wash","washable","washbasin", "washboard","washbowl","washcloth","washday","washer", "washerwoman","washhouse","washing","washout","washroom", "washstand","washwoman","washy","wasp","waspish", "wassail","wast","wastage","waste","wasteful", "waster","wastrel","watch","watchband","watchdog", "watches","watchful","watchmaker","watchman","watchtower", "watchword","water","waterborne","watercolor","watercolour", "watercourse","watercress","waterfall","waterfowl","waterfront", "waterhole","waterline","waterlogged","waterloo","waterman", "watermark","watermelon","watermill","waterpower","waterproof", "waters","watershed","waterside","waterspout","watertight", "waterway","waterwheel","waterwings","waterworks","watery", "watt","wattage","wattle","wave","wavelen
 gth", "waver","wavy","wax","waxen","waxworks", "waxy","way","waybill","wayfarer","wayfaring", "waylay","ways","wayside","wayward","weak", "weaken","weakling","weakness","weal","weald", "wealth","wealthy","wean","weapon","weaponry", "wear","wearing","wearisome","weary","weasel", "weather","weatherboard","weathercock","weatherglass","weatherman", "weatherproof","weathers","weave","weaver","web", "webbed","webbing","wed","wedded","wedding", "wedge","wedged","wedgwood","wedlock","wednesday", "wee","weed","weeds","weedy","week", "weekday","weekend","weekender","weekly","weeknight", "weeny","weep","weeping","weepy","weevil", "weft","weigh","weighbridge","weight","weighted", "weighting","weightless","weighty","weir","weird", "weirdie","weirdo","welch","welcome","weld", "welder","welfare","welkin","well","wellbeing", "wellborn","wellington","wellspring","welsh","welt", "weltanschauung","welter","welterweight","wen","wench", "wend","wensleydale","went","wept","were", "werewolf","wert","wesle
 yan","west","westbound", "westerly","western","westerner","westernise","westernize", "westernmost","westward","westwards","wet","wether", "wetting","whack","whacked","whacker","whacking", "whale","whalebone","whaler","whaling","wham", "wharf","what","whatever","whatnot","wheat", "wheaten","wheedle","wheel","wheelbarrow","wheelbase", "wheelchair","wheelhouse","wheeling","wheels","wheelwright", "wheeze","wheezy","whelk","whelp","when", "whence","whenever","where","whereabouts","whereas", "whereat","whereby","wherefore","wherefores","wherein", "whereof","whereon","wheresoever","whereto","whereupon", "wherever","wherewithal","wherry","whet","whether", "whetstone","whew","whey","which","whichever", "whiff","whiffy","whig","while","whim", "whimper","whimsey","whimsical","whimsicality","whimsy", "whin","whine","whiner","whinny","whip", "whipcord","whiplash","whippersnapper","whippet","whipping", "whippoorwill","whippy","whir","whirl","whirligig", "whirlpool","whirlwind","whirlybird","whirr
 ","whisk", "whisker","whiskered","whiskers","whiskey","whisky", "whisper","whist","whistle","whit","white", "whitebait","whitehall","whiten","whitening","whites", "whitethorn","whitethroat","whitewash","whither","whiting", "whitlow","whitsun","whitsuntide","whittle","whiz", "whizz","who","whoa","whodunit","whoever", "whole","wholemeal","wholesale","wholesaler","wholesome", "wholly","whom","whoop","whoopee","whoosh", "whop","whopper","whopping","whore","whorehouse", "whoremonger","whorl","whortleberry","whose","whosoever", "why","whys","wick","wicked","wicker", "wickerwork","wicket","wide","widely","widen", "widespread","widgeon","widow","widowed","widower", "widowhood","width","wield","wife","wifely", "wig","wigged","wigging","wiggle","wight", "wigwam","wilco","wild","wildcat","wildebeest", "wilderness","wildfire","wildfowl","wildlife","wildly", "wile","wiles","wilful","wiliness","will", "willful","willies","willing","willow","willowy", "willpower","wilt","wily","wimple","wimpy", "w
 in","wince","winceyette","winch","wind", "windbag","windbreak","windcheater","windfall","windily", "winding","windjammer","windlass","windless","windmill", "window","windowpane","windowsill","windpipe","windscreen", "windshield","windsock","windstorm","windswept","windward", "windy","wine","winebibbing","wineglass","winepress", "wineskin","wing","winger","wings","wingspan", "wink","winkers","winkle","winner","winning", "winnings","winnow","winsome","winter","wintergreen", "wintertime","wintry","wipe","wiper","wire", "wirecutters","wireless","wiretap","wireworm","wiring", "wiry","wisdom","wise","wisecrack","wish", "wishbone","wisp","wispy","wisteria","wistful", "wit","witch","witchcraft","witchdoctor","witchery", "witching","with","withal","withdraw","withdrawal", "withdrawn","withe","wither","withering","withers", "withhold","within","without","withstand","withy", "witless","witness","witticism","witting","witty", "wives","wizard","wizardry","wizened","woad", "wobble","wobbly","woe"
 ,"woebegone","woeful", "wog","woke","woken","wold","wolf", "wolfhound","wolfram","wolfsbane","woman","womanhood", "womanise","womanish","womanize","womankind","womanly", "womb","wombat","womenfolk","won","wonder", "wonderful","wonderland","wonderment","wonders","wondrous", "wonky","wont","wonted","woo","wood", "woodbine","woodblock","woodcock","woodcraft","woodcut", "woodcutter","wooded","wooden","woodenheaded","woodland", "woodlouse","woodpecker","woodpile","woodshed","woodsman", "woodwind","woodwork","woodworm","woody","wooer", "woof","woofer","wool","woolen","woolens", "woolgather","woolgathering","woollen","woollens","woolly", "woolsack","woozy","wop","word","wording", "wordless","wordplay","words","wordy","wore", "work","workable","workaday","workbag","workbasket", "workbench","workbook","workday","worker","workhorse", "workhouse","working","workings","workman","workmanlike", "workmanship","workout","workpeople","workroom","works", "workshop","worktop","world","worldly","worlds
 haking", "worldwide","worm","wormhole","wormwood","wormy", "worn","worried","worrisome","worry","worse", "worsen","worship","worshipful","worst","worsted", "wort","worth","worthless","worthwhile","worthy", "wot","wotcher","would","wouldst","wound", "wove","woven","wow","wrac","wrack", "wraith","wrangle","wrangler","wrap","wrapper", "wrapping","wrath","wreak","wreath","wreathe", "wreck","wreckage","wrecker","wren","wrench", "wrest","wrestle","wretch","wretched","wriggle", "wright","wring","wringer","wrinkle","wrist", "wristband","wristlet","wristwatch","wristy","writ", "write","writer","writhe","writing","writings", "written","wrong","wrongdoing","wrongful","wrongheaded", "wrote","wroth","wrought","wrung","wry", "wurst","wyvern","xenon","xenophobia","xerox", "xylophone","yacht","yachting","yachtsman","yahoo", "yak","yam","yammer","yang","yank", "yankee","yap","yard","yardage","yardarm", "yardstick","yarn","yarrow","yashmak","yaw", "yawl","yawn","yaws","yea","yeah", "year","yearbook",
 "yearling","yearlong","yearly", "yearn","yearning","years","yeast","yeasty", "yell","yellow","yelp","yen","yeoman", "yeomanry","yes","yesterday","yet","yeti", "yew","yid","yiddish","yield","yielding", "yin","yippee","yobbo","yodel","yoga", "yoghurt","yogi","yogurt","yoke","yokel", "yolk","yonder","yonks","yore","yorker", "you","young","younger","youngster","your", "yours","yourself","youth","youthful","yowl", "yoyo","yucca","yule","yuletide","zany", "zeal","zealot","zealotry","zealous","zebra", "zebu","zed","zeitgeist","zen","zenana", "zenith","zephyr","zeppelin","zero","zest", "ziggurat","zigzag","zinc","zinnia","zionism", "zip","zipper","zippy","zither","zizz", "zodiac","zombi","zombie","zonal","zone", "zoning","zonked","zoo","zoologist","zoology", "zoom","zoophyte","zouave","zucchini","zulu"};
+	}
+
+}
\ No newline at end of file
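
The KStemDataN classes above are pure data holders; nothing in them executes. A stemmer implementation would presumably fold the eight tables into one lookup structure at start-up. A minimal C# sketch of that idea, assuming the remaining tables mirror the KStemData8 layout shown above (KStemDictionary and Build are illustrative names, not part of this port):

    using System.Collections.Generic;

    internal static class KStemDictionary
    {
        // Merge the KStemDataN word tables into a single set so the stemmer
        // can test membership in O(1) while processing tokens.
        internal static HashSet<string> Build()
        {
            var words = new HashSet<string>();
            // KStemData1.data through KStemData7.data would be added here too.
            foreach (string w in KStemData8.data)
            {
                words.Add(w);
            }
            return words;
        }
    }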

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
new file mode 100644
index 0000000..79bf268
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
@@ -0,0 +1,81 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A high-performance kstem filter for English.
+	/// <p/>
+	/// See <a href="http://ciir.cs.umass.edu/pubfiles/ir-35.pdf">
+	/// "Viewing Morphology as an Inference Process"</a>
+	/// (Krovetz, R., Proceedings of the Sixteenth Annual International ACM SIGIR
+	/// Conference on Research and Development in Information Retrieval, 191-203, 1993).
+	/// <p/>
+	/// All terms must already be lowercased for this filter to work correctly.
+	/// 
+	/// <para>
+	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	/// certain terms from being passed to the stemmer
+	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	/// in a previous <seealso cref="TokenStream"/>.
+	/// 
+	/// Note: To include the original term as well as the stemmed version, see
+	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	/// </para>
+	/// 
+	/// 
+	/// </summary>
+
+	public sealed class KStemFilter : TokenFilter
+	{
+	  private readonly KStemmer stemmer = new KStemmer();
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+
+	  public KStemFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Returns the next stemmed input Token. </summary>
+	  ///  <returns> The stemmed form of a token. </returns>
+	  ///  <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] term = termAttribute.buffer();
+		int len = termAttribute.length();
+		if ((!keywordAtt.Keyword) && stemmer.stem(term, len))
+		{
+		  termAttribute.setEmpty().append(stemmer.asCharSequence());
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
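
As the summary above notes, KStemFilter expects lowercased terms and honors KeywordAttribute. A minimal C# sketch of a conforming analysis chain, using the Java-derived names from this raw port (the StandardTokenizer and LowerCaseFilter constructor shapes and the Version constant are assumptions, not confirmed by this commit):

    // Lowercase before stemming; optionally mark protected terms so the
    // stemmer leaves them untouched (via KeywordAttribute).
    TokenStream BuildKStemChain(System.IO.TextReader reader)
    {
        TokenStream ts = new StandardTokenizer(Version.LUCENE_47, reader);
        ts = new LowerCaseFilter(Version.LUCENE_47, ts);
        // ts = new SetKeywordMarkerFilter(ts, protectedTerms); // optional
        return new KStemFilter(ts);
    }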

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilterFactory.cs
new file mode 100644
index 0000000..35e8296
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="KStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_kstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.KStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class KStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new KStemFilterFactory </summary>
+	  public KStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenFilter create(TokenStream input)
+	  {
+		return new KStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
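
The factory is the programmatic counterpart of the Solr snippet in its summary: it takes no parameters of its own, so any entry left in the args dictionary triggers the ArgumentException shown above. A short sketch (tokenStream is a placeholder for any TokenStream supplied by the caller):

    using System.Collections.Generic;

    // An empty parameter map is valid; an unknown key would throw.
    var factory = new KStemFilterFactory(new Dictionary<string, string>());
    TokenFilter stemmed = factory.create(tokenStream);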


[15/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
new file mode 100644
index 0000000..fa5d5da
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -0,0 +1,625 @@
+using System;
+using System.Text;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using WhitespaceTokenizer = WhitespaceTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+	/// <summary>
+	/// Old, broken version of <seealso cref="WordDelimiterFilter"/>
+	/// </summary>
+	[Obsolete]
+	public sealed class Lucene47WordDelimiterFilter : TokenFilter
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		private void InitializeInstanceFields()
+		{
+			concat = new WordDelimiterConcatenation(this);
+			concatAll = new WordDelimiterConcatenation(this);
+		}
+
+
+	  public const int LOWER = 0x01;
+	  public const int UPPER = 0x02;
+	  public const int DIGIT = 0x04;
+	  public const int SUBWORD_DELIM = 0x08;
+
+	  // combinations: for testing, not for setting bits
+	  public const int ALPHA = 0x03;
+	  public const int ALPHANUM = 0x07;
+
+	  /// <summary>
+	  /// Causes parts of words to be generated:
+	  /// <p/>
+	  /// "PowerShot" => "Power" "Shot"
+	  /// </summary>
+	  public const int GENERATE_WORD_PARTS = 1;
+
+	  /// <summary>
+	  /// Causes number subwords to be generated:
+	  /// <p/>
+	  /// "500-42" => "500" "42"
+	  /// </summary>
+	  public const int GENERATE_NUMBER_PARTS = 2;
+
+	  /// <summary>
+	  /// Causes maximum runs of word parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi" => "wifi"
+	  /// </summary>
+	  public const int CATENATE_WORDS = 4;
+
+	  /// <summary>
+	  /// Causes maximum runs of number parts to be catenated:
+	  /// <p/>
+	  /// "500-42" => "50042"
+	  /// </summary>
+	  public const int CATENATE_NUMBERS = 8;
+
+	  /// <summary>
+	  /// Causes all subword parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi-4000" => "wifi4000"
+	  /// </summary>
+	  public const int CATENATE_ALL = 16;
+
+	  /// <summary>
+	  /// Causes original words to be preserved and added to the subword list (defaults to false)
+	  /// <p/>
+	  /// "500-42" => "500" "42" "500-42"
+	  /// </summary>
+	  public const int PRESERVE_ORIGINAL = 32;
+
+	  /// <summary>
+	  /// If not set, causes case changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens)
+	  /// </summary>
+	  public const int SPLIT_ON_CASE_CHANGE = 64;
+
+	  /// <summary>
+	  /// If not set, causes numeric changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens).
+	  /// </summary>
+	  public const int SPLIT_ON_NUMERICS = 128;
+
+	  /// <summary>
+	  /// Causes trailing "'s" to be removed for each subword
+	  /// <p/>
+	  /// "O'Neil's" => "O", "Neil"
+	  /// </summary>
+	  public const int STEM_ENGLISH_POSSESSIVE = 256;
+
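+	  // Usage (illustrative): the constants above are bit flags; they are
+	  // OR'ed together into the 'configurationFlags' constructor argument,
+	  // e.g.:
+	  //
+	  //   int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS
+	  //               | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS;
+	  //   var filter = new Lucene47WordDelimiterFilter(input, flags, null);
+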
+	  /// <summary>
+	  /// If not null, the set of tokens to protect from being delimited.
+	  /// </summary>
+	  internal readonly CharArraySet protWords;
+
+	  private readonly int flags;
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
+
+	  // used for iterating word delimiter breaks
+	  private readonly WordDelimiterIterator iterator;
+
+	  // used for concatenating runs of similar typed subwords (word,number)
+	  private WordDelimiterConcatenation concat;
+	  // number of subwords last output by concat.
+	  private int lastConcatCount = 0;
+
+	  // used for catenate all
+	  private WordDelimiterConcatenation concatAll;
+
+	  // used for accumulating position increment gaps
+	  private int accumPosInc = 0;
+
+	  private char[] savedBuffer = new char[1024];
+	  private int savedStartOffset;
+	  private int savedEndOffset;
+	  private string savedType;
+	  private bool hasSavedState = false;
+	  // if length by start + end offsets doesn't match the term text then assume
+	  // this is a synonym and don't adjust the offsets.
+	  private bool hasIllegalOffsets = false;
+
+	  // for a run of the same subword type within a word, have we output anything?
+	  private bool hasOutputToken = false;
+	  // when preserve original is on, have we output any token following it?
+	  // this token must have posInc=0!
+	  private bool hasOutputFollowingOriginal = false;
+
+	  /// <summary>
+	  /// Creates a new Lucene47WordDelimiterFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="charTypeTable"> table containing character types </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords) : base(@in)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+		this.flags = configurationFlags;
+		this.protWords = protWords;
+		this.iterator = new WordDelimiterIterator(charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
+	  }
+
+	  /// <summary>
+	  /// Creates a new Lucene47WordDelimiterFilter using <seealso cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+	  /// as its charTypeTable
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords) : this(@in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (!hasSavedState)
+		  {
+			// process a new input word
+			if (!input.incrementToken())
+			{
+			  return false;
+			}
+
+			int termLength = termAttribute.length();
+			char[] termBuffer = termAttribute.buffer();
+
+			accumPosInc += posIncAttribute.PositionIncrement;
+
+			iterator.setText(termBuffer, termLength);
+			iterator.next();
+
+			// word of no delimiters, or protected word: just return it
+			if ((iterator.current == 0 && iterator.end == termLength) || (protWords != null && protWords.contains(termBuffer, 0, termLength)))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  return true;
+			}
+
+			// word consisting only of delimiters
+			if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL))
+			{
+			  // if the posInc is 1, simply ignore it in the accumulation
+			  if (posIncAttribute.PositionIncrement == 1)
+			  {
+				accumPosInc--;
+			  }
+			  continue;
+			}
+
+			saveState();
+
+			hasOutputToken = false;
+			hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
+			lastConcatCount = 0;
+
+			if (has(PRESERVE_ORIGINAL))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  return true;
+			}
+		  }
+
+		  // at the end of the string, output any concatenations
+		  if (iterator.end == WordDelimiterIterator.DONE)
+		  {
+			if (!concat.Empty)
+			{
+			  if (flushConcatenation(concat))
+			  {
+				return true;
+			  }
+			}
+
+			if (!concatAll.Empty)
+			{
+			  // only if we haven't output this same combo above!
+			  if (concatAll.subwordCount > lastConcatCount)
+			  {
+				concatAll.writeAndClear();
+				return true;
+			  }
+			  concatAll.clear();
+			}
+
+			// no saved concatenations, on to the next input word
+			hasSavedState = false;
+			continue;
+		  }
+
+		  // word surrounded by delimiters: always output
+		  if (iterator.SingleWord)
+		  {
+			generatePart(true);
+			iterator.next();
+			return true;
+		  }
+
+		  int wordType = iterator.type();
+
+		  // do we already have queued up incompatible concatenations?
+		  if (!concat.Empty && (concat.type & wordType) == 0)
+		  {
+			if (flushConcatenation(concat))
+			{
+			  hasOutputToken = false;
+			  return true;
+			}
+			hasOutputToken = false;
+		  }
+
+		  // add subwords depending upon options
+		  if (shouldConcatenate(wordType))
+		  {
+			if (concat.Empty)
+			{
+			  concat.type = wordType;
+			}
+			concatenate(concat);
+		  }
+
+		  // add all subwords (catenateAll)
+		  if (has(CATENATE_ALL))
+		  {
+			concatenate(concatAll);
+		  }
+
+		  // if we should output the word or number part
+		  if (shouldGenerateParts(wordType))
+		  {
+			generatePart(false);
+			iterator.next();
+			return true;
+		  }
+
+		  iterator.next();
+		}
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		hasSavedState = false;
+		concat.clear();
+		concatAll.clear();
+		accumPosInc = 0;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Saves the existing attribute states
+	  /// </summary>
+	  private void saveState()
+	  {
+		// otherwise, we have delimiters, save state
+		savedStartOffset = offsetAttribute.startOffset();
+		savedEndOffset = offsetAttribute.endOffset();
+		// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
+		hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
+		savedType = typeAttribute.type();
+
+		if (savedBuffer.Length < termAttribute.length())
+		{
+		  savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+
+		Array.Copy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
+		iterator.text = savedBuffer;
+
+		hasSavedState = true;
+	  }
+
+	  /// <summary>
+	  /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
+	  /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+	  private bool flushConcatenation(WordDelimiterConcatenation concatenation)
+	  {
+		lastConcatCount = concatenation.subwordCount;
+		if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type))
+		{
+		  concatenation.writeAndClear();
+		  return true;
+		}
+		concatenation.clear();
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Determines whether to concatenate a word or number if the current word is the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
+	  /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+	  private bool shouldConcatenate(int wordType)
+	  {
+		return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Determines whether a word/number part should be generated for a word of the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
+	  /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+	  private bool shouldGenerateParts(int wordType)
+	  {
+		return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+	  private void concatenate(WordDelimiterConcatenation concatenation)
+	  {
+		if (concatenation.Empty)
+		{
+		  concatenation.startOffset = savedStartOffset + iterator.current;
+		}
+		concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
+		concatenation.endOffset = savedStartOffset + iterator.end;
+	  }
+
+	  /// <summary>
+	  /// Generates a word/number part, updating the appropriate attributes
+	  /// </summary>
+	  /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+	  private void generatePart(bool isSingleWord)
+	  {
+		clearAttributes();
+		termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+
+		int startOffset = savedStartOffset + iterator.current;
+		int endOffset = savedStartOffset + iterator.end;
+
+		if (hasIllegalOffsets)
+		{
+		  // historically this filter did this regardless of 'isSingleWord',
+		  // but we must do a sanity check:
+		  if (isSingleWord && startOffset <= savedEndOffset)
+		  {
+			offsetAttribute.setOffset(startOffset, savedEndOffset);
+		  }
+		  else
+		  {
+			offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+		  }
+		}
+		else
+		{
+		  offsetAttribute.setOffset(startOffset, endOffset);
+		}
+		posIncAttribute.PositionIncrement = position(false);
+		typeAttribute.Type = savedType;
+	  }
+
+	  /// <summary>
+	  /// Get the position increment gap for a subword or concatenation
+	  /// </summary>
+	  /// <param name="inject"> true if this token wants to be injected </param>
+	  /// <returns> position increment gap </returns>
+	  private int position(bool inject)
+	  {
+		int posInc = accumPosInc;
+
+		if (hasOutputToken)
+		{
+		  accumPosInc = 0;
+		  return inject ? 0 : Math.Max(1, posInc);
+		}
+
+		hasOutputToken = true;
+
+		if (!hasOutputFollowingOriginal)
+		{
+		  // the first token following the original is 0 regardless
+		  hasOutputFollowingOriginal = true;
+		  return 0;
+		}
+		// clear the accumulated position increment
+		accumPosInc = 0;
+		return Math.Max(1, posInc);
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#ALPHA"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+	  internal static bool isAlpha(int type)
+	  {
+		return (type & ALPHA) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#DIGIT"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+	  internal static bool isDigit(int type)
+	  {
+		return (type & DIGIT) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#SUBWORD_DELIM"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+	  internal static bool isSubwordDelim(int type)
+	  {
+		return (type & SUBWORD_DELIM) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#UPPER"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+	  internal static bool isUpper(int type)
+	  {
+		return (type & UPPER) != 0;
+	  }
+
+	  /// <summary>
+	  /// Determines whether the given flag is set
+	  /// </summary>
+	  /// <param name="flag"> Flag to see if set </param>
+	  /// <returns> {@code true} if flag is set </returns>
+	  private bool has(int flag)
+	  {
+		return (flags & flag) != 0;
+	  }
+
+	  // ================================================= Inner Classes =================================================
+
+	  /// <summary>
+	  /// A WDF concatenated 'run'
+	  /// </summary>
+	  internal sealed class WordDelimiterConcatenation
+	  {
+		  private readonly Lucene47WordDelimiterFilter outerInstance;
+
+		  public WordDelimiterConcatenation(Lucene47WordDelimiterFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		internal readonly StringBuilder buffer = new StringBuilder();
+		internal int startOffset;
+		internal int endOffset;
+		internal int type;
+		internal int subwordCount;
+
+		/// <summary>
+		/// Appends the given text of the given length to the concatenation at the given offset
+		/// </summary>
+		/// <param name="text"> Text to append </param>
+		/// <param name="offset"> Offset in the concatenation at which to add the text </param>
+		/// <param name="length"> Length of the text to append </param>
+		internal void append(char[] text, int offset, int length)
+		{
+		  buffer.Append(text, offset, length);
+		  subwordCount++;
+		}
+
+		/// <summary>
+		/// Writes the concatenation to the attributes
+		/// </summary>
+		internal void write()
+		{
+		  clearAttributes();
+		  if (outerInstance.termAttribute.length() < buffer.Length)
+		  {
+			outerInstance.termAttribute.resizeBuffer(buffer.Length);
+		  }
+		  char[] termbuffer = outerInstance.termAttribute.buffer();
+
+		  buffer.getChars(0, buffer.Length, termbuffer, 0);
+		  outerInstance.termAttribute.Length = buffer.Length;
+
+		  if (outerInstance.hasIllegalOffsets)
+		  {
+			outerInstance.offsetAttribute.setOffset(outerInstance.savedStartOffset, outerInstance.savedEndOffset);
+		  }
+		  else
+		  {
+			outerInstance.offsetAttribute.setOffset(startOffset, endOffset);
+		  }
+		  outerInstance.posIncAttribute.PositionIncrement = outerInstance.position(true);
+		  outerInstance.typeAttribute.Type = outerInstance.savedType;
+		  outerInstance.accumPosInc = 0;
+		}
+
+		/// <summary>
+		/// Determines if the concatenation is empty
+		/// </summary>
+		/// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
+		internal bool Empty
+		{
+			get
+			{
+			  return buffer.Length == 0;
+			}
+		}
+
+		/// <summary>
+		/// Clears the concatenation and resets its state
+		/// </summary>
+		internal void clear()
+		{
+		  buffer.Length = 0;
+		  startOffset = endOffset = type = subwordCount = 0;
+		}
+
+		/// <summary>
+		/// Convenience method for the common scenario of writing the concatenation and then clearing its state
+		/// </summary>
+		internal void writeAndClear()
+		{
+		  write();
+		  clear();
+		}
+	  }
+	  // questions:
+	  // negative numbers?  -42 indexed as just 42?
+	  // dollar sign?  $42
+	  // percent sign?  33%
+	  // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
+	}
+
+}
\ No newline at end of file
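
The flag-driven helpers above (has, shouldConcatenate, shouldGenerateParts) all
test bits of a single int configuration. A minimal sketch of how a caller would
compose those bits, assuming the port keeps the Java constant names (the
constructor itself is not shown in this hunk, so it is omitted here):

    // Hypothetical composition of word-delimiter configuration flags.
    int flags = Lucene47WordDelimiterFilter.GENERATE_WORD_PARTS
              | Lucene47WordDelimiterFilter.GENERATE_NUMBER_PARTS
              | Lucene47WordDelimiterFilter.CATENATE_ALL;

    // has(CATENATE_ALL) inside the filter reduces to this bit test:
    bool catenateAll = (flags & Lucene47WordDelimiterFilter.CATENATE_ALL) != 0; // true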

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
new file mode 100644
index 0000000..77cbe8e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -0,0 +1,566 @@
+using System;
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
+	/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
+	/// (with behaviour identical to <seealso cref="String#split(String)"/>),
+	/// and that combines the functionality of
+	/// <seealso cref="LetterTokenizer"/>,
+	/// <seealso cref="LowerCaseTokenizer"/>,
+	/// <seealso cref="WhitespaceTokenizer"/>,
+	/// <seealso cref="StopFilter"/> into a single efficient
+	/// multi-purpose class.
+	/// <para>
+	/// If you are unsure what a suitable regular expression should look like, consider
+	/// prototyping by simply trying various expressions on some test texts via
+	/// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
+	/// PatternAnalyzer. Also see <a target="_blank" 
+	/// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+	/// </para>
+	/// <para>
+	/// This class can be considerably faster than the "normal" Lucene tokenizers. 
+	/// It can also serve as a building block in a compound Lucene
+	/// <seealso cref="org.apache.lucene.analysis.TokenFilter"/> chain. For example as in this 
+	/// stemming example:
+	/// <pre>
+	/// PatternAnalyzer pat = ...
+	/// TokenStream tokenStream = new SnowballFilter(
+	///     pat.tokenStream("content", "James is running round in the woods"), 
+	///     "English"));
+	/// </pre>
+	/// </para>
+	/// </summary>
+	/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
+	[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
+	public sealed class PatternAnalyzer : Analyzer
+	{
+
+	  /// <summary>
+	  /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+	  public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
+
+	  /// <summary>
+	  /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+	  public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+
+	  private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", 
 "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", 
 "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
+
+	  /// <summary>
+	  /// A lower-casing word analyzer with English stop words (can be shared
+	  /// freely across threads without harm); global per class loader.
+	  /// </summary>
+	  public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+	  /// <summary>
+	  /// A lower-casing word analyzer with <b>extended </b> English stop words
+	  /// (can be shared freely across threads without harm); global per class
+	  /// loader. The stop words are borrowed from
+	  /// http://thomas.loc.gov/home/stopwords.html, see
+	  /// http://thomas.loc.gov/home/all.about.inquery.html
+	  /// </summary>
+	  public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+
+	  private readonly Pattern pattern;
+	  private readonly bool toLowerCase;
+	  private readonly CharArraySet stopWords;
+
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Constructs a new instance with the given parameters.
+	  /// </summary>
+	  /// <param name="matchVersion"> currently does nothing </param>
+	  /// <param name="pattern">
+	  ///            a regular expression delimiting tokens </param>
+	  /// <param name="toLowerCase">
+	  ///            if <code>true</code> returns tokens after applying
+	  ///            String.toLowerCase() </param>
+	  /// <param name="stopWords">
+	  ///            if non-null, ignores all tokens that are contained in the
+	  ///            given stop set (after previously having applied toLowerCase()
+	  ///            if applicable). For example, created via
+	  ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/>and/or
+	  ///            <seealso cref="org.apache.lucene.analysis.util.WordlistLoader"/>as in
+	  ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
+	  ///            or <a href="http://www.unine.ch/info/clef/">other stop words
+	  ///            lists </a>. </param>
+	  public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
+	  {
+		if (pattern == null)
+		{
+		  throw new System.ArgumentException("pattern must not be null");
+		}
+
+		if (eqPattern(NON_WORD_PATTERN, pattern))
+		{
+			pattern = NON_WORD_PATTERN;
+		}
+		else if (eqPattern(WHITESPACE_PATTERN, pattern))
+		{
+			pattern = WHITESPACE_PATTERN;
+		}
+
+		if (stopWords != null && stopWords.size() == 0)
+		{
+			stopWords = null;
+		}
+
+		this.pattern = pattern;
+		this.toLowerCase = toLowerCase;
+		this.stopWords = stopWords;
+		this.matchVersion = matchVersion;
+	  }
+
+	  /// <summary>
+	  /// Creates a token stream that tokenizes the given string into token terms
+	  /// (aka words).
+	  /// </summary>
+	  /// <param name="fieldName">
+	  ///            the name of the field to tokenize (currently ignored). </param>
+	  /// <param name="reader">
+	  ///            reader (e.g. charfilter) of the original text. can be null. </param>
+	  /// <param name="text">
+	  ///            the string to tokenize </param>
+	  /// <returns> a new token stream </returns>
+	  public TokenStreamComponents createComponents(string fieldName, Reader reader, string text)
+	  {
+		// Ideally the Analyzer superclass should have a method with the same signature, 
+		// with a default impl that simply delegates to the StringReader flavour. 
+		if (reader == null)
+		{
+		  reader = new FastStringReader(text);
+		}
+
+		if (pattern == NON_WORD_PATTERN) // fast path
+		{
+		  return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
+		} // fast path
+		else if (pattern == WHITESPACE_PATTERN)
+		{
+		  return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
+		}
+
+		Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
+		TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+		return new TokenStreamComponents(tokenizer, result);
+	  }
+
+	  /// <summary>
+	  /// Creates a token stream that tokenizes all the text in the given Reader;
+	  /// this implementation forwards to <code>createComponents(String, Reader, String)</code> and is
+	  /// less efficient than supplying the string to that method directly.
+	  /// </summary>
+	  /// <param name="fieldName">
+	  ///            the name of the field to tokenize (currently ignored). </param>
+	  /// <param name="reader">
+	  ///            the reader delivering the text </param>
+	  /// <returns> a new token stream </returns>
+	  public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		return createComponents(fieldName, reader, null);
+	  }
+
+	  /// <summary>
+	  /// Indicates whether some other object is "equal to" this one.
+	  /// </summary>
+	  /// <param name="other">
+	  ///            the reference object with which to compare. </param>
+	  /// <returns> true if equal, false otherwise </returns>
+	  public override bool Equals(object other)
+	  {
+		if (this == other)
+		{
+			return true;
+		}
+		if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
+		{
+			return false;
+		}
+		if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
+		{
+			return false;
+		}
+
+		if (other is PatternAnalyzer)
+		{
+		  PatternAnalyzer p2 = (PatternAnalyzer) other;
+		  return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
+		}
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Returns a hash code value for the object.
+	  /// </summary>
+	  /// <returns> the hash code. </returns>
+	  public override int GetHashCode()
+	  {
+		if (this == DEFAULT_ANALYZER) // fast path
+		{
+			return -1218418418;
+		}
+		if (this == EXTENDED_ANALYZER) // fast path
+		{
+			return 1303507063;
+		}
+
+		int h = 1;
+		h = 31 * h + pattern.pattern().GetHashCode();
+		h = 31 * h + pattern.flags();
+		h = 31 * h + (toLowerCase ? 1231 : 1237);
+		h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
+		return h;
+	  }
+
+	  /// <summary>
+	  /// equality where o1 and/or o2 can be null </summary>
+	  private static bool eq(object o1, object o2)
+	  {
+		return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
+	  }
+
+	  /// <summary>
+	  /// assumes p1 and p2 are not null </summary>
+	  private static bool eqPattern(Pattern p1, Pattern p2)
+	  {
+		return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
+	  }
+
+	  /// <summary>
+	  /// Reads until end-of-stream and returns all read chars, finally closes the stream.
+	  /// </summary>
+	  /// <param name="input"> the input stream </param>
+	  /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
+	  private static string ToString(Reader input)
+	  {
+		if (input is FastStringReader) // fast path
+		{
+		  return ((FastStringReader) input).String;
+		}
+
+		try
+		{
+		  int len = 256;
+		  char[] buffer = new char[len];
+		  char[] output = new char[len];
+
+		  len = 0;
+		  int n;
+		  while ((n = input.read(buffer)) >= 0)
+		  {
+			if (len + n > output.Length) // grow capacity
+			{
+			  char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
+			  Array.Copy(output, 0, tmp, 0, len);
+			  Array.Copy(buffer, 0, tmp, len, n);
+			  buffer = output; // use larger buffer for future larger bulk reads
+			  output = tmp;
+			}
+			else
+			{
+			  Array.Copy(buffer, 0, output, len, n);
+			}
+			len += n;
+		  }
+
+		  return new string(output, 0, len);
+		}
+		finally
+		{
+		  input.close();
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// The workhorse; performance isn't fantastic, but it's not nearly as bad
+	  /// as one might think - kudos to the Sun regex developers.
+	  /// </summary>
+	  private sealed class PatternTokenizer : Tokenizer
+	  {
+
+		internal readonly Pattern pattern;
+		internal string str;
+		internal readonly bool toLowerCase;
+		internal Matcher matcher;
+		internal int pos = 0;
+		internal bool initialized = false;
+		internal static readonly Locale locale = Locale.Default;
+		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+		public PatternTokenizer(Reader input, Pattern pattern, bool toLowerCase) : base(input)
+		{
+		  this.pattern = pattern;
+		  this.matcher = pattern.matcher("");
+		  this.toLowerCase = toLowerCase;
+		}
+
+		public override bool incrementToken()
+		{
+		  if (!initialized)
+		  {
+			throw new System.InvalidOperationException("Consumer did not call reset().");
+		  }
+		  if (matcher == null)
+		  {
+			  return false;
+		  }
+		  clearAttributes();
+		  while (true) // loop takes care of leading and trailing boundary cases
+		  {
+			int start = pos;
+			int matchEnd;
+			bool isMatch = matcher.find();
+			if (isMatch)
+			{
+			  matchEnd = matcher.start();
+			  pos = matcher.end();
+			}
+			else
+			{
+			  matchEnd = str.Length;
+			  matcher = null; // we're finished
+			}
+
+			if (start != matchEnd) // non-empty match (header/trailer)
+			{
+			  string text = str.Substring(start, matchEnd - start);
+			  if (toLowerCase)
+			  {
+				  text = text.ToLower(locale);
+			  }
+			  termAtt.setEmpty().append(text);
+			  offsetAtt.setOffset(correctOffset(start), correctOffset(matchEnd));
+			  return true;
+			}
+			if (!isMatch)
+			{
+				return false;
+			}
+		  }
+		}
+
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+		  int finalOffset = correctOffset(str.Length);
+		  this.offsetAtt.setOffset(finalOffset, finalOffset);
+		}
+
+		public override void close()
+		{
+		  base.close();
+		  this.initialized = false;
+		}
+
+		public override void reset()
+		{
+		  base.reset();
+		  this.str = PatternAnalyzer.ToString(input);
+		  this.matcher = pattern.matcher(this.str);
+		  this.pos = 0;
+		  this.initialized = true;
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// Special-case class for best performance in common cases; this class is
+	  /// otherwise unnecessary.
+	  /// </summary>
+	  private sealed class FastStringTokenizer : Tokenizer
+	  {
+
+		internal string str;
+		internal int pos;
+		internal readonly bool isLetter;
+		internal readonly bool toLowerCase;
+		internal readonly CharArraySet stopWords;
+		internal static readonly Locale locale = Locale.Default;
+		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+		public FastStringTokenizer(Reader input, bool isLetter, bool toLowerCase, CharArraySet stopWords) : base(input)
+		{
+		  this.isLetter = isLetter;
+		  this.toLowerCase = toLowerCase;
+		  this.stopWords = stopWords;
+		}
+
+		public override bool incrementToken()
+		{
+		  if (str == null)
+		  {
+			throw new System.InvalidOperationException("Consumer did not call reset().");
+		  }
+		  clearAttributes();
+		  // cache loop instance vars (performance)
+		  string s = str;
+		  int len = s.Length;
+		  int i = pos;
+		  bool letter = isLetter;
+
+		  int start = 0;
+		  string text;
+		  do
+		  {
+			// find beginning of token
+			text = null;
+			while (i < len && !isTokenChar(s[i], letter))
+			{
+			  i++;
+			}
+
+			if (i < len) // found beginning; now find end of token
+			{
+			  start = i;
+			  while (i < len && isTokenChar(s[i], letter))
+			  {
+				i++;
+			  }
+
+			  text = s.Substring(start, i - start);
+			  if (toLowerCase)
+			  {
+				  text = text.ToLower(locale);
+			  }
+	//          if (toLowerCase) {            
+	////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
+	////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
+	//            text = s.substring(start, i).toLowerCase(); 
+	////            char[] chars = new char[i-start];
+	////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
+	////            text = new String(chars);
+	//          } else {
+	//            text = s.substring(start, i);
+	//          }
+			}
+		  } while (text != null && isStopWord(text));
+
+		  pos = i;
+		  if (text == null)
+		  {
+			return false;
+		  }
+		  termAtt.setEmpty().append(text);
+		  offsetAtt.setOffset(correctOffset(start), correctOffset(i));
+		  return true;
+		}
+
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+		  int finalOffset = str.Length;
+		  this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
+		}
+
+		internal bool isTokenChar(char c, bool isLetter)
+		{
+		  return isLetter ? char.IsLetter(c) :!char.IsWhiteSpace(c);
+		}
+
+		internal bool isStopWord(string text)
+		{
+		  return stopWords != null && stopWords.contains(text);
+		}
+
+		public override void close()
+		{
+		  base.close();
+		  this.str = null;
+		}
+
+		public override void reset()
+		{
+		  base.reset();
+		  this.str = PatternAnalyzer.ToString(input);
+		  this.pos = 0;
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// A StringReader that exposes its contained string for fast direct access.
+	  /// Might make sense to generalize this to CharSequence and make it public?
+	  /// </summary>
+	  internal sealed class FastStringReader : StringReader
+	  {
+
+		internal readonly string s;
+
+		internal FastStringReader(string s) : base(s)
+		{
+		  this.s = s;
+		}
+
+		internal string String
+		{
+			get
+			{
+			  return s;
+			}
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file
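
A minimal usage sketch for the analyzer above, assuming the port keeps the
Java member names (DEFAULT_ANALYZER and a tokenStream overload are taken from
the code and doc comments in this hunk, not verified against the rest of the
port):

    // Hypothetical: tokenize a string with the shared default analyzer,
    // which splits on "\W+", lowercases, and removes English stop words.
    PatternAnalyzer pat = PatternAnalyzer.DEFAULT_ANALYZER;
    TokenStream ts = pat.tokenStream("content", new StringReader("James is running round in the woods"));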

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
new file mode 100644
index 0000000..4402d5a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+	/// that matches the provided pattern is marked as a keyword by setting
+	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+	/// </summary>
+	public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly Matcher matcher;
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="PatternKeywordMarkerFilter"/> that marks the current
+	  /// token as a keyword if the token's term buffer matches the provided
+	  /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
+	  /// </summary>
+	  /// <param name="in">
+	  ///          TokenStream to filter </param>
+	  /// <param name="pattern">
+	  ///          the pattern to apply to the incoming term buffer
+	  ///  </param>
+	  public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern) : base(@in)
+	  {
+		this.matcher = pattern.matcher("");
+	  }
+
+	  protected internal override bool Keyword
+	  {
+		  get
+		  {
+			matcher.reset(termAtt);
+			return matcher.matches();
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
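
A hedged sketch of the usual chaining for the filter above, so that a
downstream stemmer skips the marked tokens (the PorterStemFilter pairing is
illustrative, and the Pattern construction mirrors the Java-style regex types
this hunk uses):

    // Hypothetical chain: tokens matching the pattern keep their exact form.
    TokenStream ts = new PatternKeywordMarkerFilter(tokenizer, Pattern.compile("[A-Z].*"));
    ts = new PorterStemFilter(ts); // keyword-marked tokens pass through unstemmed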

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
new file mode 100644
index 0000000..f61b230
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -0,0 +1,93 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+
+	/// <summary>
+	/// This analyzer is used to facilitate scenarios where different
+	/// fields require different analysis techniques.  Use the Map
+	/// argument in <seealso cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
+	/// to add non-default analyzers for fields.
+	/// 
+	/// <para>Example usage:
+	/// 
+	/// <pre class="prettyprint">
+	/// {@code
+	/// Map<String,Analyzer> analyzerPerField = new HashMap<>();
+	/// analyzerPerField.put("firstname", new KeywordAnalyzer());
+	/// analyzerPerField.put("lastname", new KeywordAnalyzer());
+	/// 
+	/// PerFieldAnalyzerWrapper aWrapper =
+	///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
+	/// }
+	/// </pre>
+	/// 
+	/// </para>
+	/// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
+	/// and "lastname", for which KeywordAnalyzer will be used.
+	/// 
+	/// </para>
+	/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+	/// and query parsing.
+	/// </para>
+	/// </summary>
+	public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
+	{
+	  private readonly Analyzer defaultAnalyzer;
+	  private readonly IDictionary<string, Analyzer> fieldAnalyzers;
+
+	  /// <summary>
+	  /// Constructs with default analyzer.
+	  /// </summary>
+	  /// <param name="defaultAnalyzer"> Any fields not specifically
+	  /// defined to use a different analyzer will use the one provided here. </param>
+	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, null)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Constructs with default analyzer and a map of analyzers to use for 
+	  /// specific fields.
+	  /// </summary>
+	  /// <param name="defaultAnalyzer"> Any fields not specifically
+	  /// defined to use a different analyzer will use the one provided here. </param>
+	  /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
+	  /// used for those fields  </param>
+	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers) : base(PER_FIELD_REUSE_STRATEGY)
+	  {
+		this.defaultAnalyzer = defaultAnalyzer;
+		this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : new Dictionary<string, Analyzer>();
+	  }
+
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		// Java's Map.get returns null for a missing key; the C# indexer throws,
+		// so use TryGetValue and fall back to the default analyzer.
+		Analyzer analyzer;
+		return fieldAnalyzers.TryGetValue(fieldName, out analyzer) ? analyzer : defaultAnalyzer;
+	  }
+
+	  public override string ToString()
+	  {
+		return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
+	  }
+	}
+
+}
\ No newline at end of file
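
The class doc above gives the example in Java flavour; the corresponding C#
call pattern would plausibly be (KeywordAnalyzer and StandardAnalyzer are
carried over from the doc comment, not from this commit):

    var analyzerPerField = new Dictionary<string, Analyzer>();
    analyzerPerField["firstname"] = new KeywordAnalyzer();
    analyzerPerField["lastname"] = new KeywordAnalyzer();
    var aWrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);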

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
new file mode 100644
index 0000000..de8b8ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -0,0 +1,112 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Links two <seealso cref="PrefixAwareTokenFilter"/> instances.
+	/// <p/>
+	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
+	/// </summary>
+	public class PrefixAndSuffixAwareTokenFilter : TokenStream
+	{
+
+	  private PrefixAwareTokenFilter suffix;
+
+	  public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+	  {
+		prefix = new PrefixAwareTokenFilterAnonymousInnerClassHelper(this, prefix, input);
+		this.suffix = new PrefixAwareTokenFilterAnonymousInnerClassHelper2(this, prefix, suffix);
+	  }
+
+	  private class PrefixAwareTokenFilterAnonymousInnerClassHelper : PrefixAwareTokenFilter
+	  {
+		  private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
+
+		  public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input) : base(prefix, input)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+		  {
+			return outerInstance.updateInputToken(suffixToken, lastInputToken);
+		  }
+	  }
+
+	  private class PrefixAwareTokenFilterAnonymousInnerClassHelper2 : PrefixAwareTokenFilter
+	  {
+		  private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
+
+		  public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+		  {
+			return outerInstance.updateSuffixToken(suffixToken, lastInputToken);
+		  }
+	  }
+
+	  public virtual Token updateInputToken(Token inputToken, Token lastPrefixToken)
+	  {
+		inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(), lastPrefixToken.endOffset() + inputToken.endOffset());
+		return inputToken;
+	  }
+
+	  public virtual Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+	  {
+		suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(), lastInputToken.endOffset() + suffixToken.endOffset());
+		return suffixToken;
+	  }
+
+
+	  public override bool incrementToken()
+	  {
+		return suffix.incrementToken();
+	  }
+
+	  public override void reset()
+	  {
+		suffix.reset();
+	  }
+
+
+	  public override void close()
+	  {
+		suffix.close();
+	  }
+
+	  public override void end()
+	  {
+		suffix.end();
+	  }
+	}
+
+}
\ No newline at end of file
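
A minimal composition sketch for the wrapper above, assuming prefixStream,
mainStream, and suffixStream are independently constructed token streams:

    // Hypothetical: emits prefix tokens, then input tokens, then suffix
    // tokens, shifting each later range past the end of the one before it.
    TokenStream ts = new PrefixAndSuffixAwareTokenFilter(prefixStream, mainStream, suffixStream);
    ts.reset();
    while (ts.incrementToken()) { /* consume attributes */ }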

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
new file mode 100644
index 0000000..7835e7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -0,0 +1,246 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Joins two token streams and leaves the last token of the first stream available
+	/// to be used when updating the token values in the second stream based on that token.
+	/// 
+	/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+	/// <p/>
+	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
+	/// </summary>
+	public class PrefixAwareTokenFilter : TokenStream
+	{
+
+	  private TokenStream prefix;
+	  private TokenStream suffix;
+
+	  private CharTermAttribute termAtt;
+	  private PositionIncrementAttribute posIncrAtt;
+	  private PayloadAttribute payloadAtt;
+	  private OffsetAttribute offsetAtt;
+	  private TypeAttribute typeAtt;
+	  private FlagsAttribute flagsAtt;
+
+	  private CharTermAttribute p_termAtt;
+	  private PositionIncrementAttribute p_posIncrAtt;
+	  private PayloadAttribute p_payloadAtt;
+	  private OffsetAttribute p_offsetAtt;
+	  private TypeAttribute p_typeAtt;
+	  private FlagsAttribute p_flagsAtt;
+
+	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+	  {
+		this.suffix = suffix;
+		this.prefix = prefix;
+		prefixExhausted = false;
+
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+		payloadAtt = addAttribute(typeof(PayloadAttribute));
+		offsetAtt = addAttribute(typeof(OffsetAttribute));
+		typeAtt = addAttribute(typeof(TypeAttribute));
+		flagsAtt = addAttribute(typeof(FlagsAttribute));
+
+		p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
+		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
+		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
+		p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
+		p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
+		p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
+	  }
+
+	  private Token previousPrefixToken = new Token();
+	  private Token reusableToken = new Token();
+
+	  private bool prefixExhausted;
+
+	  public override bool incrementToken()
+	  {
+		if (!prefixExhausted)
+		{
+		  Token nextToken = getNextPrefixInputToken(reusableToken);
+		  if (nextToken == null)
+		  {
+			prefixExhausted = true;
+		  }
+		  else
+		  {
+			previousPrefixToken.reinit(nextToken);
+			// Make it a deep copy
+			BytesRef p = previousPrefixToken.Payload;
+			if (p != null)
+			{
+			  previousPrefixToken.Payload = p.clone();
+			}
+			CurrentToken = nextToken;
+			return true;
+		  }
+		}
+
+		Token nextToken = getNextSuffixInputToken(reusableToken);
+		if (nextToken == null)
+		{
+		  return false;
+		}
+
+		nextToken = updateSuffixToken(nextToken, previousPrefixToken);
+		CurrentToken = nextToken;
+		return true;
+	  }
+
+	  private Token CurrentToken
+	  {
+		  set
+		  {
+			if (value == null)
+			{
+				return;
+			}
+			clearAttributes();
+			termAtt.copyBuffer(value.buffer(), 0, value.length());
+			posIncrAtt.PositionIncrement = value.PositionIncrement;
+			flagsAtt.Flags = value.Flags;
+			offsetAtt.setOffset(value.startOffset(), value.endOffset());
+			typeAtt.Type = value.type();
+			payloadAtt.Payload = value.Payload;
+		  }
+	  }
+
+	  private Token getNextPrefixInputToken(Token token)
+	  {
+		if (!prefix.incrementToken())
+		{
+			return null;
+		}
+		token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
+		token.PositionIncrement = p_posIncrAtt.PositionIncrement;
+		token.Flags = p_flagsAtt.Flags;
+		token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
+		token.Type = p_typeAtt.type();
+		token.Payload = p_payloadAtt.Payload;
+		return token;
+	  }
+
+	  private Token getNextSuffixInputToken(Token token)
+	  {
+		if (!suffix.incrementToken())
+		{
+			return null;
+		}
+		token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
+		token.PositionIncrement = posIncrAtt.PositionIncrement;
+		token.Flags = flagsAtt.Flags;
+		token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
+		token.Type = typeAtt.type();
+		token.Payload = payloadAtt.Payload;
+		return token;
+	  }
+
+	  /// <summary>
+	  /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+	  /// </summary>
+	  /// <param name="suffixToken"> a token from the suffix stream </param>
+	  /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
+	  /// <returns> consumer token </returns>
+	  public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
+	  {
+		suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(), lastPrefixToken.endOffset() + suffixToken.endOffset());
+		return suffixToken;
+	  }
+
+	  public override void end()
+	  {
+		prefix.end();
+		suffix.end();
+	  }
+
+	  public override void close()
+	  {
+		prefix.close();
+		suffix.close();
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		if (prefix != null)
+		{
+		  prefixExhausted = false;
+		  prefix.reset();
+		}
+		if (suffix != null)
+		{
+		  suffix.reset();
+		}
+	  }
+
+	  public virtual TokenStream Prefix
+	  {
+		  get
+		  {
+			return prefix;
+		  }
+		  set
+		  {
+			this.prefix = value;
+		  }
+	  }
+
+
+	  public virtual TokenStream Suffix
+	  {
+		  get
+		  {
+			return suffix;
+		  }
+		  set
+		  {
+			this.suffix = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
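
A quick worked illustration of the default offset shift implemented by
updateSuffixToken above, using assumed token values:

    // Suppose the last prefix token ends at offset 5 and a suffix token spans
    // [0, 4) in its own stream; the default implementation shifts it to [5, 9).
    Token shifted = filter.updateSuffixToken(suffixToken, lastPrefixToken);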

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
new file mode 100644
index 0000000..9c2586f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
@@ -0,0 +1,99 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
+	/// </summary>
+	public sealed class RemoveDuplicatesTokenFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+
+	  // use a fixed version, as we don't care about case sensitivity.
+	  private readonly CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
+
+	  /// <summary>
+	  /// Creates a new RemoveDuplicatesTokenFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream that will be filtered </param>
+	  public RemoveDuplicatesTokenFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  /// <inheritdoc/>
+	  public override bool incrementToken()
+	  {
+		while (input.incrementToken())
+		{
+		  char[] term = termAttribute.buffer();
+		  int length = termAttribute.length();
+		  int posIncrement = posIncAttribute.PositionIncrement;
+
+		  if (posIncrement > 0)
+		  {
+			previous.clear();
+		  }
+
+		  bool duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
+
+		  // clone the term, and add to the set of seen terms.
+		  char[] saved = new char[length];
+		  Array.Copy(term, 0, saved, 0, length);
+		  previous.add(saved);
+
+		  if (!duplicate)
+		  {
+			return true;
+		  }
+		}
+		return false;
+	  }
+
+	  /// <inheritdoc/>
+	  public override void reset()
+	  {
+		base.reset();
+		previous.clear();
+	  }
+	}
+
+}
\ No newline at end of file
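
A short sketch of the duplicate-removal behaviour summarized above, with
assumed terms and position increments:

    // Input (term, posIncrement): ("fast", 1) ("quick", 0) ("fast", 0)
    // Output: "fast", "quick" -- the second "fast" shares a position with
    // the first occurrence and is dropped.
    TokenStream ts = new RemoveDuplicatesTokenFilter(synonymStream);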

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
new file mode 100644
index 0000000..bae261e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="RemoveDuplicatesTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class RemoveDuplicatesTokenFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RemoveDuplicatesTokenFilterFactory </summary>
+	  public RemoveDuplicatesTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override RemoveDuplicatesTokenFilter create(TokenStream input)
+	  {
+		return new RemoveDuplicatesTokenFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
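
All four factories added in this commit share the same shape: consume recognized arguments in the constructor and reject any leftovers. A hedged sketch of driving one programmatically ("sourceStream" is a placeholder; the empty dictionary mirrors the no-parameter contract):

    var args = new Dictionary<string, string>();        // this factory accepts no parameters
    var factory = new RemoveDuplicatesTokenFilterFactory(args);
    TokenStream filtered = factory.create(sourceStream);

    // Any unrecognized key trips the guard in the constructor:
    //   args["foo"] = "bar";  ->  System.ArgumentException: Unknown parameters: ...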

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
new file mode 100644
index 0000000..06ecebc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
@@ -0,0 +1,135 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
+	/// It also discriminates against the use of double vowels aa, ae, ao, oe and oo, leaving just the first one.
+	/// <p/>
+	/// It is a semantically more destructive solution than <seealso cref="ScandinavianNormalizationFilter"/> but
+	/// can in addition help with matching raksmorgas to räksmörgås.
+	/// <p/>
+	/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej == blabarsyltetoj
+	/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas == raksmorgas
+	/// <p/>
+	/// Background:
+	/// Swedish åäö are in fact the same letters as Norwegian and Danish åæø and thus interchangeable
+	/// when used between these languages. They are however folded differently when people type
+	/// them on a keyboard lacking these characters.
+	/// <p/>
+	/// In that situation almost all Swedish people use a, a, o instead of å, ä, ö.
+	/// <p/>
+	/// Norwegians and Danes on the other hand usually type aa, ae and oe instead of å, æ and ø.
+	/// Some do however use a, a, o, oo, ao and sometimes permutations of everything above.
+	/// <p/>
+	/// This filter solves that mismatch problem, but might also cause new ones.
+	/// <p/> </summary>
+	/// <seealso cref="ScandinavianNormalizationFilter"/>
+	public sealed class ScandinavianFoldingFilter : TokenFilter
+	{
+
+	  public ScandinavianFoldingFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  private readonly CharTermAttribute charTermAttribute = addAttribute(typeof(CharTermAttribute));
+
+	  private const char AA = '\u00C5'; // Å
+	  private const char aa = '\u00E5'; // å
+	  private const char AE = '\u00C6'; // Æ
+	  private const char ae = '\u00E6'; // æ
+	  private const char AE_se = '\u00C4'; // Ä
+	  private const char ae_se = '\u00E4'; // ä
+	  private const char OE = '\u00D8'; // Ø
+	  private const char oe = '\u00F8'; // ø
+	  private const char OE_se = '\u00D6'; // Ö
+	  private const char oe_se = '\u00F6'; //ö
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttribute.buffer();
+		int length = charTermAttribute.length();
+
+
+		int i;
+		for (i = 0; i < length; i++)
+		{
+
+		  if (buffer[i] == aa || buffer[i] == ae_se || buffer[i] == ae)
+		  {
+
+			buffer[i] = 'a';
+
+		  }
+		  else if (buffer[i] == AA || buffer[i] == AE_se || buffer[i] == AE)
+		  {
+
+			buffer[i] = 'A';
+
+		  }
+		  else if (buffer[i] == oe || buffer[i] == oe_se)
+		  {
+
+			buffer[i] = 'o';
+
+		  }
+		  else if (buffer[i] == OE || buffer[i] == OE_se)
+		  {
+
+			buffer[i] = 'O';
+
+		  }
+		  else if (length - 1 > i)
+		  {
+
+			if ((buffer[i] == 'a' || buffer[i] == 'A') && (buffer[i + 1] == 'a' || buffer[i + 1] == 'A' || buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+
+			}
+			else if ((buffer[i] == 'o' || buffer[i] == 'O') && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+
+			}
+		  }
+		}
+
+		charTermAttribute.Length = length;
+
+
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file
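
A worked example of the fold rules above, with the expected outputs read off the class comment rather than from a test in this diff ("source" is a placeholder TokenStream):

    // åÅäæÄÆ -> a/A and öÖøØ -> o/O; an a/e/o following 'a' (or e/o following
    // 'o') is deleted, so these all index as the same term:
    //   "blåbærsyltetøj"    -> blabarsyltetoj
    //   "blaabaarsyltetoej" -> blabarsyltetoj
    //   "räksmörgås"        -> raksmorgas
    TokenStream ts = new ScandinavianFoldingFilter(source);   // wraps any TokenStream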

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
new file mode 100644
index 0000000..5b16722
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
@@ -0,0 +1,53 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ScandinavianFoldingFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_scandfold" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ScandinavianFoldingFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ScandinavianFoldingFilterFactory : TokenFilterFactory
+	{
+
+	  public ScandinavianFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ScandinavianFoldingFilter create(TokenStream input)
+	  {
+		return new ScandinavianFoldingFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
new file mode 100644
index 0000000..3113949
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
@@ -0,0 +1,145 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// This filter normalizes the use of the interchangeable Scandinavian characters æÆäÄöÖøØ
+	/// and folded variants (aa, ao, ae, oe and oo) by transforming them to åÅæÆøØ.
+	/// <p/>
+	/// It's a semantically less destructive solution than <seealso cref="ScandinavianFoldingFilter"/>,
+	/// most useful when a person with a Norwegian or Danish keyboard queries a Swedish index
+	/// and vice versa. This filter does <b>not</b> perform the common Swedish folds of å and ä to a nor ö to o.
+	/// <p/>
+	/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej but not blabarsyltetoj
+	/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas but not raksmorgas
+	/// <p/> </summary>
+	/// <seealso cref="ScandinavianFoldingFilter"/>
+	public sealed class ScandinavianNormalizationFilter : TokenFilter
+	{
+
+	  public ScandinavianNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  private readonly CharTermAttribute charTermAttribute = addAttribute(typeof(CharTermAttribute));
+
+	  private const char AA = '\u00C5'; // Å
+	  private const char aa = '\u00E5'; // å
+	  private const char AE = '\u00C6'; // Æ
+	  private const char ae = '\u00E6'; // æ
+	  private const char AE_se = '\u00C4'; // Ä
+	  private const char ae_se = '\u00E4'; // ä
+	  private const char OE = '\u00D8'; // Ø
+	  private const char oe = '\u00F8'; // ø
+	  private const char OE_se = '\u00D6'; // Ö
+	  private const char oe_se = '\u00F6'; //ö
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttribute.buffer();
+		int length = charTermAttribute.length();
+
+
+		int i;
+		for (i = 0; i < length; i++)
+		{
+
+		  if (buffer[i] == ae_se)
+		  {
+			buffer[i] = ae;
+
+		  }
+		  else if (buffer[i] == AE_se)
+		  {
+			buffer[i] = AE;
+
+		  }
+		  else if (buffer[i] == oe_se)
+		  {
+			buffer[i] = oe;
+
+		  }
+		  else if (buffer[i] == OE_se)
+		  {
+			buffer[i] = OE;
+
+		  }
+		  else if (length - 1 > i)
+		  {
+
+			if (buffer[i] == 'a' && (buffer[i + 1] == 'a' || buffer[i + 1] == 'o' || buffer[i + 1] == 'A' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = aa;
+
+			}
+			else if (buffer[i] == 'A' && (buffer[i + 1] == 'a' || buffer[i + 1] == 'A' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = AA;
+
+			}
+			else if (buffer[i] == 'a' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = ae;
+
+			}
+			else if (buffer[i] == 'A' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = AE;
+
+			}
+			else if (buffer[i] == 'o' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = oe;
+
+			}
+			else if (buffer[i] == 'O' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = OE;
+
+			}
+
+		  }
+		}
+
+		charTermAttribute.Length = length;
+
+
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file
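
How this differs from the folding filter, restated from the class comments as a hedged sketch ("source" is a placeholder TokenStream):

    // Normalization keeps the Scandinavian letters and only unifies spellings
    // toward åÅæÆøØ, so per the class comment above:
    //   blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej,
    //   but NOT == blabarsyltetoj (that match needs ScandinavianFoldingFilter).
    TokenStream ts = new ScandinavianNormalizationFilter(source);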

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
new file mode 100644
index 0000000..e3d3c7e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
@@ -0,0 +1,53 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_scandnorm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ScandinavianNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ScandinavianNormalizationFilterFactory : TokenFilterFactory
+	{
+
+	  public ScandinavianNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ScandinavianNormalizationFilter create(TokenStream input)
+	  {
+		return new ScandinavianNormalizationFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
new file mode 100644
index 0000000..b732319
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
@@ -0,0 +1,59 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+	/// contained in the provided set is marked as a keyword by setting
+	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+	/// </summary>
+	public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly CharArraySet keywordSet;
+
+	  /// <summary>
+	  /// Create a new SetKeywordMarkerFilter that marks the current token as a
+	  /// keyword if the token's term buffer is contained in the given set via the
+	  /// <seealso cref="KeywordAttribute"/>.
+	  /// </summary>
+	  /// <param name="in">
+	  ///          TokenStream to filter </param>
+	  /// <param name="keywordSet">
+	  ///          the keyword set used to look up the current term buffer </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public SetKeywordMarkerFilter(final org.apache.lucene.analysis.TokenStream in, final org.apache.lucene.analysis.util.CharArraySet keywordSet)
+	  public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet) : base(@in)
+	  {
+		this.keywordSet = keywordSet;
+	  }
+
+	  protected internal override bool Keyword
+	  {
+		  get
+		  {
+			return keywordSet.contains(termAtt.buffer(), 0, termAtt.length());
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
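
A typical placement sketch: mark protected words before a stemming stage so a keyword-aware stemmer leaves them untouched. The tokenizer source and the stemmer stage are assumptions; the CharArraySet constructor follows the (Version, capacity, ignoreCase) shape used elsewhere in this commit:

    var protectedWords = new CharArraySet(Version.LUCENE_48, 4, true);
    protectedWords.add("lucene");

    TokenStream ts = sourceTokenizer;                        // placeholder source
    ts = new SetKeywordMarkerFilter(ts, protectedWords);     // sets KeywordAttribute
    ts = new PorterStemFilter(ts);                           // assumed keyword-aware stemmer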


[02/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
new file mode 100644
index 0000000..65d7325
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
@@ -0,0 +1,93 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Abstract parent class for analysis factories that create <seealso cref="Tokenizer"/>
+    /// instances.
+    /// </summary>
+    public abstract class TokenizerFactory : AbstractAnalysisFactory
+    {
+
+        private static readonly AnalysisSPILoader<TokenizerFactory> loader = new AnalysisSPILoader<TokenizerFactory>(typeof(TokenizerFactory));
+
+        /// <summary>
+        /// looks up a tokenizer by name from context classpath </summary>
+        public static TokenizerFactory ForName(string name, IDictionary<string, string> args)
+        {
+            return loader.newInstance(name, args);
+        }
+
+        /// <summary>
+        /// looks up a tokenizer class by name from context classpath </summary>
+        public static Type LookupClass(string name)
+        {
+            return loader.lookupClass(name);
+        }
+
+        /// <summary>
+        /// returns a list of all available tokenizer names from context classpath </summary>
+        public static HashSet<string> AvailableTokenizers()
+        {
+            return loader.availableServices();
+        }
+
+        /// <summary>
+        /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+        /// Changes to the factories are visible after the method ends, all
+        /// iterators (<seealso cref="#availableTokenizers()"/>,...) stay consistent. 
+        /// 
+        /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+        /// never removed or replaced.
+        /// 
+        /// </para>
+        /// <para><em>This method is expensive and should only be called for discovery
+        /// of new factories on the given classpath/classloader!</em>
+        /// </para>
+        /// </summary>
+        public static void ReloadTokenizers(ClassLoader classloader)
+        {
+            loader.reload(classloader);
+        }
+
+        /// <summary>
+        /// Initialize this factory via a set of key-value pairs.
+        /// </summary>
+        protected internal TokenizerFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+        }
+
+        /// <summary>
+        /// Creates a TokenStream of the specified input using the default attribute factory. </summary>
+        public Tokenizer Create(TextReader input)
+        {
+            return Create(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
+        }
+
+        /// <summary>
+        /// Creates a TokenStream of the specified input using the given AttributeFactory </summary>
+        public abstract Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input);
+    }
+}
\ No newline at end of file
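
A sketch of driving the SPI lookup above; the registered name "whitespace" is an assumption about what the loader discovers on the classpath:

    var args = new Dictionary<string, string>();
    TokenizerFactory factory = TokenizerFactory.ForName("whitespace", args);
    Tokenizer tok = factory.Create(new StringReader("some text to tokenize"));
    // The TextReader overload above defers to the abstract
    // Create(AttributeSource.AttributeFactory, TextReader) with the
    // DEFAULT_ATTRIBUTE_FACTORY.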

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
new file mode 100644
index 0000000..baf3975
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
@@ -0,0 +1,305 @@
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+namespace org.apache.lucene.analysis.util
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Loader for text files that represent a list of stopwords.
+	/// </summary>
+	/// <seealso cref="IOUtils"/> to obtain <seealso cref="Reader"/> instances.
+	/// @lucene.internal
+	public class WordlistLoader
+	{
+
+	  private const int INITIAL_CAPACITY = 16;
+
+	  /// <summary>
+	  /// no instance </summary>
+	  private WordlistLoader()
+	  {
+	  }
+
+	  /// <summary>
+	  /// Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+	  /// leading and trailing whitespace). Every line of the Reader should contain only
+	  /// one word. The words need to be in lowercase if you make use of an
+	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+	  /// </summary>
+	  /// <param name="reader"> Reader containing the wordlist </param>
+	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
+	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, CharArraySet result) throws java.io.IOException
+	  public static CharArraySet getWordSet(Reader reader, CharArraySet result)
+	  {
+		BufferedReader br = null;
+		try
+		{
+		  br = getBufferedReader(reader);
+		  string word = null;
+		  while ((word = br.readLine()) != null)
+		  {
+			result.add(word.Trim());
+		  }
+		}
+		finally
+		{
+		  IOUtils.close(br);
+		}
+		return result;
+	  }
+
+	  /// <summary>
+	  /// Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+	  /// leading and trailing whitespace). Every line of the Reader should contain only
+	  /// one word. The words need to be in lowercase if you make use of an
+	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+	  /// </summary>
+	  /// <param name="reader"> Reader containing the wordlist </param>
+	  /// <param name="matchVersion"> the Lucene <seealso cref="Version"/> </param>
+	  /// <returns> A <seealso cref="CharArraySet"/> with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
+	  public static CharArraySet getWordSet(Reader reader, Version matchVersion)
+	  {
+		return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
+	  }
+
+	  /// <summary>
+	  /// Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
+	  /// leading and trailing whitespace). Every line of the Reader should contain only
+	  /// one word. The words need to be in lowercase if you make use of an
+	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+	  /// </summary>
+	  /// <param name="reader"> Reader containing the wordlist </param>
+	  /// <param name="comment"> The string representing a comment. </param>
+	  /// <param name="matchVersion"> the Lucene <seealso cref="Version"/> </param>
+	  /// <returns> A CharArraySet with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, String comment, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
+	  public static CharArraySet getWordSet(Reader reader, string comment, Version matchVersion)
+	  {
+		return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
+	  }
+
+	  /// <summary>
+	  /// Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
+	  /// leading and trailing whitespace). Every line of the Reader should contain only
+	  /// one word. The words need to be in lowercase if you make use of an
+	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+	  /// </summary>
+	  /// <param name="reader"> Reader containing the wordlist </param>
+	  /// <param name="comment"> The string representing a comment. </param>
+	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
+	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, String comment, CharArraySet result) throws java.io.IOException
+	  public static CharArraySet getWordSet(Reader reader, string comment, CharArraySet result)
+	  {
+		BufferedReader br = null;
+		try
+		{
+		  br = getBufferedReader(reader);
+		  string word = null;
+		  while ((word = br.readLine()) != null)
+		  {
+			if (word.StartsWith(comment, StringComparison.Ordinal) == false)
+			{
+			  result.add(word.Trim());
+			}
+		  }
+		}
+		finally
+		{
+		  IOUtils.close(br);
+		}
+		return result;
+	  }
+
+
+	  /// <summary>
+	  /// Reads stopwords from a stopword list in Snowball format.
+	  /// <para>
+	  /// The snowball format is the following:
+	  /// <ul>
+	  /// <li>Lines may contain multiple words separated by whitespace.
+	  /// <li>The comment character is the vertical line (&#124;).
+	  /// <li>Lines may contain trailing comments.
+	  /// </ul>
+	  /// </para>
+	  /// </summary>
+	  /// <param name="reader"> Reader containing a Snowball stopword list </param>
+	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
+	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getSnowballWordSet(java.io.Reader reader, CharArraySet result) throws java.io.IOException
+	  public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
+	  {
+		BufferedReader br = null;
+		try
+		{
+		  br = getBufferedReader(reader);
+		  string line = null;
+		  while ((line = br.readLine()) != null)
+		  {
+			int comment = line.IndexOf('|');
+			if (comment >= 0)
+			{
+				line = line.Substring(0, comment);
+			}
+			string[] words = Regex.Split(line, "\\s+");
+			for (int i = 0; i < words.Length; i++)
+			{
+			  if (words[i].Length > 0)
+			  {
+				  result.add(words[i]);
+			  }
+			}
+		  }
+		}
+		finally
+		{
+		  IOUtils.close(br);
+		}
+		return result;
+	  }
+
+	  /// <summary>
+	  /// Reads stopwords from a stopword list in Snowball format.
+	  /// <para>
+	  /// The snowball format is the following:
+	  /// <ul>
+	  /// <li>Lines may contain multiple words separated by whitespace.
+	  /// <li>The comment character is the vertical line (&#124;).
+	  /// <li>Lines may contain trailing comments.
+	  /// </ul>
+	  /// </para>
+	  /// </summary>
+	  /// <param name="reader"> Reader containing a Snowball stopword list </param>
+	  /// <param name="matchVersion"> the Lucene <seealso cref="Version"/> </param>
+	  /// <returns> A <seealso cref="CharArraySet"/> with the reader's words </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArraySet getSnowballWordSet(java.io.Reader reader, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
+	  public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion)
+	  {
+		return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
+	  }
+
+
+	  /// <summary>
+	  /// Reads a stem dictionary. Each line contains:
+	  /// <pre>word<b>\t</b>stem</pre>
+	  /// (i.e. two tab separated words)
+	  /// </summary>
+	  /// <returns> stem dictionary that overrules the stemming algorithm </returns>
+	  /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static CharArrayMap<String> getStemDict(java.io.Reader reader, CharArrayMap<String> result) throws java.io.IOException
+	  public static CharArrayMap<string> getStemDict(Reader reader, CharArrayMap<string> result)
+	  {
+		BufferedReader br = null;
+		try
+		{
+		  br = getBufferedReader(reader);
+		  string line;
+		  while ((line = br.readLine()) != null)
+		  {
+			string[] wordstem = line.Split(new[] { '\t' }, 2);
+			result.put(wordstem[0], wordstem[1]);
+		  }
+		}
+		finally
+		{
+		  IOUtils.close(br);
+		}
+		return result;
+	  }
+
+	  /// <summary>
+	  /// Accesses a resource by name and returns the (non-comment) lines containing
+	  /// data using the given character encoding.
+	  /// 
+	  /// <para>
+	  /// A comment line is any line that starts with the character "#"
+	  /// </para>
+	  /// </summary>
+	  /// <returns> a list of non-blank non-comment lines with whitespace trimmed </returns>
+	  /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static java.util.List<String> getLines(java.io.InputStream stream, java.nio.charset.Charset charset) throws java.io.IOException
+	  public static IList<string> getLines(InputStream stream, Charset charset)
+	  {
+		BufferedReader input = null;
+		List<string> lines;
+		bool success = false;
+		try
+		{
+		  input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+		  lines = new List<string>();
+		  for (string word = null; (word = input.readLine()) != null;)
+		  {
+			// skip initial bom marker
+			if (lines.Count == 0 && word.Length > 0 && word[0] == '\uFEFF')
+			{
+			  word = word.Substring(1);
+			}
+			// skip comments
+			if (word.StartsWith("#", StringComparison.Ordinal))
+			{
+				continue;
+			}
+			word = word.Trim();
+			// skip blank lines
+			if (word.Length == 0)
+			{
+				continue;
+			}
+			lines.Add(word);
+		  }
+		  success = true;
+		  return lines;
+		}
+		finally
+		{
+		  if (success)
+		  {
+			IOUtils.close(input);
+		  }
+		  else
+		  {
+			IOUtils.closeWhileHandlingException(input);
+		  }
+		}
+	  }
+
+	  private static BufferedReader getBufferedReader(Reader reader)
+	  {
+		return (reader is BufferedReader) ? (BufferedReader) reader : new BufferedReader(reader);
+	  }
+
+	}
+
+}
\ No newline at end of file
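
A sketch of the Snowball stopword format that getSnowballWordSet parses; the word content is illustrative, and the Reader parameter is still the Java-style type at this point in the port:

    // Multiple words per line; '|' starts a trailing comment; blank lines skipped.
    string snowball =
        "i me my          | English first-person forms\n" +
        "och det att      | Swedish function words\n";
    CharArraySet stopwords = WordlistLoader.getSnowballWordSet(
        new StringReader(snowball), Version.LUCENE_48);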

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
new file mode 100644
index 0000000..1fd76f8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
@@ -0,0 +1,343 @@
+using System.Collections.Generic;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.wikipedia
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+
+
+	/// <summary>
+	/// Extension of StandardTokenizer that is aware of Wikipedia syntax.  It is based on the
+	/// Wikipedia tutorial available at http://en.wikipedia.org/wiki/Wikipedia:Tutorial, but it may not be complete.
+	/// <p/>
+	/// <p/>
+	/// @lucene.experimental
+	/// </summary>
+	public sealed class WikipediaTokenizer : Tokenizer
+	{
+	  public const string INTERNAL_LINK = "il";
+	  public const string EXTERNAL_LINK = "el";
+	  //The URL part of the link, i.e. the first token
+	  public const string EXTERNAL_LINK_URL = "elu";
+	  public const string CITATION = "ci";
+	  public const string CATEGORY = "c";
+	  public const string BOLD = "b";
+	  public const string ITALICS = "i";
+	  public const string BOLD_ITALICS = "bi";
+	  public const string HEADING = "h";
+	  public const string SUB_HEADING = "sh";
+
+	  public const int ALPHANUM_ID = 0;
+	  public const int APOSTROPHE_ID = 1;
+	  public const int ACRONYM_ID = 2;
+	  public const int COMPANY_ID = 3;
+	  public const int EMAIL_ID = 4;
+	  public const int HOST_ID = 5;
+	  public const int NUM_ID = 6;
+	  public const int CJ_ID = 7;
+	  public const int INTERNAL_LINK_ID = 8;
+	  public const int EXTERNAL_LINK_ID = 9;
+	  public const int CITATION_ID = 10;
+	  public const int CATEGORY_ID = 11;
+	  public const int BOLD_ID = 12;
+	  public const int ITALICS_ID = 13;
+	  public const int BOLD_ITALICS_ID = 14;
+	  public const int HEADING_ID = 15;
+	  public const int SUB_HEADING_ID = 16;
+	  public const int EXTERNAL_LINK_URL_ID = 17;
+
+	  /// <summary>
+	  /// String token types that correspond to token type int constants </summary>
+	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", INTERNAL_LINK, EXTERNAL_LINK, CITATION, CATEGORY, BOLD, ITALICS, BOLD_ITALICS, HEADING, SUB_HEADING, EXTERNAL_LINK_URL};
+
+	  /// <summary>
+	  /// Only output tokens
+	  /// </summary>
+	  public const int TOKENS_ONLY = 0;
+	  /// <summary>
+	  /// Only output untokenized tokens, which are tokens that would normally be split into several tokens
+	  /// </summary>
+	  public const int UNTOKENIZED_ONLY = 1;
+	  /// <summary>
+	  /// Output both the untokenized token and the splits
+	  /// </summary>
+	  public const int BOTH = 2;
+	  /// <summary>
+	  /// This flag is used to indicate that the produced "Token" would, if <seealso cref="#TOKENS_ONLY"/> was used, produce multiple tokens.
+	  /// </summary>
+	  public const int UNTOKENIZED_TOKEN_FLAG = 1;
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner
+	  /// </summary>
+	  private readonly WikipediaTokenizerImpl scanner;
+
+	  private int tokenOutput = TOKENS_ONLY;
+	  private HashSet<string> untokenizedTypes = new HashSet<string>();
+	  private IEnumerator<AttributeSource.State> tokens = null;
+
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly FlagsAttribute flagsAtt = addAttribute(typeof(FlagsAttribute));
+
+	  private bool first;
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="WikipediaTokenizer"/>. Attaches the
+	  /// <code>input</code> to a newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The Input Reader </param>
+	  public WikipediaTokenizer(Reader input) : this(input, TOKENS_ONLY, new HashSet<string>())
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="org.apache.lucene.analysis.wikipedia.WikipediaTokenizer"/>.  Attaches the
+	  /// <code>input</code> to the newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The input </param>
+	  /// <param name="tokenOutput"> One of <seealso cref="#TOKENS_ONLY"/>, <seealso cref="#UNTOKENIZED_ONLY"/>, <seealso cref="#BOTH"/> </param>
+	  public WikipediaTokenizer(Reader input, int tokenOutput, HashSet<string> untokenizedTypes) : base(input)
+	  {
+		this.scanner = new WikipediaTokenizerImpl(this.input);
+		init(tokenOutput, untokenizedTypes);
+	  }
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="org.apache.lucene.analysis.wikipedia.WikipediaTokenizer"/>.  Attaches the
+	  /// <code>input</code> to the newly created JFlex scanner. Uses the given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+	  /// </summary>
+	  /// <param name="input"> The input </param>
+	  /// <param name="tokenOutput"> One of <seealso cref="#TOKENS_ONLY"/>, <seealso cref="#UNTOKENIZED_ONLY"/>, <seealso cref="#BOTH"/> </param>
+	  public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, HashSet<string> untokenizedTypes) : base(factory, input)
+	  {
+		this.scanner = new WikipediaTokenizerImpl(this.input);
+		init(tokenOutput, untokenizedTypes);
+	  }
+
+	  private void init(int tokenOutput, HashSet<string> untokenizedTypes)
+	  {
+		// TODO: cutover to enum
+		if (tokenOutput != TOKENS_ONLY && tokenOutput != UNTOKENIZED_ONLY && tokenOutput != BOTH)
+		{
+		  throw new System.ArgumentException("tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH");
+		}
+		this.tokenOutput = tokenOutput;
+		this.untokenizedTypes = untokenizedTypes;
+	  }
+
+	  /*
+	  * (non-Javadoc)
+	  *
+	  * @see org.apache.lucene.analysis.TokenStream#next()
+	  */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
+		if (tokens != null && tokens.hasNext())
+		{
+//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
+		  AttributeSource.State state = tokens.next();
+		  restoreState(state);
+		  return true;
+		}
+		clearAttributes();
+		int tokenType = scanner.NextToken;
+
+		if (tokenType == WikipediaTokenizerImpl.YYEOF)
+		{
+		  return false;
+		}
+		string type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
+		if (tokenOutput == TOKENS_ONLY || untokenizedTypes.Contains(type) == false)
+		{
+		  setupToken();
+		}
+		else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.Contains(type) == true)
+		{
+		  collapseTokens(tokenType);
+
+		}
+		else if (tokenOutput == BOTH)
+		{
+		  //collapse into a single token, add it to tokens AND output the individual tokens
+		  //output the untokenized Token first
+		  collapseAndSaveTokens(tokenType, type);
+		}
+		int posinc = scanner.PositionIncrement;
+		if (first && posinc == 0)
+		{
+		  posinc = 1; // don't emit posinc=0 for the first token!
+		}
+		posIncrAtt.PositionIncrement = posinc;
+		typeAtt.Type = type;
+		first = false;
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void collapseAndSaveTokens(int tokenType, String type) throws java.io.IOException
+	  private void collapseAndSaveTokens(int tokenType, string type)
+	  {
+		//collapse
+		StringBuilder buffer = new StringBuilder(32);
+		int numAdded = scanner.setText(buffer);
+		//TODO: how to know how much whitespace to add
+		int theStart = scanner.yychar();
+		int lastPos = theStart + numAdded;
+		int tmpTokType;
+		int numSeen = 0;
+		IList<AttributeSource.State> tmp = new List<AttributeSource.State>();
+		setupSavedToken(0, type);
+		tmp.Add(captureState());
+		//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
+		while ((tmpTokType = scanner.NextToken) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.NumWikiTokensSeen > numSeen)
+		{
+		  int currPos = scanner.yychar();
+		  //append whitespace
+		  for (int i = 0; i < (currPos - lastPos); i++)
+		  {
+			buffer.Append(' ');
+		  }
+		  numAdded = scanner.setText(buffer);
+		  setupSavedToken(scanner.PositionIncrement, type);
+		  tmp.Add(captureState());
+		  numSeen++;
+		  lastPos = currPos + numAdded;
+		}
+		//trim the buffer
+		// TODO: this is inefficient
+		string s = buffer.ToString().Trim();
+		termAtt.setEmpty().append(s);
+		offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.Length));
+		flagsAtt.Flags = UNTOKENIZED_TOKEN_FLAG;
+		//The way the loop is written, we will have proceeded to the next token.  We need to pushback the scanner to lastPos
+		if (tmpTokType != WikipediaTokenizerImpl.YYEOF)
+		{
+		  scanner.yypushback(scanner.yylength());
+		}
+		tokens = tmp.GetEnumerator();
+	  }
+
+	  private void setupSavedToken(int positionInc, string type)
+	  {
+		setupToken();
+		posIncrAtt.PositionIncrement = positionInc;
+		typeAtt.Type = type;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void collapseTokens(int tokenType) throws java.io.IOException
+	  private void collapseTokens(int tokenType)
+	  {
+		//collapse
+		StringBuilder buffer = new StringBuilder(32);
+		int numAdded = scanner.setText(buffer);
+		//TODO: how to know how much whitespace to add
+		int theStart = scanner.yychar();
+		int lastPos = theStart + numAdded;
+		int tmpTokType;
+		int numSeen = 0;
+		//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
+		while ((tmpTokType = scanner.NextToken) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.NumWikiTokensSeen > numSeen)
+		{
+		  int currPos = scanner.yychar();
+		  //append whitespace
+		  for (int i = 0; i < (currPos - lastPos); i++)
+		  {
+			buffer.Append(' ');
+		  }
+		  numAdded = scanner.setText(buffer);
+		  numSeen++;
+		  lastPos = currPos + numAdded;
+		}
+		//trim the buffer
+		// TODO: this is inefficient
+		string s = buffer.ToString().Trim();
+		termAtt.setEmpty().append(s);
+		offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.Length));
+		flagsAtt.Flags = UNTOKENIZED_TOKEN_FLAG;
+		//The way the loop is written, we will have proceeded to the next token.  We need to pushback the scanner to lastPos
+		if (tmpTokType != WikipediaTokenizerImpl.YYEOF)
+		{
+		  scanner.yypushback(scanner.yylength());
+		}
+		else
+		{
+		  tokens = null;
+		}
+	  }
+
+	  private void setupToken()
+	  {
+		scanner.getText(termAtt);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = scanner.yychar();
+		int start = scanner.yychar();
+		offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+	  /*
+	  * (non-Javadoc)
+	  *
+	  * @see org.apache.lucene.analysis.TokenStream#reset()
+	  */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		tokens = null;
+		scanner.reset();
+		first = true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		this.offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+	}
+}
\ No newline at end of file
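
The three output modes, sketched with illustrative markup; the Reader argument is still the Java-style type here, and attribute inspection is omitted:

    string wiki = "[[Category:Search]] '''bold''' [http://lucene.apache.org Lucene]";
    // TOKENS_ONLY:      the normal split tokens, typed CATEGORY/BOLD/EXTERNAL_LINK/...
    // UNTOKENIZED_ONLY: one collapsed token per wiki construct, carrying
    //                   UNTOKENIZED_TOKEN_FLAG in its FlagsAttribute.
    // BOTH:             the collapsed token first, then the splits it covers.
    var untokenized = new HashSet<string> { WikipediaTokenizer.CATEGORY, WikipediaTokenizer.BOLD };
    var tok = new WikipediaTokenizer(new StringReader(wiki), WikipediaTokenizer.BOTH, untokenized);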

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
new file mode 100644
index 0000000..ad7027f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
@@ -0,0 +1,57 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.wikipedia
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="WikipediaTokenizer"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WikipediaTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class WikipediaTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new WikipediaTokenizerFactory </summary>
+	  public WikipediaTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  // TODO: add support for WikipediaTokenizer's advanced options.
+	  public override WikipediaTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		return new WikipediaTokenizer(factory, input, WikipediaTokenizer.TOKENS_ONLY, new HashSet<string>());
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
new file mode 100644
index 0000000..cfdba3e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
@@ -0,0 +1,99 @@
+using System;
+using Lucene.Net.Util;
+using org.apache.lucene.collation.tokenattributes;
+
+namespace Lucene.Net.Collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// <para>
+	///   Converts each token into its <seealso cref="java.text.CollationKey"/>, and then
+	///   encodes the bytes as an index term.
+	/// </para>
+	/// <para>
+	///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+	///   index and query time -- CollationKeys are only comparable when produced by
+	///   the same Collator.  Since <seealso cref="java.text.RuleBasedCollator"/>s are not
+	///   independently versioned, it is unsafe to search against stored
+	///   CollationKeys unless the following are exactly the same (best practice is
+	///   to store this information with the index and check that they remain the
+	///   same at query time):
+	/// </para>
+	/// <ol>
+	///   <li>JVM vendor</li>
+	///   <li>JVM version, including patch version</li>
+	///   <li>
+	///     The language (and country and variant, if specified) of the Locale
+	///     used when constructing the collator via
+	///     <seealso cref="Collator#getInstance(java.util.Locale)"/>.
+	///   </li>
+	///   <li>
+	///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+	///   </li>
+	/// </ol> 
+	/// <para>
+	///   The <code>ICUCollationAttributeFactory</code> in the analysis-icu package 
+	///   uses ICU4J's Collator, which makes its
+	///   version available, thus allowing collation to be versioned independently
+	///   from the JVM.  ICUCollationAttributeFactory is also significantly faster and
+	///   generates significantly shorter keys than CollationAttributeFactory.  See
+	///   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+	///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+	///   generation timing and key length comparisons between ICU4J and
+	///   java.text.Collator over several languages.
+	/// </para>
+	/// <para>
+	///   CollationKeys generated by java.text.Collators are not compatible
+	///   with those generated by ICU Collators.  Specifically, if you use
+	///   CollationAttributeFactory to generate index terms, do not use
+	///   ICUCollationAttributeFactory on the query side, or vice versa.
+	/// </para>
+	/// </summary>
+	public class CollationAttributeFactory : AttributeSource.AttributeFactory
+	{
+	  private readonly Collator collator;
+	  private readonly AttributeSource.AttributeFactory @delegate;
+
+	  /// <summary>
+	  /// Create a CollationAttributeFactory, using 
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY"/> as the
+	  /// factory for all other attributes. </summary>
+	  /// <param name="collator"> CollationKey generator </param>
+	  public CollationAttributeFactory(Collator collator) : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a CollationAttributeFactory, using the supplied Attribute Factory 
+	  /// as the factory for all other attributes. </summary>
+	  /// <param name="delegate"> Attribute Factory </param>
+	  /// <param name="collator"> CollationKey generator </param>
+	  public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator)
+	  {
+		this.@delegate = @delegate;
+		this.collator = collator;
+	  }
+
+	  public override AttributeImpl CreateAttributeInstance(Type attClass)
+	  {
+		// Java: attClass.isAssignableFrom(CollatedTermAttributeImpl.class)
+		return attClass.IsAssignableFrom(typeof(CollatedTermAttributeImpl)) ? new CollatedTermAttributeImpl(collator) : @delegate.CreateAttributeInstance(attClass);
+	  }
+	}
+
+}
\ No newline at end of file
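As a wiring sketch for the class above: Collator and Locale are still the unresolved java.text
types in this raw port, so read this as pseudocode against a future Collator shim:

    Collator collator = Collator.getInstance(new Locale("de", "DE"));
    var factory = new CollationAttributeFactory(collator);
    var tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
    // the query side must build its terms with a collator configured identically
    // (locale, strength, decomposition), or the CollationKeys will not compare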

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
new file mode 100644
index 0000000..06fb9e0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
@@ -0,0 +1,129 @@
+using System;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Collation;
+
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Analyzer = org.apache.lucene.analysis.Analyzer;
+	using KeywordTokenizer = KeywordTokenizer;
+	using IndexableBinaryStringTools = org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// <para>
+	///   Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>.
+	/// </para>
+	/// <para>
+	///   Converts the token into its <seealso cref="java.text.CollationKey"/>, and then
+	///   encodes the CollationKey either directly or with 
+	///   <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow 
+	///   it to be stored as an index term.
+	/// </para>
+	/// <para>
+	///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+	///   index and query time -- CollationKeys are only comparable when produced by
+	///   the same Collator.  Since <seealso cref="java.text.RuleBasedCollator"/>s are not
+	///   independently versioned, it is unsafe to search against stored
+	///   CollationKeys unless the following are exactly the same (best practice is
+	///   to store this information with the index and check that they remain the
+	///   same at query time):
+	/// </para>
+	/// <ol>
+	///   <li>JVM vendor</li>
+	///   <li>JVM version, including patch version</li>
+	///   <li>
+	///     The language (and country and variant, if specified) of the Locale
+	///     used when constructing the collator via
+	///     <seealso cref="Collator#getInstance(java.util.Locale)"/>.
+	///   </li>
+	///   <li>
+	///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+	///   </li>
+	/// </ol> 
+	/// <para>
+	///   The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package 
+	///   uses ICU4J's Collator, which makes its
+	///   version available, thus allowing collation to be versioned
+	///   independently from the JVM.  ICUCollationKeyAnalyzer is also significantly
+	///   faster and generates significantly shorter keys than CollationKeyAnalyzer.
+	///   See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+	///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+	///   generation timing and key length comparisons between ICU4J and
+	///   java.text.Collator over several languages.
+	/// </para>
+	/// <para>
+	///   CollationKeys generated by java.text.Collators are not compatible
+	///   with those generated by ICU Collators.  Specifically, if you use
+	///   CollationKeyAnalyzer to generate index terms, do not use
+	///   ICUCollationKeyAnalyzer on the query side, or vice versa.
+	/// </para>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating CollationKeyAnalyzer:
+	/// <ul>
+	///   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+	///   versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class CollationKeyAnalyzer : Analyzer
+	{
+	  private readonly Collator collator;
+	  private readonly CollationAttributeFactory factory;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Create a new CollationKeyAnalyzer, using the specified collator.
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="collator"> CollationKey generator </param>
+	  public CollationKeyAnalyzer(Version matchVersion, Collator collator)
+	  {
+		this.matchVersion = matchVersion;
+		this.collator = collator;
+		this.factory = new CollationAttributeFactory(collator);
+	  }
+
+	  /// @deprecated Use <seealso cref="CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)"/>
+	  ///   and specify a version instead. This ctor will be removed in Lucene 5.0 
+	  [Obsolete("Use <seealso cref="CollationKeyAnalyzer#CollationKeyAnalyzer(org.apache.lucene.util.Version, java.text.Collator)"/>")]
+	  public CollationKeyAnalyzer(Collator collator) : this(Version.LUCENE_31, collator)
+	  {
+	  }
+
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_40))
+		{
+		  KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+		  return new TokenStreamComponents(tokenizer, tokenizer);
+		}
+		else
+		{
+		  KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
+		  return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator));
+		}
+	  }
+	}
+
+}
\ No newline at end of file
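A construction sketch showing the version gate in createComponents above, reusing the collator
from the previous sketch:

    // LUCENE_40 and later store collation keys directly as bytes via
    // CollationAttributeFactory; earlier versions fall back to
    // KeywordTokenizer + CollationKeyFilter with IndexableBinaryStringTools encoding
    var analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);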

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
new file mode 100644
index 0000000..a098632
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
@@ -0,0 +1,112 @@
+using System;
+
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenFilter = org.apache.lucene.analysis.TokenFilter;
+	using TokenStream = org.apache.lucene.analysis.TokenStream;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using IndexableBinaryStringTools = org.apache.lucene.util.IndexableBinaryStringTools;
+
+
+
+	/// <summary>
+	/// <para>
+	///   Converts each token into its <seealso cref="java.text.CollationKey"/>, and then
+	///   encodes the CollationKey with <seealso cref="IndexableBinaryStringTools"/>, to allow 
+	///   it to be stored as an index term.
+	/// </para>
+	/// <para>
+	///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+	///   index and query time -- CollationKeys are only comparable when produced by
+	///   the same Collator.  Since <seealso cref="java.text.RuleBasedCollator"/>s are not
+	///   independently versioned, it is unsafe to search against stored
+	///   CollationKeys unless the following are exactly the same (best practice is
+	///   to store this information with the index and check that they remain the
+	///   same at query time):
+	/// </para>
+	/// <ol>
+	///   <li>JVM vendor</li>
+	///   <li>JVM version, including patch version</li>
+	///   <li>
+	///     The language (and country and variant, if specified) of the Locale
+	///     used when constructing the collator via
+	///     <seealso cref="Collator#getInstance(java.util.Locale)"/>.
+	///   </li>
+	///   <li>
+	///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+	///   </li>
+	/// </ol> 
+	/// <para>
+	///   The <code>ICUCollationKeyFilter</code> in the analysis-icu package 
+	///   uses ICU4J's Collator, which makes its
+	///   version available, thus allowing collation to be versioned independently
+	///   from the JVM.  ICUCollationKeyFilter is also significantly faster and
+	///   generates significantly shorter keys than CollationKeyFilter.  See
+	///   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+	///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+	///   generation timing and key length comparisons between ICU4J and
+	///   java.text.Collator over several languages.
+	/// </para>
+	/// <para>
+	///   CollationKeys generated by java.text.Collators are not compatible
+	///   with those generated by ICU Collators.  Specifically, if you use
+	///   CollationKeyFilter to generate index terms, do not use
+	///   ICUCollationKeyFilter on the query side, or vice versa.
+	/// </para> </summary>
+	/// @deprecated Use <seealso cref="CollationAttributeFactory"/> instead, which encodes
+	///  terms directly as bytes. This filter will be removed in Lucene 5.0 
+	[Obsolete("Use <seealso cref="CollationAttributeFactory"/> instead, which encodes")]
+	public sealed class CollationKeyFilter : TokenFilter
+	{
+	  private readonly Collator collator;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <param name="input"> Source token stream </param>
+	  /// <param name="collator"> CollationKey generator </param>
+	  public CollationKeyFilter(TokenStream input, Collator collator) : base(input)
+	  {
+		// clone in case JRE doesn't properly sync,
+		// or to reduce contention in case they do
+		this.collator = (Collator) collator.clone();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  sbyte[] collationKey = collator.getCollationKey(termAtt.ToString()).toByteArray();
+		  int encodedLength = IndexableBinaryStringTools.getEncodedLength(collationKey, 0, collationKey.Length);
+		  termAtt.resizeBuffer(encodedLength);
+		  termAtt.Length = encodedLength;
+		  IndexableBinaryStringTools.encode(collationKey, 0, collationKey.Length, termAtt.buffer(), 0, encodedLength);
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
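The deprecated pre-4.0 wiring this filter serves looks roughly like:

    // superseded by CollationAttributeFactory, which writes key bytes directly
    TokenStream stream = new KeywordTokenizer(reader);
    stream = new CollationKeyFilter(stream, collator);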

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
new file mode 100644
index 0000000..7396e1f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
@@ -0,0 +1,254 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.collation
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenStream = org.apache.lucene.analysis.TokenStream;
+	using org.apache.lucene.analysis.util;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+
+	/// <summary>
+	/// Factory for <seealso cref="CollationKeyFilter"/>.
+	/// <para>
+	/// This factory can be created in two ways: 
+	/// <ul>
+	///  <li>Based upon a system collator associated with a Locale.
+	///  <li>Based upon a tailored ruleset.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// Using a System collator:
+	/// <ul>
+	///  <li>language: ISO-639 language code (mandatory)
+	///  <li>country: ISO-3166 country code (optional)
+	///  <li>variant: vendor or browser-specific code (optional)
+	///  <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
+	///  <li>decomposition: 'no','canonical', or 'full' (optional)
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// Using a Tailored ruleset:
+	/// <ul>
+	///  <li>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)
+	///  <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
+	///  <li>decomposition: 'no','canonical', or 'full' (optional)
+	/// </ul>
+	/// 
+	/// <pre class="prettyprint" >
+	/// &lt;fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// </para>
+	/// </summary>
+	/// <seealso cref= Collator </seealso>
+	/// <seealso cref= Locale </seealso>
+	/// <seealso cref= RuleBasedCollator
+	/// @since solr 3.1 </seealso>
+	/// @deprecated Use <seealso cref="CollationKeyAnalyzer"/> instead. 
+	[Obsolete("Use CollationKeyAnalyzer instead.")]
+	public class CollationKeyFilterFactory : TokenFilterFactory, MultiTermAwareComponent, ResourceLoaderAware
+	{
+	  private Collator collator;
+	  private readonly string custom;
+	  private readonly string language;
+	  private readonly string country;
+	  private readonly string variant;
+	  private readonly string strength;
+	  private readonly string decomposition;
+
+	  public CollationKeyFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// Java's Map.remove(key) returns the removed value; IDictionary.Remove
+		// returns a bool, so read each value before removing its key.
+		args.TryGetValue("custom", out custom); args.Remove("custom");
+		args.TryGetValue("language", out language); args.Remove("language");
+		args.TryGetValue("country", out country); args.Remove("country");
+		args.TryGetValue("variant", out variant); args.Remove("variant");
+		args.TryGetValue("strength", out strength); args.Remove("strength");
+		args.TryGetValue("decomposition", out decomposition); args.Remove("decomposition");
+
+		if (custom == null && language == null)
+		{
+		  throw new System.ArgumentException("Either custom or language is required.");
+		}
+
+		if (custom != null && (language != null || country != null || variant != null))
+		{
+		  throw new System.ArgumentException("Cannot specify both language and custom. " + "To tailor rules for a built-in language, see the javadocs for RuleBasedCollator. " + "Then save the entire customized ruleset to a file, and use with the custom parameter");
+		}
+
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void inform(ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (language != null)
+		{
+		  // create from a system collator, based on Locale.
+		  collator = createFromLocale(language, country, variant);
+		}
+		else
+		{
+		  // create from a custom ruleset
+		  collator = createFromRules(custom, loader);
+		}
+
+		// set the strength flag, otherwise it will be the default.
+		if (strength != null)
+		{
+		  if (strength.Equals("primary", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Strength = Collator.PRIMARY;
+		  }
+		  else if (strength.Equals("secondary", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Strength = Collator.SECONDARY;
+		  }
+		  else if (strength.Equals("tertiary", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Strength = Collator.TERTIARY;
+		  }
+		  else if (strength.Equals("identical", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Strength = Collator.IDENTICAL;
+		  }
+		  else
+		  {
+			throw new System.ArgumentException("Invalid strength: " + strength);
+		  }
+		}
+
+		// set the decomposition flag, otherwise it will be the default.
+		if (decomposition != null)
+		{
+		  if (decomposition.Equals("no", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Decomposition = Collator.NO_DECOMPOSITION;
+		  }
+		  else if (decomposition.Equals("canonical", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Decomposition = Collator.CANONICAL_DECOMPOSITION;
+		  }
+		  else if (decomposition.Equals("full", StringComparison.CurrentCultureIgnoreCase))
+		  {
+			collator.Decomposition = Collator.FULL_DECOMPOSITION;
+		  }
+		  else
+		  {
+			throw new System.ArgumentException("Invalid decomposition: " + decomposition);
+		  }
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new CollationKeyFilter(input, collator);
+	  }
+
+	  /*
+	   * Create a locale from language, with optional country and variant.
+	   * Then return the appropriate collator for the locale.
+	   */
+	  private Collator createFromLocale(string language, string country, string variant)
+	  {
+		Locale locale;
+
+		if (language != null && country == null && variant != null)
+		{
+		  throw new System.ArgumentException("To specify variant, country is required");
+		}
+		else if (language != null && country != null && variant != null)
+		{
+		  locale = new Locale(language, country, variant);
+		}
+		else if (language != null && country != null)
+		{
+		  locale = new Locale(language, country);
+		}
+		else
+		{
+		  locale = new Locale(language);
+		}
+
+		return Collator.getInstance(locale);
+	  }
+
+	  /*
+	   * Read custom rules from a file, and create a RuleBasedCollator
+	   * The file cannot support comments, as # might be in the rules!
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private java.text.Collator createFromRules(String fileName, ResourceLoader loader) throws java.io.IOException
+	  private Collator createFromRules(string fileName, ResourceLoader loader)
+	  {
+		InputStream input = null;
+		try
+		{
+		 input = loader.openResource(fileName);
+		 string rules = toUTF8String(input);
+		 return new RuleBasedCollator(rules);
+		}
+		catch (ParseException e)
+		{
+		  // invalid rules
+		  throw new IOException("ParseException thrown while parsing rules", e);
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(input);
+		}
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private String toUTF8String(java.io.InputStream in) throws java.io.IOException
+	  private string toUTF8String(InputStream @in)
+	  {
+		StringBuilder sb = new StringBuilder();
+		char[] buffer = new char[1024];
+		Reader r = IOUtils.getDecodingReader(@in, StandardCharsets.UTF_8);
+		int len = 0;
+		while ((len = r.read(buffer)) > 0)
+		{
+		  sb.Append(buffer, 0, len);
+		}
+		return sb.ToString();
+	  }
+	}
+
+}
\ No newline at end of file
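A configuration sketch mirroring the Solr snippet in the class comment; loader is whatever
ResourceLoader implementation the caller supplies:

    var args = new Dictionary<string, string>
    {
        { "language", "ja" },
        { "country", "JP" },
        { "strength", "primary" }
    };
    var factory = new CollationKeyFilterFactory(args);
    factory.inform(loader);                     // resolves the collator from the locale args
    TokenStream stream = factory.create(input);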

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
new file mode 100644
index 0000000..89b57c5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
@@ -0,0 +1,52 @@
+namespace org.apache.lucene.collation.tokenattributes
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttributeImpl = org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+	/// <summary>
+	/// Extension of <seealso cref="CharTermAttributeImpl"/> that encodes the term
+	/// text as a binary Unicode collation key instead of as UTF-8 bytes.
+	/// </summary>
+	public class CollatedTermAttributeImpl : CharTermAttributeImpl
+	{
+	  private readonly Collator collator;
+
+	  /// <summary>
+	  /// Create a new CollatedTermAttributeImpl </summary>
+	  /// <param name="collator"> Collation key generator </param>
+	  public CollatedTermAttributeImpl(Collator collator)
+	  {
+		// clone in case JRE doesn't properly sync,
+		// or to reduce contention in case they do
+		this.collator = (Collator) collator.clone();
+	  }
+
+	  public override void fillBytesRef()
+	  {
+		BytesRef bytes = BytesRef;
+		bytes.Bytes = collator.getCollationKey(ToString()).toByteArray();
+		bytes.Offset = 0;
+		bytes.Length = bytes.Bytes.Length;
+	  }
+
+	}
+
+}
\ No newline at end of file
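The net effect is that two terms the collator considers equal serialize to identical index
bytes. A rough illustration, again reusing a collator from the earlier sketches:

    var att = new CollatedTermAttributeImpl(collator);
    att.append("resume");
    att.fillBytesRef();  // the attribute's BytesRef now holds the collation key, not UTF-8 text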

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
new file mode 100644
index 0000000..5ff0050
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -0,0 +1,244 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net</RootNamespace>
+    <AssemblyName>Lucene.Net.Analysis.Common</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Analysis\CharFilter\BaseCharFilter.cs" />
+    <Compile Include="Analysis\CharFilter\HTMLStripCharFilterFactory.cs" />
+    <Compile Include="Analysis\CharFilter\MappingCharFilter.cs" />
+    <Compile Include="Analysis\CharFilter\MappingCharFilterFactory.cs" />
+    <Compile Include="Analysis\CharFilter\NormalizeCharMap.cs" />
+    <Compile Include="Analysis\CommonGrams\CommonGramsFilter.cs" />
+    <Compile Include="Analysis\CommonGrams\CommonGramsFilterFactory.cs" />
+    <Compile Include="Analysis\CommonGrams\CommonGramsQueryFilter.cs" />
+    <Compile Include="Analysis\CommonGrams\CommonGramsQueryFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\CompoundWordTokenFilterBase.cs" />
+    <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\ByteVector.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\CharVector.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\Hyphen.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\Hyphenation.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\HyphenationTree.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\PatternConsumer.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\PatternParser.cs" />
+    <Compile Include="Analysis\Compound\hyphenation\TernaryTree.cs" />
+    <Compile Include="Analysis\Core\KeywordAnalyzer.cs" />
+    <Compile Include="Analysis\Core\KeywordTokenizer.cs" />
+    <Compile Include="Analysis\Core\KeywordTokenizerFactory.cs" />
+    <Compile Include="Analysis\Core\LetterTokenizer.cs" />
+    <Compile Include="Analysis\Core\LetterTokenizerFactory.cs" />
+    <Compile Include="Analysis\Core\LowerCaseFilter.cs" />
+    <Compile Include="Analysis\Core\LowerCaseFilterFactory.cs" />
+    <Compile Include="Analysis\Core\LowerCaseTokenizer.cs" />
+    <Compile Include="Analysis\Core\LowerCaseTokenizerFactory.cs" />
+    <Compile Include="Analysis\Core\SimpleAnalyzer.cs" />
+    <Compile Include="Analysis\Core\StopAnalyzer.cs" />
+    <Compile Include="Analysis\Core\StopFilter.cs" />
+    <Compile Include="Analysis\Core\StopFilterFactory.cs" />
+    <Compile Include="Analysis\Core\TypeTokenFilter.cs" />
+    <Compile Include="Analysis\Core\TypeTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Core\UpperCaseFilter.cs" />
+    <Compile Include="Analysis\Core\UpperCaseFilterFactory.cs" />
+    <Compile Include="Analysis\Core\WhitespaceAnalyzer.cs" />
+    <Compile Include="Analysis\Core\WhitespaceTokenizer.cs" />
+    <Compile Include="Analysis\Core\WhitespaceTokenizerFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\ASCIIFoldingFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\ASCIIFoldingFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\CapitalizationFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\CapitalizationFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\CodepointCountFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\CodepointCountFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\EmptyTokenStream.cs" />
+    <Compile Include="Analysis\Miscellaneous\HyphenatedWordsFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\HyphenatedWordsFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeepWordFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeepWordFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeywordMarkerFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeywordMarkerFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeywordRepeatFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\KeywordRepeatFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\LengthFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\LengthFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\LimitTokenCountAnalyzer.cs" />
+    <Compile Include="Analysis\Miscellaneous\LimitTokenCountFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\LimitTokenCountFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\LimitTokenPositionFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\LimitTokenPositionFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\Lucene47WordDelimiterFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\PatternAnalyzer.cs" />
+    <Compile Include="Analysis\Miscellaneous\PatternKeywordMarkerFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\PerFieldAnalyzerWrapper.cs" />
+    <Compile Include="Analysis\Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\PrefixAwareTokenFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\RemoveDuplicatesTokenFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\RemoveDuplicatesTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\ScandinavianFoldingFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\ScandinavianFoldingFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\ScandinavianNormalizationFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\ScandinavianNormalizationFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\SetKeywordMarkerFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\SingleTokenTokenStream.cs" />
+    <Compile Include="Analysis\Miscellaneous\StemmerOverrideFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\StemmerOverrideFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\TrimFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\TrimFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\TruncateTokenFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\TruncateTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\WordDelimiterFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\WordDelimiterFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\WordDelimiterIterator.cs" />
+    <Compile Include="Analysis\Ngram\EdgeNGramFilterFactory.cs" />
+    <Compile Include="Analysis\Ngram\EdgeNGramTokenFilter.cs" />
+    <Compile Include="Analysis\Ngram\EdgeNGramTokenizer.cs" />
+    <Compile Include="Analysis\Ngram\EdgeNGramTokenizerFactory.cs" />
+    <Compile Include="Analysis\Ngram\Lucene43EdgeNGramTokenizer.cs" />
+    <Compile Include="Analysis\Ngram\Lucene43NGramTokenizer.cs" />
+    <Compile Include="Analysis\Ngram\NGramFilterFactory.cs" />
+    <Compile Include="Analysis\Ngram\NGramTokenFilter.cs" />
+    <Compile Include="Analysis\Ngram\NGramTokenizer.cs" />
+    <Compile Include="Analysis\Ngram\NGramTokenizerFactory.cs" />
+    <Compile Include="Analysis\Path\PathHierarchyTokenizer.cs" />
+    <Compile Include="Analysis\Path\PathHierarchyTokenizerFactory.cs" />
+    <Compile Include="Analysis\Path\ReversePathHierarchyTokenizer.cs" />
+    <Compile Include="Analysis\Pattern\PatternCaptureGroupFilterFactory.cs" />
+    <Compile Include="Analysis\Pattern\PatternCaptureGroupTokenFilter.cs" />
+    <Compile Include="Analysis\Pattern\PatternReplaceCharFilter.cs" />
+    <Compile Include="Analysis\Pattern\PatternReplaceCharFilterFactory.cs" />
+    <Compile Include="Analysis\Pattern\PatternReplaceFilter.cs" />
+    <Compile Include="Analysis\Pattern\PatternReplaceFilterFactory.cs" />
+    <Compile Include="Analysis\Pattern\PatternTokenizer.cs" />
+    <Compile Include="Analysis\Pattern\PatternTokenizerFactory.cs" />
+    <Compile Include="Analysis\Payloads\AbstractEncoder.cs" />
+    <Compile Include="Analysis\Payloads\DelimitedPayloadTokenFilter.cs" />
+    <Compile Include="Analysis\Payloads\DelimitedPayloadTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Payloads\FloatEncoder.cs" />
+    <Compile Include="Analysis\Payloads\IdentityEncoder.cs" />
+    <Compile Include="Analysis\Payloads\IntegerEncoder.cs" />
+    <Compile Include="Analysis\Payloads\NumericPayloadTokenFilter.cs" />
+    <Compile Include="Analysis\Payloads\NumericPayloadTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Payloads\PayloadEncoder.cs" />
+    <Compile Include="Analysis\Payloads\PayloadHelper.cs" />
+    <Compile Include="Analysis\Payloads\TokenOffsetPayloadTokenFilter.cs" />
+    <Compile Include="Analysis\Payloads\TokenOffsetPayloadTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Payloads\TypeAsPayloadTokenFilter.cs" />
+    <Compile Include="Analysis\Payloads\TypeAsPayloadTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Position\PositionFilter.cs" />
+    <Compile Include="Analysis\Position\PositionFilterFactory.cs" />
+    <Compile Include="Analysis\Query\QueryAutoStopWordAnalyzer.cs" />
+    <Compile Include="Analysis\Reverse\ReverseStringFilter.cs" />
+    <Compile Include="Analysis\Reverse\ReverseStringFilterFactory.cs" />
+    <Compile Include="Analysis\Shingle\ShingleAnalyzerWrapper.cs" />
+    <Compile Include="Analysis\Shingle\ShingleFilter.cs" />
+    <Compile Include="Analysis\Shingle\ShingleFilterFactory.cs" />
+    <Compile Include="Analysis\Sinks\DateRecognizerSinkFilter.cs" />
+    <Compile Include="Analysis\Sinks\TeeSinkTokenFilter.cs" />
+    <Compile Include="Analysis\Sinks\TokenRangeSinkFilter.cs" />
+    <Compile Include="Analysis\Sinks\TokenTypeSinkFilter.cs" />
+    <Compile Include="Analysis\Standard\ClassicAnalyzer.cs" />
+    <Compile Include="Analysis\Standard\ClassicFilter.cs" />
+    <Compile Include="Analysis\Standard\ClassicFilterFactory.cs" />
+    <Compile Include="Analysis\Standard\ClassicTokenizer.cs" />
+    <Compile Include="Analysis\Standard\ClassicTokenizerFactory.cs" />
+    <Compile Include="Analysis\Standard\ClassicTokenizerImpl.cs" />
+    <Compile Include="Analysis\Standard\StandardAnalyzer.cs" />
+    <Compile Include="Analysis\Standard\StandardFilter.cs" />
+    <Compile Include="Analysis\Standard\StandardFilterFactory.cs" />
+    <Compile Include="Analysis\Standard\StandardTokenizer.cs" />
+    <Compile Include="Analysis\Standard\StandardTokenizerFactory.cs" />
+    <Compile Include="Analysis\Standard\StandardTokenizerImpl.cs" />
+    <Compile Include="Analysis\Standard\StandardTokenizerInterface.cs" />
+    <Compile Include="Analysis\Standard\UAX29URLEmailAnalyzer.cs" />
+    <Compile Include="Analysis\Standard\UAX29URLEmailTokenizer.cs" />
+    <Compile Include="Analysis\Standard\UAX29URLEmailTokenizerFactory.cs" />
+    <Compile Include="Analysis\Synonym\FSTSynonymFilterFactory.cs" />
+    <Compile Include="Analysis\Synonym\SlowSynonymFilter.cs" />
+    <Compile Include="Analysis\Synonym\SlowSynonymFilterFactory.cs" />
+    <Compile Include="Analysis\Synonym\SlowSynonymMap.cs" />
+    <Compile Include="Analysis\Synonym\SolrSynonymParser.cs" />
+    <Compile Include="Analysis\Synonym\SynonymFilter.cs" />
+    <Compile Include="Analysis\Synonym\SynonymFilterFactory.cs" />
+    <Compile Include="Analysis\Synonym\SynonymMap.cs" />
+    <Compile Include="Analysis\Synonym\WordnetSynonymParser.cs" />
+    <Compile Include="Analysis\Util\AbstractAnalysisFactory.cs" />
+    <Compile Include="Analysis\Util\AnalysisSPILoader.cs" />
+    <Compile Include="Analysis\Util\CharacterUtils.cs" />
+    <Compile Include="Analysis\Util\CharArrayIterator.cs" />
+    <Compile Include="Analysis\Util\CharArrayMap.cs" />
+    <Compile Include="Analysis\Util\CharArraySet.cs" />
+    <Compile Include="Analysis\Util\CharFilterFactory.cs" />
+    <Compile Include="Analysis\Util\CharTokenizer.cs" />
+    <Compile Include="Analysis\Util\ClasspathResourceLoader.cs" />
+    <Compile Include="Analysis\Util\ElisionFilter.cs" />
+    <Compile Include="Analysis\Util\ElisionFilterFactory.cs" />
+    <Compile Include="Analysis\Util\FilesystemResourceLoader.cs" />
+    <Compile Include="Analysis\Util\FilteringTokenFilter.cs" />
+    <Compile Include="Analysis\Util\MultiTermAwareComponent.cs" />
+    <Compile Include="Analysis\Util\OpenStringBuilder.cs" />
+    <Compile Include="Analysis\Util\ResourceLoader.cs" />
+    <Compile Include="Analysis\Util\ResourceLoaderAware.cs" />
+    <Compile Include="Analysis\Util\RollingCharBuffer.cs" />
+    <Compile Include="Analysis\Util\SegmentingTokenizerBase.cs" />
+    <Compile Include="Analysis\Util\StemmerUtil.cs" />
+    <Compile Include="Analysis\Util\StopwordAnalyzerBase.cs" />
+    <Compile Include="Analysis\Util\TokenFilterFactory.cs" />
+    <Compile Include="Analysis\Util\TokenizerFactory.cs" />
+    <Compile Include="Analysis\Util\WordlistLoader.cs" />
+    <Compile Include="Analysis\Wikipedia\WikipediaTokenizer.cs" />
+    <Compile Include="Analysis\Wikipedia\WikipediaTokenizerFactory.cs" />
+    <Compile Include="Collation\CollationAttributeFactory.cs" />
+    <Compile Include="Collation\CollationKeyAnalyzer.cs" />
+    <Compile Include="Collation\CollationKeyFilter.cs" />
+    <Compile Include="Collation\CollationKeyFilterFactory.cs" />
+    <Compile Include="Collation\TokenAttributes\CollatedTermAttributeImpl.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Properties/AssemblyInfo.cs b/src/Lucene.Net.Analysis.Common/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..83220d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analysis.Common")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Analysis.Common")]
+[assembly: AssemblyCopyright("Copyright ©  2014")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("f57314a7-e71f-4b3c-860f-564046ca398b")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version 
+//      Build Number
+//      Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers 
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Core/Analysis/Analyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Analyzer.cs b/src/Lucene.Net.Core/Analysis/Analyzer.cs
index bf2fcf1..0ae0c8e 100644
--- a/src/Lucene.Net.Core/Analysis/Analyzer.cs
+++ b/src/Lucene.Net.Core/Analysis/Analyzer.cs
@@ -104,7 +104,7 @@ namespace Lucene.Net.Analysis
         /// <param name="reader">
         ///          the reader passed to the <seealso cref="Tokenizer"/> constructor </param>
         /// <returns> the <seealso cref="TokenStreamComponents"/> for this analyzer. </returns>
-        protected internal abstract TokenStreamComponents CreateComponents(string fieldName, TextReader reader);
+        protected abstract TokenStreamComponents CreateComponents(string fieldName, TextReader reader);
 
         /// <summary>
         /// Returns a TokenStream suitable for <code>fieldName</code>, tokenizing

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
index b18e17a..f58467b 100644
--- a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
+++ b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
@@ -95,7 +95,7 @@ namespace Lucene.Net.Analysis
             return reader;
         }
 
-        protected internal override sealed TokenStreamComponents CreateComponents(string fieldName, TextReader aReader)
+        protected override sealed TokenStreamComponents CreateComponents(string fieldName, TextReader aReader)
         {
             return WrapComponents(fieldName, GetWrappedAnalyzer(fieldName).CreateComponents(fieldName, aReader));
         }
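Narrowing CreateComponents from protected internal to protected leaves external subclasses
unaffected; they override it the same way, e.g. this minimal sketch (assuming the ported
WhitespaceTokenizer and a LUCENE_48 Version constant):

    public sealed class WhitespaceOnlyAnalyzer : Analyzer
    {
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            return new TokenStreamComponents(new WhitespaceTokenizer(Version.LUCENE_48, reader));
        }
    }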

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Core/Analysis/TokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs
index 40cb92e..ccaed6b 100644
--- a/src/Lucene.Net.Core/Analysis/TokenStream.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs
@@ -1,5 +1,7 @@
 using Lucene.Net.Analysis.Tokenattributes;
 using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis
 {


[31/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
new file mode 100644
index 0000000..58b40a1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -0,0 +1,202 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Base class for decomposition token filters.
+	/// <para>
+	/// 
+	/// <a name="version"></a>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// CompoundWordTokenFilterBase:
+	/// <ul>
+	/// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+	/// supplementary characters in strings and char arrays provided as compound word
+	/// dictionaries.
+	/// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public abstract class CompoundWordTokenFilterBase : TokenFilter
+	{
+	  /// <summary>
+	  /// The default for minimal word length that gets decomposed
+	  /// </summary>
+	  public const int DEFAULT_MIN_WORD_SIZE = 5;
+
+	  /// <summary>
+	  /// The default for minimal length of subwords that get propagated to the output of this filter
+	  /// </summary>
+	  public const int DEFAULT_MIN_SUBWORD_SIZE = 2;
+
+	  /// <summary>
+	  /// The default for maximal length of subwords that get propagated to the output of this filter
+	  /// </summary>
+	  public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
+
+	  protected internal readonly Version matchVersion;
+	  protected internal readonly CharArraySet dictionary;
+	  protected internal readonly LinkedList<CompoundToken> tokens;
+	  protected internal readonly int minWordSize;
+	  protected internal readonly int minSubwordSize;
+	  protected internal readonly int maxSubwordSize;
+	  protected internal readonly bool onlyLongestMatch;
+
+	  protected internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  protected internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  private AttributeSource.State current;
+
+	  protected internal CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, bool onlyLongestMatch) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch)
+	  {
+	  }
+
+	  protected internal CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary) : this(matchVersion, input,dictionary,DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false)
+	  {
+	  }
+
+	  protected internal CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(input)
+	  {
+		this.matchVersion = matchVersion;
+		this.tokens = new LinkedList<CompoundToken>(); // C# has no diamond operator
+		if (minWordSize < 0)
+		{
+		  throw new System.ArgumentException("minWordSize cannot be negative");
+		}
+		this.minWordSize = minWordSize;
+		if (minSubwordSize < 0)
+		{
+		  throw new System.ArgumentException("minSubwordSize cannot be negative");
+		}
+		this.minSubwordSize = minSubwordSize;
+		if (maxSubwordSize < 0)
+		{
+		  throw new System.ArgumentException("maxSubwordSize cannot be negative");
+		}
+		this.maxSubwordSize = maxSubwordSize;
+		this.onlyLongestMatch = onlyLongestMatch;
+		this.dictionary = dictionary;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (tokens.Count > 0)
+		{
+		  Debug.Assert(current != null);
+		  CompoundToken token = tokens.RemoveFirst();
+		  restoreState(current); // keep all other attributes untouched
+		  termAtt.setEmpty().append(token.txt);
+		  offsetAtt.setOffset(token.startOffset, token.endOffset);
+		  posIncAtt.PositionIncrement = 0;
+		  return true;
+		}
+
+		current = null; // not really needed, but for safety
+		if (input.incrementToken())
+		{
+		  // Only words longer than minWordSize get processed
+		  if (termAtt.length() >= this.minWordSize)
+		  {
+			decompose();
+			// only capture the state if we really need it for producing new tokens
+			if (tokens.Count > 0)
+			{
+			  current = captureState();
+			}
+		  }
+		  // return original token:
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list.
+	  /// The original token may not be placed in the list, as it is automatically passed through this filter.
+	  /// </summary>
+	  protected internal abstract void decompose();
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		tokens.Clear();
+		current = null;
+	  }
+
+	  /// <summary>
+	  /// Helper class to hold decompounded token information
+	  /// </summary>
+	  protected internal class CompoundToken
+	  {
+		  private readonly CompoundWordTokenFilterBase outerInstance;
+
+		public readonly CharSequence txt;
+		public readonly int startOffset, endOffset;
+
+		/// <summary>
+		/// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary>
+		public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
+		{
+			this.outerInstance = outerInstance;
+		  this.txt = outerInstance.termAtt.subSequence(offset, offset + length);
+
+		  // offsets of the original word
+		  int startOff = outerInstance.offsetAtt.startOffset();
+		  int endOff = outerInstance.offsetAtt.endOffset();
+
+		  if (outerInstance.matchVersion.onOrAfter(Version.LUCENE_44) || endOff - startOff != outerInstance.termAtt.length())
+		  {
+			// if length by start + end offsets doesn't match the term text then assume
+			// this is a synonym and don't adjust the offsets.
+			this.startOffset = startOff;
+			this.endOffset = endOff;
+		  }
+		  else
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newStart = startOff + offset;
+			int newStart = startOff + offset;
+			this.startOffset = newStart;
+			this.endOffset = newStart + length;
+		  }
+		}
+
+	  }
+	}
+
+}
\ No newline at end of file
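The decompose() contract above amounts to pushing CompoundToken slices onto the tokens queue;
incrementToken then replays them at position increment 0 behind the original token. A toy
subclass sketch of that contract (hypothetical, just to show the mechanics):

    protected internal override void decompose()
    {
        // split every qualifying token in half; real filters consult a dictionary
        int len = termAtt.length();
        tokens.AddLast(new CompoundToken(this, 0, len / 2));
        tokens.AddLast(new CompoundToken(this, len / 2, len - len / 2));
    }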

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
new file mode 100644
index 0000000..6b875e0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
@@ -0,0 +1,137 @@
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+	/// <para>
+	/// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
+	/// "Donaudampfschiff" even when you only enter "schiff". 
+	///  It uses a brute-force algorithm to achieve this.
+	/// </para>
+	/// <para>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// CompoundWordTokenFilterBase:
+	/// <ul>
+	/// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+	/// supplementary characters in strings and char arrays provided as compound word
+	/// dictionaries.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public class DictionaryCompoundWordTokenFilter : CompoundWordTokenFilterBase
+	{
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to enable correct Unicode 4.0 behavior in the
+	  ///          dictionaries if Version > 3.0. See <a
+	  ///          href="CompoundWordTokenFilterBase.html#version"
+	  ///          >CompoundWordTokenFilterBase</a> for details. </param>
+	  /// <param name="input">
+	  ///          the <seealso cref="TokenStream"/> to process </param>
+	  /// <param name="dictionary">
+	  ///          the word dictionary to match against. </param>
+	  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) : base(matchVersion, input, dictionary)
+	  {
+		if (dictionary == null)
+		{
+		  throw new System.ArgumentException("dictionary cannot be null");
+		}
+	  }
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to enable correct Unicode 4.0 behavior in the
+	  ///          dictionaries if Version > 3.0. See <a
+	  ///          href="CompoundWordTokenFilterBase.html#version"
+	  ///          >CompoundWordTokenFilterBase</a> for details. </param>
+	  /// <param name="input">
+	  ///          the <seealso cref="TokenStream"/> to process </param>
+	  /// <param name="dictionary">
+	  ///          the word dictionary to match against. </param>
+	  /// <param name="minWordSize">
+	  ///          only words longer than this get processed </param>
+	  /// <param name="minSubwordSize">
+	  ///          only subwords longer than this get to the output stream </param>
+	  /// <param name="maxSubwordSize">
+	  ///          only subwords shorter than this get to the output stream </param>
+	  /// <param name="onlyLongestMatch">
+	  ///          Add only the longest matching subword to the stream </param>
+	  public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+	  {
+		if (dictionary == null)
+		{
+		  throw new System.ArgumentException("dictionary cannot be null");
+		}
+	  }
+
+	  protected internal override void decompose()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int len = termAtt.length();
+		int len = termAtt.length();
+		for (int i = 0; i <= len - this.minSubwordSize; ++i)
+		{
+			CompoundToken longestMatchToken = null;
+			for (int j = this.minSubwordSize; j <= this.maxSubwordSize; ++j)
+			{
+				if (i + j > len)
+				{
+					break;
+				}
+				if (dictionary.contains(termAtt.buffer(), i, j))
+				{
+					if (this.onlyLongestMatch)
+					{
+					   if (longestMatchToken != null)
+					   {
+						 if (longestMatchToken.txt.length() < j)
+						 {
+						   longestMatchToken = new CompoundToken(this, i,j);
+						 }
+					   }
+					   else
+					   {
+						 longestMatchToken = new CompoundToken(this, i,j);
+					   }
+					}
+					else
+					{
+					   tokens.AddLast(new CompoundToken(this, i,j));
+					}
+				}
+			}
+			if (this.onlyLongestMatch && longestMatchToken != null)
+			{
+			  tokens.AddLast(longestMatchToken);
+			}
+		}
+	  }
+	}
+
+}
\ No newline at end of file
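
For reference, a minimal usage sketch of the filter above. It assumes the ported CharArraySet and WhitespaceTokenizer keep their Lucene 4.8 constructor shapes; the dictionary entries and the version constant are illustrative, not part of this commit:

    // Dictionary of known word parts; true = ignore case when matching.
    CharArraySet dict = new CharArraySet(Version.LUCENE_48,
        new[] { "donau", "dampf", "schiff" }, true);

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48,
        new StringReader("Donaudampfschiff"));
    ts = new DictionaryCompoundWordTokenFilter(Version.LUCENE_48, ts, dict);
    // The stream now yields "Donaudampfschiff" plus the subwords
    // "donau", "dampf" and "schiff" at the same position.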

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
new file mode 100644
index 0000000..497d89d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
@@ -0,0 +1,81 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
+	///         minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private CharArraySet dictionary;
+	  private readonly string dictFile;
+	  private readonly int minWordSize;
+	  private readonly int minSubwordSize;
+	  private readonly int maxSubwordSize;
+	  private readonly bool onlyLongestMatch;
+
+	  /// <summary>
+	  /// Creates a new DictionaryCompoundWordTokenFilterFactory </summary>
+	  public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		dictFile = require(args, "dictionary");
+		minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+		minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+		maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+		onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		dictionary = base.getWordSet(loader, dictFile, false);
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		// if the dictionary is null, it means it was empty
+		return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+	  }
+	}
+
+
+}
\ No newline at end of file
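
A sketch of driving the factory above directly from code rather than a Solr schema. The loader and tokenizer variables stand in for whatever ResourceLoader and TokenStream the caller already has; they are assumptions, not part of this commit:

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_48" }, // consumed by the base factory ctor
        { "dictionary", "dictionary.txt" },    // required; require() removes it from args
        { "minSubwordSize", "2" }
    };
    var factory = new DictionaryCompoundWordTokenFilterFactory(args);
    factory.inform(loader);  // loader: a ResourceLoader; reads dictionary.txt into a CharArraySet
    TokenStream filtered = factory.create(tokenizer);  // tokenizer: any TokenStream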

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
new file mode 100644
index 0000000..0b5e99c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -0,0 +1,255 @@
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Hyphenation = org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
+	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+	using InputSource = org.xml.sax.InputSource;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+	/// <para>
+	/// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
+	/// "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
+	/// grammar and a word dictionary to achieve this.
+	/// </para>
+	/// <para>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// CompoundWordTokenFilterBase:
+	/// <ul>
+	/// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+	/// supplementary characters in strings and char arrays provided as compound word
+	/// dictionaries.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
+	{
+	  private HyphenationTree hyphenator;
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance. 
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to enable correct Unicode 4.0 behavior in the
+	  ///          dictionaries if Version > 3.0. See <a
+	  ///          href="CompoundWordTokenFilterBase.html#version"
+	  ///          >CompoundWordTokenFilterBase</a> for details. </param>
+	  /// <param name="input">
+	  ///          the <seealso cref="TokenStream"/> to process </param>
+	  /// <param name="hyphenator">
+	  ///          the hyphenation pattern tree to use for hyphenation </param>
+	  /// <param name="dictionary">
+	  ///          the word dictionary to match against. </param>
+	  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary) : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          Lucene version to enable correct Unicode 4.0 behavior in the
+	  ///          dictionaries if Version > 3.0. See <a
+	  ///          href="CompoundWordTokenFilterBase.html#version"
+	  ///          >CompoundWordTokenFilterBase</a> for details. </param>
+	  /// <param name="input">
+	  ///          the <seealso cref="TokenStream"/> to process </param>
+	  /// <param name="hyphenator">
+	  ///          the hyphenation pattern tree to use for hyphenation </param>
+	  /// <param name="dictionary">
+	  ///          the word dictionary to match against. </param>
+	  /// <param name="minWordSize">
+	  ///          only words longer than this get processed </param>
+	  /// <param name="minSubwordSize">
+	  ///          only subwords longer than this get to the output stream </param>
+	  /// <param name="maxSubwordSize">
+	  ///          only subwords shorter than this get to the output stream </param>
+	  /// <param name="onlyLongestMatch">
+	  ///          Add only the longest matching subword to the stream </param>
+	  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+	  {
+
+		this.hyphenator = hyphenator;
+	  }
+
+	  /// <summary>
+	  /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+	  /// <para>
+	  /// Calls <seealso cref="HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/>
+	  /// as HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+	  /// null, minWordSize, minSubwordSize, maxSubwordSize, false).
+	  /// </para>
+	  /// </summary>
+	  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize) : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+	  /// <para>
+	  /// Calls <seealso cref="HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int)"/>
+	  /// as HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
+	  /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE).
+	  /// </para>
+	  /// </summary>
+	  public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator) : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a hyphenator tree
+	  /// </summary>
+	  /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
+	  /// <returns> An object representing the hyphenation patterns </returns>
+	  /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(String hyphenationFilename) throws java.io.IOException
+	  public static HyphenationTree getHyphenationTree(string hyphenationFilename)
+	  {
+		return getHyphenationTree(new InputSource(hyphenationFilename));
+	  }
+
+	  /// <summary>
+	  /// Create a hyphenator tree
+	  /// </summary>
+	  /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
+	  /// <returns> An object representing the hyphenation patterns </returns>
+	  /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(java.io.File hyphenationFile) throws java.io.IOException
+	  public static HyphenationTree getHyphenationTree(System.IO.FileInfo hyphenationFile)
+	  {
+		return getHyphenationTree(new InputSource(new System.Uri(hyphenationFile.FullName).AbsoluteUri));
+	  }
+
+	  /// <summary>
+	  /// Create a hyphenator tree
+	  /// </summary>
+	  /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
+	  /// <returns> An object representing the hyphenation patterns </returns>
+	  /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(org.xml.sax.InputSource hyphenationSource) throws java.io.IOException
+	  public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+	  {
+		HyphenationTree tree = new HyphenationTree();
+		tree.loadPatterns(hyphenationSource);
+		return tree;
+	  }
+
+	  protected internal override void decompose()
+	  {
+		// get the hyphenation points
+		Hyphenation hyphens = hyphenator.hyphenate(termAtt.buffer(), 0, termAtt.length(), 1, 1);
+		// No hyphen points found -> exit
+		if (hyphens == null)
+		{
+		  return;
+		}
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int[] hyp = hyphens.getHyphenationPoints();
+		int[] hyp = hyphens.HyphenationPoints;
+
+		for (int i = 0; i < hyp.Length; ++i)
+		{
+		  int remaining = hyp.Length - i;
+		  int start = hyp[i];
+		  CompoundToken longestMatchToken = null;
+		  for (int j = 1; j < remaining; j++)
+		  {
+			int partLength = hyp[i + j] - start;
+
+			// if the part is longer than maxSubwordSize we
+			// are done with this round
+			if (partLength > this.maxSubwordSize)
+			{
+			  break;
+			}
+
+			// we only put subwords to the token stream
+			// that are longer than minPartSize
+			if (partLength < this.minSubwordSize)
+			{
+			  // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the 
+			  // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
+			  continue;
+			}
+
+			// check the dictionary
+			if (dictionary == null || dictionary.contains(termAtt.buffer(), start, partLength))
+			{
+			  if (this.onlyLongestMatch)
+			  {
+				if (longestMatchToken != null)
+				{
+				  if (longestMatchToken.txt.length() < partLength)
+				  {
+					longestMatchToken = new CompoundToken(this, start, partLength);
+				  }
+				}
+				else
+				{
+				  longestMatchToken = new CompoundToken(this, start, partLength);
+				}
+			  }
+			  else
+			  {
+				tokens.AddLast(new CompoundToken(this, start, partLength));
+			  }
+			}
+			else if (dictionary.contains(termAtt.buffer(), start, partLength - 1))
+			{
+			  // check the dictionary again with a word that is one character
+			  // shorter
+			  // to avoid problems with genitive 's characters and other binding
+			  // characters
+			  if (this.onlyLongestMatch)
+			  {
+				if (longestMatchToken != null)
+				{
+				  if (longestMatchToken.txt.length() < partLength - 1)
+				  {
+					longestMatchToken = new CompoundToken(this, start, partLength - 1);
+				  }
+				}
+				else
+				{
+				  longestMatchToken = new CompoundToken(this, start, partLength - 1);
+				}
+			  }
+			  else
+			  {
+				tokens.AddLast(new CompoundToken(this, start, partLength - 1));
+			  }
+			}
+		  }
+		  if (this.onlyLongestMatch && longestMatchToken != null)
+		  {
+			tokens.AddLast(longestMatchToken);
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file
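
For reference, a sketch wiring the filter above to an OFFO/FOP hyphenation grammar in dictionary-less mode; the grammar file name and version constant are illustrative:

    // Parse the XML hyphenation grammar once and reuse the tree.
    HyphenationTree hyphenator =
        HyphenationCompoundWordTokenFilter.getHyphenationTree("de_DR.xml");

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48,
        new StringReader("Donaudampfschiff"));
    // No dictionary: every hyphenation point becomes a candidate subword boundary.
    ts = new HyphenationCompoundWordTokenFilter(Version.LUCENE_48, ts, hyphenator);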

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
new file mode 100644
index 0000000..4a51f7b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
@@ -0,0 +1,125 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.compound
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+
+	using InputSource = org.xml.sax.InputSource;
+
+	/// <summary>
+	/// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
+	/// <para>
+	/// This factory accepts the following parameters:
+	/// <ul>
+	///  <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. 
+	///  See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.
+	///  <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.
+	///  <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.
+	///  <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.
+	///  <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.
+	///  <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.
+	///  <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword 
+	///    to the stream. defaults to false.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
+	///         dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// </para>
+	/// </summary>
+	/// <seealso cref= HyphenationCompoundWordTokenFilter </seealso>
+	public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private CharArraySet dictionary;
+	  private HyphenationTree hyphenator;
+	  private readonly string dictFile;
+	  private readonly string hypFile;
+	  private readonly string encoding;
+	  private readonly int minWordSize;
+	  private readonly int minSubwordSize;
+	  private readonly int maxSubwordSize;
+	  private readonly bool onlyLongestMatch;
+
+	  /// <summary>
+	  /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
+	  public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		dictFile = get(args, "dictionary");
+		encoding = get(args, "encoding");
+		hypFile = require(args, "hyphenator");
+		minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+		minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+		maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+		onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		System.IO.Stream stream = null;
+		try
+		{
+		  if (dictFile != null) // the dictionary can be empty.
+		  {
+			dictionary = getWordSet(loader, dictFile, false);
+		  }
+		  // TODO: Broken, because we cannot resolve real system id
+		  // ResourceLoader should also supply method like ClassLoader to get resource URL
+		  stream = loader.openResource(hypFile);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.xml.sax.InputSource is = new org.xml.sax.InputSource(stream);
+		  InputSource @is = new InputSource(stream);
+		  @is.Encoding = encoding; // if it's null let xml parser decide
+		  @is.SystemId = hypFile;
+		  hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(stream);
+		}
+	  }
+
+	  public override HyphenationCompoundWordTokenFilter create(TokenStream input)
+	  {
+		return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
new file mode 100644
index 0000000..963ad0d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
@@ -0,0 +1,151 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	/// <summary>
+	/// This class implements a simple byte vector with access to the underlying
+	/// array.
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public class ByteVector
+	{
+
+	  /// <summary>
+	  /// Capacity increment size
+	  /// </summary>
+	  private const int DEFAULT_BLOCK_SIZE = 2048;
+
+	  private int blockSize;
+
+	  /// <summary>
+	  /// The encapsulated array
+	  /// </summary>
+	  private sbyte[] array;
+
+	  /// <summary>
+	  /// Points to next free item
+	  /// </summary>
+	  private int n;
+
+	  public ByteVector() : this(DEFAULT_BLOCK_SIZE)
+	  {
+	  }
+
+	  public ByteVector(int capacity)
+	  {
+		if (capacity > 0)
+		{
+		  blockSize = capacity;
+		}
+		else
+		{
+		  blockSize = DEFAULT_BLOCK_SIZE;
+		}
+		array = new sbyte[blockSize];
+		n = 0;
+	  }
+
+	  public ByteVector(sbyte[] a)
+	  {
+		blockSize = DEFAULT_BLOCK_SIZE;
+		array = a;
+		n = 0;
+	  }
+
+	  public ByteVector(sbyte[] a, int capacity)
+	  {
+		if (capacity > 0)
+		{
+		  blockSize = capacity;
+		}
+		else
+		{
+		  blockSize = DEFAULT_BLOCK_SIZE;
+		}
+		array = a;
+		n = 0;
+	  }
+
+	  public virtual sbyte[] Array
+	  {
+		  get
+		  {
+			return array;
+		  }
+	  }
+
+	  /// <summary>
+	  /// return number of items in array
+	  /// </summary>
+	  public virtual int length()
+	  {
+		return n;
+	  }
+
+	  /// <summary>
+	  /// returns current capacity of array
+	  /// </summary>
+	  public virtual int capacity()
+	  {
+		return array.Length;
+	  }
+
+	  public virtual void put(int index, sbyte val)
+	  {
+		array[index] = val;
+	  }
+
+	  public virtual sbyte get(int index)
+	  {
+		return array[index];
+	  }
+
+	  /// <summary>
+	  /// This is to implement memory allocation in the array. Like malloc().
+	  /// </summary>
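+	  // Example: with n == 10, alloc(4) returns the old offset 10 and bumps n
+	  // to 14, growing the backing array by blockSize if it would overflow.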
+	  public virtual int alloc(int size)
+	  {
+		int index = n;
+		int len = array.Length;
+		if (n + size >= len)
+		{
+		  sbyte[] aux = new sbyte[len + blockSize];
+		  Array.Copy(array, 0, aux, 0, len);
+		  array = aux;
+		}
+		n += size;
+		return index;
+	  }
+
+	  public virtual void trimToSize()
+	  {
+		if (n < array.Length)
+		{
+		  sbyte[] aux = new sbyte[n];
+		  Array.Copy(array, 0, aux, 0, n);
+		  array = aux;
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
new file mode 100644
index 0000000..6868911
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
@@ -0,0 +1,163 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	/// <summary>
+	/// This class implements a simple char vector with access to the underlying
+	/// array.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public class CharVector : ICloneable
+	{
+
+	  /// <summary>
+	  /// Capacity increment size
+	  /// </summary>
+	  private const int DEFAULT_BLOCK_SIZE = 2048;
+
+	  private int blockSize;
+
+	  /// <summary>
+	  /// The encapsulated array
+	  /// </summary>
+	  private char[] array;
+
+	  /// <summary>
+	  /// Points to next free item
+	  /// </summary>
+	  private int n;
+
+	  public CharVector() : this(DEFAULT_BLOCK_SIZE)
+	  {
+	  }
+
+	  public CharVector(int capacity)
+	  {
+		if (capacity > 0)
+		{
+		  blockSize = capacity;
+		}
+		else
+		{
+		  blockSize = DEFAULT_BLOCK_SIZE;
+		}
+		array = new char[blockSize];
+		n = 0;
+	  }
+
+	  public CharVector(char[] a)
+	  {
+		blockSize = DEFAULT_BLOCK_SIZE;
+		array = a;
+		n = a.Length;
+	  }
+
+	  public CharVector(char[] a, int capacity)
+	  {
+		if (capacity > 0)
+		{
+		  blockSize = capacity;
+		}
+		else
+		{
+		  blockSize = DEFAULT_BLOCK_SIZE;
+		}
+		array = a;
+		n = a.Length;
+	  }
+
+	  /// <summary>
+	  /// Reset Vector but don't resize or clear elements
+	  /// </summary>
+	  public virtual void clear()
+	  {
+		n = 0;
+	  }
+
+	  public virtual object Clone()
+	  {
+		CharVector cv = new CharVector((char[])array.Clone(), blockSize);
+		cv.n = this.n;
+		return cv;
+	  }
+
+	  public virtual char[] Array
+	  {
+		  get
+		  {
+			return array;
+		  }
+	  }
+
+	  /// <summary>
+	  /// return number of items in array
+	  /// </summary>
+	  public virtual int length()
+	  {
+		return n;
+	  }
+
+	  /// <summary>
+	  /// returns current capacity of array
+	  /// </summary>
+	  public virtual int capacity()
+	  {
+		return array.Length;
+	  }
+
+	  public virtual void put(int index, char val)
+	  {
+		array[index] = val;
+	  }
+
+	  public virtual char get(int index)
+	  {
+		return array[index];
+	  }
+
+	  public virtual int alloc(int size)
+	  {
+		int index = n;
+		int len = array.Length;
+		if (n + size >= len)
+		{
+		  char[] aux = new char[len + blockSize];
+		  Array.Copy(array, 0, aux, 0, len);
+		  array = aux;
+		}
+		n += size;
+		return index;
+	  }
+
+	  public virtual void trimToSize()
+	  {
+		if (n < array.Length)
+		{
+		  char[] aux = new char[n];
+		  Array.Copy(array, 0, aux, 0, n);
+		  array = aux;
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
new file mode 100644
index 0000000..819d756
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
@@ -0,0 +1,76 @@
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	/// <summary>
+	/// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
+	/// pre-break text, post-break text and no-break. If no line-break is generated
+	/// at this position, the no-break text is used, otherwise, pre-break and
+	/// post-break are used. Typically, pre-break is equal to the hyphen character
+	/// and the others are empty. However, this general scheme allows support for
+	/// cases in some languages where words change spelling if they're split across
+	/// lines, like German's 'backen', which hyphenates as 'bak-ken'. BTW, this comes
+	/// from TeX.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+
+	public class Hyphen
+	{
+	  public string preBreak;
+
+	  public string noBreak;
+
+	  public string postBreak;
+
+	  internal Hyphen(string pre, string no, string post)
+	  {
+		preBreak = pre;
+		noBreak = no;
+		postBreak = post;
+	  }
+
+	  internal Hyphen(string pre)
+	  {
+		preBreak = pre;
+		noBreak = null;
+		postBreak = null;
+	  }
+
+	  public override string ToString()
+	  {
+		if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
+		{
+		  return "-";
+		}
+		StringBuilder res = new StringBuilder("{");
+		res.Append(preBreak);
+		res.Append("}{");
+		res.Append(postBreak);
+		res.Append("}{");
+		res.Append(noBreak);
+		res.Append('}');
+		return res.ToString();
+	  }
+
+	}
+
+}
\ No newline at end of file
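
To make the 'backen' example in the doc comment above concrete: the constructor order is (pre, no, post), so the discretionary that turns 'backen' into 'bak-ken' would be built as below. The constructors are internal to this assembly, so this is illustrative only:

    Hyphen ck = new Hyphen("k-", "ck", "k");
    // ToString() renders this as "{k-}{k}{ck}": pre-break, post-break, no-break.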

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
new file mode 100644
index 0000000..ccf7387
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	/// <summary>
+	/// This class represents a hyphenated word.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public class Hyphenation
+	{
+
+	  private int[] hyphenPoints;
+
+	  /// <summary>
+	  /// Creates a new Hyphenation from the given hyphenation points.
+	  /// </summary>
+	  internal Hyphenation(int[] points)
+	  {
+		hyphenPoints = points;
+	  }
+
+	  /// <returns> the number of hyphenation points in the word </returns>
+	  public virtual int length()
+	  {
+		return hyphenPoints.Length;
+	  }
+
+	  /// <returns> the hyphenation points </returns>
+	  public virtual int[] HyphenationPoints
+	  {
+		  get
+		  {
+			return hyphenPoints;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
new file mode 100644
index 0000000..9bc4cc0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
@@ -0,0 +1,533 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+
+	using InputSource = org.xml.sax.InputSource;
+
+	/// <summary>
+	/// This tree structure stores the hyphenation patterns in an efficient way for
+	/// fast lookup. It provides the method to hyphenate a word.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public class HyphenationTree : TernaryTree, PatternConsumer
+	{
+
+	  /// <summary>
+	  /// value space: stores the interletter values
+	  /// </summary>
+	  protected internal ByteVector vspace;
+
+	  /// <summary>
+	  /// This map stores hyphenation exceptions
+	  /// </summary>
+	  protected internal Dictionary<string, List<object>> stoplist;
+
+	  /// <summary>
+	  /// This map stores the character classes
+	  /// </summary>
+	  protected internal TernaryTree classmap;
+
+	  /// <summary>
+	  /// Temporary map to store interletter values on pattern loading.
+	  /// </summary>
+	  [NonSerialized]
+	  private TernaryTree ivalues;
+
+	  public HyphenationTree()
+	  {
+		stoplist = new Dictionary<string, List<object>>(23); // usually a small table
+		classmap = new TernaryTree();
+		vspace = new ByteVector();
+		vspace.alloc(1); // this reserves index 0, which we don't use
+	  }
+
+	  /// <summary>
+	  /// Packs the values by storing them in 4 bits, two values per byte. Values
+	  /// range from 0 to 9. We use zero as terminator, so we'll add 1 to the
+	  /// value.
+	  /// </summary>
+	  /// <param name="values"> a string of digits from '0' to '9' representing the
+	  ///        interletter values. </param>
+	  /// <returns> the index into the vspace array where the packed values are stored. </returns>
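+	  // Example: packValues("13") packs the nibbles ('1'-'0'+1)=2 and
+	  // ('3'-'0'+1)=4 into the single big-endian byte 0x24, then writes a
+	  // 0x00 terminator byte.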
+	  protected internal virtual int packValues(string values)
+	  {
+		int i, n = values.Length;
+		int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
+		int offset = vspace.alloc(m);
+		sbyte[] va = vspace.Array;
+		for (i = 0; i < n; i++)
+		{
+		  int j = i >> 1;
+		  sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
+		  if ((i & 1) == 1)
+		  {
+			va[j + offset] = (sbyte)(va[j + offset] | v);
+		  }
+		  else
+		  {
+			va[j + offset] = (sbyte)(v << 4); // big endian
+		  }
+		}
+		va[m - 1 + offset] = 0; // terminator
+		return offset;
+	  }
+
+	  protected internal virtual string unpackValues(int k)
+	  {
+		StringBuilder buf = new StringBuilder();
+		sbyte v = vspace.get(k++);
+		while (v != 0)
+		{
+		  char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
+		  buf.Append(c);
+		  c = (char)(v & 0x0f);
+		  if (c == 0)
+		  {
+			break;
+		  }
+		  c = (char)(c - 1 + '0');
+		  buf.Append(c);
+		  v = vspace.get(k++);
+		}
+		return buf.ToString();
+	  }
+
+	  /// <summary>
+	  /// Read hyphenation patterns from an XML file.
+	  /// </summary>
+	  /// <param name="f"> the filename </param>
+	  /// <exception cref="IOException"> In case the parsing fails </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void loadPatterns(java.io.File f) throws java.io.IOException
+	  public virtual void loadPatterns(System.IO.FileInfo f)
+	  {
+		InputSource src = new InputSource(new Uri(f.FullName).AbsoluteUri);
+		loadPatterns(src);
+	  }
+
+	  /// <summary>
+	  /// Read hyphenation patterns from an XML file.
+	  /// </summary>
+	  /// <param name="source"> the InputSource for the file </param>
+	  /// <exception cref="IOException"> In case the parsing fails </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void loadPatterns(org.xml.sax.InputSource source) throws java.io.IOException
+	  public virtual void loadPatterns(InputSource source)
+	  {
+		PatternParser pp = new PatternParser(this);
+		ivalues = new TernaryTree();
+
+		pp.parse(source);
+
+		// patterns/values should be now in the tree
+		// let's optimize a bit
+		trimToSize();
+		vspace.trimToSize();
+		classmap.trimToSize();
+
+		// get rid of the auxiliary map
+		ivalues = null;
+	  }
+
+	  public virtual string findPattern(string pat)
+	  {
+		int k = base.find(pat);
+		if (k >= 0)
+		{
+		  return unpackValues(k);
+		}
+		return "";
+	  }
+
+	  /// <summary>
+	  /// String compare, returns 0 if equal or t is a substring of s
+	  /// </summary>
+	  protected internal virtual int hstrcmp(char[] s, int si, char[] t, int ti)
+	  {
+		for (; s[si] == t[ti]; si++, ti++)
+		{
+		  if (s[si] == 0)
+		  {
+			return 0;
+		  }
+		}
+		if (t[ti] == 0)
+		{
+		  return 0;
+		}
+		return s[si] - t[ti];
+	  }
+
+	  protected internal virtual sbyte[] getValues(int k)
+	  {
+		StringBuilder buf = new StringBuilder();
+		sbyte v = vspace.get(k++);
+		while (v != 0)
+		{
+		  char c = (char)(((int)((uint)v >> 4)) - 1);
+		  buf.Append(c);
+		  c = (char)(v & 0x0f);
+		  if (c == 0)
+		  {
+			break;
+		  }
+		  c = (char)(c - 1);
+		  buf.Append(c);
+		  v = vspace.get(k++);
+		}
+		sbyte[] res = new sbyte[buf.Length];
+		for (int i = 0; i < res.Length; i++)
+		{
+		  res[i] = (sbyte) buf[i];
+		}
+		return res;
+	  }
+
+	  /// <summary>
+	  /// <para>
+	  /// Search for all possible partial matches of word starting at index and update
+	  /// interletter values. In other words, it does something like:
+	  /// </para>
+	  /// <code>
+	  /// for(i=0; i&lt;patterns.length; i++) {
+	  /// if ( word.substring(index).startsWith(patterns[i]) )
+	  /// update_interletter_values(patterns[i]);
+	  /// }
+	  /// </code>
+	  /// <para>
+	  /// But it is done in an efficient way since the patterns are stored in a
+	  /// ternary tree. In fact, this is the whole purpose of having the tree: doing
+	  /// this search without having to test every single pattern. The number of
+	  /// patterns for languages such as English ranges from 4000 to 10000. Thus,
+	  /// doing thousands of string comparisons for each word to hyphenate would be
+	  /// really slow without the tree. The tradeoff is memory, but using a ternary
+	  /// tree instead of a trie almost halves the memory used by Lout or TeX.
+	  /// It's also faster than using a hash table.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="word"> null terminated word to match </param>
+	  /// <param name="index"> start index from word </param>
+	  /// <param name="il"> interletter values array to update </param>
+	  protected internal virtual void searchPatterns(char[] word, int index, sbyte[] il)
+	  {
+		sbyte[] values;
+		int i = index;
+		char p, q;
+		char sp = word[i];
+		p = root;
+
+		while (p > 0 && p < sc.Length)
+		{
+		  if (sc[p] == 0xFFFF)
+		  {
+			if (hstrcmp(word, i, kv.Array, lo[p]) == 0)
+			{
+			  values = getValues(eq[p]); // data pointer is in eq[]
+			  int j = index;
+			  for (int k = 0; k < values.Length; k++)
+			  {
+				if (j < il.Length && values[k] > il[j])
+				{
+				  il[j] = values[k];
+				}
+				j++;
+			  }
+			}
+			return;
+		  }
+		  int d = sp - sc[p];
+		  if (d == 0)
+		  {
+			if (sp == 0)
+			{
+			  break;
+			}
+			sp = word[++i];
+			p = eq[p];
+			q = p;
+
+			// look for a pattern ending at this position by searching for
+			// the null char ( splitchar == 0 )
+			while (q > 0 && q < sc.Length)
+			{
+			  if (sc[q] == 0xFFFF) // stop at compressed branch
+			  {
+				break;
+			  }
+			  if (sc[q] == 0)
+			  {
+				values = getValues(eq[q]);
+				int j = index;
+				for (int k = 0; k < values.Length; k++)
+				{
+				  if (j < il.Length && values[k] > il[j])
+				  {
+					il[j] = values[k];
+				  }
+				  j++;
+				}
+				break;
+			  }
+			  else
+			  {
+				q = lo[q];
+
+				// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q],
+				// but Java chars are unsigned
+			  }
+			}
+		  }
+		  else
+		  {
+			p = d < 0 ? lo[p] : hi[p];
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Hyphenate word and return a Hyphenation object.
+	  /// </summary>
+	  /// <param name="word"> the word to be hyphenated </param>
+	  /// <param name="remainCharCount"> Minimum number of characters allowed before the
+	  ///        hyphenation point. </param>
+	  /// <param name="pushCharCount"> Minimum number of characters allowed after the
+	  ///        hyphenation point. </param>
+	  /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+	  ///         hyphenated word or null if word is not hyphenated. </returns>
+	  public virtual Hyphenation hyphenate(string word, int remainCharCount, int pushCharCount)
+	  {
+		char[] w = word.ToCharArray();
+		return hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
+	  }
+
+	  /// <summary>
+	  /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
+	  /// may be absent, the first n is at offset, the first l is at offset +
+	  /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
+	  /// into word. In the first part of the routine len = w.length, in the second
+	  /// part of the routine len = word.length. Three indices are used: index(w),
+	  /// the index in w, index(word), the index in word, letterindex(word), the
+	  /// index in the letter part of word. The following relations exist: index(w) =
+	  /// offset + i - 1 index(word) = i - iIgnoreAtBeginning letterindex(word) =
+	  /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
+	  /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
+	  /// iIgnoreAtBeginning
+	  /// </summary>
+
+	  /// <summary>
+	  /// Hyphenate word and return an array of hyphenation points.
+	  /// </summary>
+	  /// <param name="w"> char array that contains the word </param>
+	  /// <param name="offset"> Offset to first character in word </param>
+	  /// <param name="len"> Length of word </param>
+	  /// <param name="remainCharCount"> Minimum number of characters allowed before the
+	  ///        hyphenation point. </param>
+	  /// <param name="pushCharCount"> Minimum number of characters allowed after the
+	  ///        hyphenation point. </param>
+	  /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+	  ///         hyphenated word or null if word is not hyphenated. </returns>
+	  public virtual Hyphenation hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
+	  {
+		int i;
+		char[] word = new char[len + 3];
+
+		// normalize word
+		char[] c = new char[2];
+		int iIgnoreAtBeginning = 0;
+		int iLength = len;
+		bool bEndOfLetters = false;
+		for (i = 1; i <= len; i++)
+		{
+		  c[0] = w[offset + i - 1];
+		  int nc = classmap.find(c, 0);
+		  if (nc < 0) // found a non-letter character ...
+		  {
+			if (i == (1 + iIgnoreAtBeginning))
+			{
+			  // ... before any letter character
+			  iIgnoreAtBeginning++;
+			}
+			else
+			{
+			  // ... after a letter character
+			  bEndOfLetters = true;
+			}
+			iLength--;
+		  }
+		  else
+		  {
+			if (!bEndOfLetters)
+			{
+			  word[i - iIgnoreAtBeginning] = (char) nc;
+			}
+			else
+			{
+			  return null;
+			}
+		  }
+		}
+		len = iLength;
+		if (len < (remainCharCount + pushCharCount))
+		{
+		  // word is too short to be hyphenated
+		  return null;
+		}
+		int[] result = new int[len + 1];
+		int k = 0;
+
+		// check exception list first
+		string sw = new string(word, 1, len);
+		if (stoplist.ContainsKey(sw))
+		{
+		  // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
+		  // null)
+		  List<object> hw = stoplist[sw];
+		  int j = 0;
+		  for (i = 0; i < hw.Count; i++)
+		  {
+			object o = hw[i];
+			// j = index(sw) = letterindex(word)?
+			// result[k] = corresponding index(w)
+			if (o is string)
+			{
+			  j += ((string) o).Length;
+			  if (j >= remainCharCount && j < (len - pushCharCount))
+			  {
+				result[k++] = j + iIgnoreAtBeginning;
+			  }
+			}
+		  }
+		}
+		else
+		{
+		  // use algorithm to get hyphenation points
+		  word[0] = '.'; // word start marker
+		  word[len + 1] = '.'; // word end marker
+		  word[len + 2] = (char)0; // null terminated
+		  sbyte[] il = new sbyte[len + 3]; // initialized to zero
+		  for (i = 0; i < len + 1; i++)
+		  {
+			searchPatterns(word, i, il);
+		  }
+
+		  // hyphenation points are located where interletter value is odd
+		  // i is letterindex(word),
+		  // i + 1 is index(word),
+		  // result[k] = corresponding index(w)
+		  for (i = 0; i < len; i++)
+		  {
+			if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
+			{
+			  result[k++] = i + iIgnoreAtBeginning;
+			}
+		  }
+		}
+
+		if (k > 0)
+		{
+		  // trim result array
+		  int[] res = new int[k + 2];
+		  Array.Copy(result, 0, res, 1, k);
+		  // We add the synthetical hyphenation points
+		  // at the beginning and end of the word
+		  res[0] = 0;
+		  res[k + 1] = len;
+		  return new Hyphenation(res);
+		}
+		else
+		{
+		  return null;
+		}
+	  }
+
+	  /// <summary>
+	  /// Add a character class to the tree. It is used by
+	  /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
+	  /// Character classes define the valid word characters for hyphenation. If a
+	  /// word contains a character not defined in any of the classes, it is not
+	  /// hyphenated. It also defines a way to normalize the characters in order to
+	  /// compare them with the stored patterns. Usually pattern files use only lower
+	  /// case characters, in this case a class for letter 'a', for example, should
+	  /// be defined as "aA", the first character being the normalization char.
+	  /// </summary>
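+	  // Example: addClass("aA") inserts the null-terminated keys "a" and "A"
+	  // into classmap, both mapping to the normalization char 'a'.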
+	  public virtual void addClass(string chargroup)
+	  {
+		if (chargroup.Length > 0)
+		{
+		  char equivChar = chargroup[0];
+		  char[] key = new char[2];
+		  key[1] = (char)0;
+		  for (int i = 0; i < chargroup.Length; i++)
+		  {
+			key[0] = chargroup[i];
+			classmap.insert(key, 0, equivChar);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Add an exception to the tree. It is used by
+	  /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
+	  /// hyphenation exceptions.
+	  /// </summary>
+	  /// <param name="word"> normalized word </param>
+	  /// <param name="hyphenatedword"> a vector of alternating strings and
+	  ///        <seealso cref="Hyphen hyphen"/> objects. </param>
+	  public virtual void addException(string word, List<object> hyphenatedword)
+	  {
+		stoplist[word] = hyphenatedword;
+	  }
+
+	  /// <summary>
+	  /// Add a pattern to the tree. Mainly, to be used by
+	  /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
+	  /// the tree.
+	  /// </summary>
+	  /// <param name="pattern"> the hyphenation pattern </param>
+	  /// <param name="ivalue"> interletter weight values indicating the desirability and
+	  ///        priority of hyphenating at a given point within the pattern. It
+	  ///        should contain only digit characters. (i.e. '0' to '9'). </param>
+	  public virtual void addPattern(string pattern, string ivalue)
+	  {
+		int k = ivalues.find(ivalue);
+		if (k <= 0)
+		{
+		  k = packValues(ivalue);
+		  ivalues.insert(ivalue, (char) k);
+		}
+		insert(pattern, (char) k);
+	  }
+
+	  public override void printStats(System.IO.TextWriter @out)
+	  {
+		@out.WriteLine("Value space size = " + Convert.ToString(vspace.length()));
+		base.printStats(@out);
+	  }
+	}
+
+}
\ No newline at end of file
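
For reference, an end-to-end sketch of the tree above; the pattern file name and the character counts are illustrative:

    HyphenationTree tree = new HyphenationTree();
    tree.loadPatterns(new InputSource("en_US.xml"));

    // Keep at least 2 chars before and 3 chars after every break point.
    Hyphenation h = tree.hyphenate("hyphenation", 2, 3);
    if (h != null)
    {
        // Includes the synthetic points at 0 and at the word length.
        int[] points = h.HyphenationPoints;
    }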

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
new file mode 100644
index 0000000..5b3fc39
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
@@ -0,0 +1,57 @@
+using System.Collections.Generic;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	/// <summary>
+	/// This interface is used to connect the XML pattern file parser to the
+	/// hyphenation tree.
+	/// 
+	/// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public interface PatternConsumer
+	{
+
+	  /// <summary>
+	  /// Add a character class. A character class defines characters that are
+	  /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
+	  /// usually means to ignore case.
+	  /// </summary>
+	  /// <param name="chargroup"> character group </param>
+	  void addClass(string chargroup);
+
+	  /// <summary>
+	  /// Add a hyphenation exception. An exception replaces the result obtained by
+	  /// the algorithm in cases where it fails or where the user wants to provide
+	  /// their own hyphenation. A hyphenatedword is a vector of alternating strings
+	  /// and <seealso cref="Hyphen Hyphen"/> instances.
+	  /// </summary>
+	  void addException(string word, List<object> hyphenatedword);
+
+	  /// <summary>
+	  /// Add hyphenation patterns.
+	  /// </summary>
+	  /// <param name="pattern"> the pattern </param>
+	  /// <param name="values"> interletter values expressed as a string of digit characters. </param>
+	  void addPattern(string pattern, string values);
+
+	}
+
+}
\ No newline at end of file
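
Since PatternConsumer is a plain interface, a throwaway implementation is a handy
way to inspect what a pattern file feeds into the tree (for instance, wired into
the PatternParser below via its Consumer property). A minimal sketch -- the class
name is ours, not part of the port:

    using System;
    using System.Collections.Generic;
    using org.apache.lucene.analysis.compound.hyphenation;

    public class LoggingPatternConsumer : PatternConsumer
    {
        public void addClass(string chargroup)
        {
            Console.WriteLine("class:     " + chargroup);
        }

        public void addException(string word, List<object> hyphenatedword)
        {
            Console.WriteLine("exception: " + word);
        }

        public void addPattern(string pattern, string values)
        {
            Console.WriteLine("pattern:   " + pattern + " => " + values);
        }
    }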

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
new file mode 100644
index 0000000..50d3eb8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
@@ -0,0 +1,463 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.compound.hyphenation
+{
+
+	// SAX
+	using XMLReader = org.xml.sax.XMLReader;
+	using InputSource = org.xml.sax.InputSource;
+	using SAXException = org.xml.sax.SAXException;
+	using SAXParseException = org.xml.sax.SAXParseException;
+	using DefaultHandler = org.xml.sax.helpers.DefaultHandler;
+	using Attributes = org.xml.sax.Attributes;
+
+	// Java
+
+	/// <summary>
+	/// A SAX document handler to read and parse hyphenation patterns from an XML
+	/// file.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+	/// </summary>
+	public class PatternParser : DefaultHandler
+	{
+
+	  internal XMLReader parser;
+
+	  internal int currElement;
+
+	  internal PatternConsumer consumer;
+
+	  internal StringBuilder token;
+
+	  internal List<object> exception;
+
+	  internal char hyphenChar;
+
+	  internal string errMsg;
+
+	  internal const int ELEM_CLASSES = 1;
+
+	  internal const int ELEM_EXCEPTIONS = 2;
+
+	  internal const int ELEM_PATTERNS = 3;
+
+	  internal const int ELEM_HYPHEN = 4;
+
+	  public PatternParser()
+	  {
+		token = new StringBuilder();
+		parser = createParser();
+		parser.ContentHandler = this;
+		parser.ErrorHandler = this;
+		parser.EntityResolver = this;
+		hyphenChar = '-'; // default
+
+	  }
+
+	  public PatternParser(PatternConsumer consumer) : this()
+	  {
+		this.consumer = consumer;
+	  }
+
+	  public virtual PatternConsumer Consumer
+	  {
+		  set
+		  {
+			this.consumer = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Parses a hyphenation pattern file.
+	  /// </summary>
+	  /// <param name="filename"> the filename </param>
+	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void parse(String filename) throws java.io.IOException
+	  public virtual void parse(string filename)
+	  {
+		parse(new InputSource(filename));
+	  }
+
+	  /// <summary>
+	  /// Parses a hyphenation pattern file.
+	  /// </summary>
+	  /// <param name="file"> the pattern file </param>
+	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void parse(java.io.File file) throws java.io.IOException
+	  public virtual void parse(File file)
+	  {
+		InputSource src = new InputSource(file.toURI().toASCIIString());
+		parse(src);
+	  }
+
+	  /// <summary>
+	  /// Parses a hyphenation pattern file.
+	  /// </summary>
+	  /// <param name="source"> the InputSource for the file </param>
+	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void parse(org.xml.sax.InputSource source) throws java.io.IOException
+	  public virtual void parse(InputSource source)
+	  {
+		try
+		{
+		  parser.parse(source);
+		}
+		catch (SAXException e)
+		{
+		  throw new System.IO.IOException(e.Message, e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Creates a SAX parser using JAXP
+	  /// </summary>
+	  /// <returns> the created SAX parser </returns>
+	  internal static XMLReader createParser()
+	  {
+		try
+		{
+		  SAXParserFactory factory = SAXParserFactory.newInstance();
+		  factory.NamespaceAware = true;
+		  return factory.newSAXParser().XMLReader;
+		}
+		catch (Exception e)
+		{
+		  throw new Exception("Couldn't create XMLReader: " + e.Message);
+		}
+	  }
+
+	  protected internal virtual string readToken(StringBuilder chars)
+	  {
+		string word;
+		bool space = false;
+		int i;
+		for (i = 0; i < chars.Length; i++)
+		{
+		  if (char.IsWhiteSpace(chars[i]))
+		  {
+			space = true;
+		  }
+		  else
+		  {
+			break;
+		  }
+		}
+		if (space)
+		{
+		  // chars.delete(0,i);
+		  for (int countr = i; countr < chars.Length; countr++)
+		  {
+			chars[countr - i] = chars[countr];
+		  }
+		  chars.Length = chars.Length - i;
+		  if (token.Length > 0)
+		  {
+			word = token.ToString();
+			token.Length = 0;
+			return word;
+		  }
+		}
+		space = false;
+		for (i = 0; i < chars.Length; i++)
+		{
+		  if (char.IsWhiteSpace(chars[i]))
+		  {
+			space = true;
+			break;
+		  }
+		}
+		token.Append(chars.ToString().Substring(0, i));
+		// chars.delete(0,i);
+		for (int countr = i; countr < chars.Length; countr++)
+		{
+		  chars[countr - i] = chars[countr];
+		}
+		chars.Length = chars.Length - i;
+		if (space)
+		{
+		  word = token.ToString();
+		  token.Length = 0;
+		  return word;
+		}
+		token.Append(chars);
+		return null;
+	  }
+
+	  protected internal static string getPattern(string word)
+	  {
+		StringBuilder pat = new StringBuilder();
+		int len = word.Length;
+		for (int i = 0; i < len; i++)
+		{
+		  if (!char.IsDigit(word[i]))
+		  {
+			pat.Append(word[i]);
+		  }
+		}
+		return pat.ToString();
+	  }
+
+	  protected internal virtual List<object> normalizeException<T1>(List<T1> ex)
+	  {
+		List<object> res = new List<object>();
+		for (int i = 0; i < ex.Count; i++)
+		{
+		  object item = ex[i];
+		  if (item is string)
+		  {
+			string str = (string) item;
+			StringBuilder buf = new StringBuilder();
+			for (int j = 0; j < str.Length; j++)
+			{
+			  char c = str[j];
+			  if (c != hyphenChar)
+			  {
+				buf.Append(c);
+			  }
+			  else
+			  {
+				res.Add(buf.ToString());
+				buf.Length = 0;
+				char[] h = new char[1];
+				h[0] = hyphenChar;
+				// we use here hyphenChar which is not necessarily
+				// the one to be printed
+				res.Add(new Hyphen(new string(h), null, null));
+			  }
+			}
+			if (buf.Length > 0)
+			{
+			  res.Add(buf.ToString());
+			}
+		  }
+		  else
+		  {
+			res.Add(item);
+		  }
+		}
+		return res;
+	  }
+
+	  protected internal virtual string getExceptionWord<T1>(List<T1> ex)
+	  {
+		StringBuilder res = new StringBuilder();
+		for (int i = 0; i < ex.Count; i++)
+		{
+		  object item = ex[i];
+		  if (item is string)
+		  {
+			res.Append((string) item);
+		  }
+		  else
+		  {
+			if (((Hyphen) item).noBreak != null)
+			{
+			  res.Append(((Hyphen) item).noBreak);
+			}
+		  }
+		}
+		return res.ToString();
+	  }
+
+	  protected internal static string getInterletterValues(string pat)
+	  {
+		StringBuilder il = new StringBuilder();
+		string word = pat + "a"; // add dummy letter to serve as sentinel
+		int len = word.Length;
+		for (int i = 0; i < len; i++)
+		{
+		  char c = word[i];
+		  if (char.IsDigit(c))
+		  {
+			il.Append(c);
+			i++;
+		  }
+		  else
+		  {
+			il.Append('0');
+		  }
+		}
+		return il.ToString();
+	  }
+
+	  //
+	  // EntityResolver methods
+	  //
+	  public override InputSource resolveEntity(string publicId, string systemId)
+	  {
+		// supply the internal hyphenation.dtd if possible
+		if ((systemId != null && System.Text.RegularExpressions.Regex.IsMatch(systemId, "(?i).*\\bhyphenation.dtd\\b.*")) || ("hyphenation-info".Equals(publicId)))
+		{
+		  // System.out.println(this.getClass().getResource("hyphenation.dtd").toExternalForm());
+		  return new InputSource(this.GetType().getResource("hyphenation.dtd").toExternalForm());
+		}
+		return null;
+	  }
+
+	  //
+	  // ContentHandler methods
+	  //
+
+	  /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String,
+	  ///      java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso>
+	  public override void startElement(string uri, string local, string raw, Attributes attrs)
+	  {
+		if (local.Equals("hyphen-char"))
+		{
+		  string h = attrs.getValue("value");
+		  if (h != null && h.Length == 1)
+		  {
+			hyphenChar = h[0];
+		  }
+		}
+		else if (local.Equals("classes"))
+		{
+		  currElement = ELEM_CLASSES;
+		}
+		else if (local.Equals("patterns"))
+		{
+		  currElement = ELEM_PATTERNS;
+		}
+		else if (local.Equals("exceptions"))
+		{
+		  currElement = ELEM_EXCEPTIONS;
+		  exception = new List<object>();
+		}
+		else if (local.Equals("hyphen"))
+		{
+		  if (token.Length > 0)
+		  {
+			exception.Add(token.ToString());
+		  }
+		  exception.Add(new Hyphen(attrs.getValue("pre"), attrs.getValue("no"), attrs.getValue("post")));
+		  currElement = ELEM_HYPHEN;
+		}
+		token.Length = 0;
+	  }
+
+	  /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String,
+	  ///      java.lang.String, java.lang.String) </seealso>
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Override @SuppressWarnings("unchecked") public void endElement(String uri, String local, String raw)
+	  public override void endElement(string uri, string local, string raw)
+	  {
+
+		if (token.Length > 0)
+		{
+		  string word = token.ToString();
+		  switch (currElement)
+		  {
+			case ELEM_CLASSES:
+			  consumer.addClass(word);
+			  break;
+			case ELEM_EXCEPTIONS:
+			  exception.Add(word);
+			  exception = normalizeException(exception);
+			  consumer.addException(getExceptionWord(exception), new List<object>(exception));
+			  break;
+			case ELEM_PATTERNS:
+			  consumer.addPattern(getPattern(word), getInterletterValues(word));
+			  break;
+			case ELEM_HYPHEN:
+			  // nothing to do
+			  break;
+		  }
+		  if (currElement != ELEM_HYPHEN)
+		  {
+			token.Length = 0;
+		  }
+		}
+		if (currElement == ELEM_HYPHEN)
+		{
+		  currElement = ELEM_EXCEPTIONS;
+		}
+		else
+		{
+		  currElement = 0;
+		}
+
+	  }
+
+	  /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("unchecked") @Override public void characters(char ch[] , int start, int length)
+	  public override void characters(char[] ch, int start, int length)
+	  {
+		StringBuilder chars = new StringBuilder(length);
+		chars.Append(ch, start, length);
+		string word = readToken(chars);
+		while (word != null)
+		{
+		  // System.out.println("\"" + word + "\"");
+		  switch (currElement)
+		  {
+			case ELEM_CLASSES:
+			  consumer.addClass(word);
+			  break;
+			case ELEM_EXCEPTIONS:
+			  exception.Add(word);
+			  exception = normalizeException(exception);
+			  consumer.addException(getExceptionWord(exception), new List<object>(exception));
+			  exception.Clear();
+			  break;
+			case ELEM_PATTERNS:
+			  consumer.addPattern(getPattern(word), getInterletterValues(word));
+			  break;
+		  }
+		  word = readToken(chars);
+		}
+
+	  }
+
+	  /// <summary>
+	  /// Returns a string of the location.
+	  /// </summary>
+	  private string getLocationString(SAXParseException ex)
+	  {
+		StringBuilder str = new StringBuilder();
+
+		string systemId = ex.SystemId;
+		if (systemId != null)
+		{
+		  int index = systemId.LastIndexOf('/');
+		  if (index != -1)
+		  {
+			systemId = systemId.Substring(index + 1);
+		  }
+		  str.Append(systemId);
+		}
+		str.Append(':');
+		str.Append(ex.LineNumber);
+		str.Append(':');
+		str.Append(ex.ColumnNumber);
+
+		return str.ToString();
+
+	  } // getLocationString(SAXParseException):String
+	}
+
+}
\ No newline at end of file
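
The two static helpers above split a raw TeX-style pattern into its letters and its
inter-letter weights; getInterletterValues appends a sentinel letter so that there
is exactly one weight per inter-letter position. A worked example, assuming
same-assembly access to the protected internal methods:

    string pat = "4m1p";                                      // TeX hyphenation pattern
    string letters = PatternParser.getPattern(pat);           // "mp"  (digits stripped)
    string weights = PatternParser.getInterletterValues(pat); // "410" (letters.Length + 1 digits)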


[12/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
new file mode 100644
index 0000000..c6506ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
@@ -0,0 +1,477 @@
+using System;
+using System.Collections;
+using System.Globalization;
+using System.Text;
+
+namespace org.apache.lucene.analysis.nl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// A stemmer for Dutch words. 
+	/// <para>
+	/// The algorithm is an implementation of
+	/// the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">Dutch stemming</a>
+	/// algorithm in Martin Porter's Snowball project.
+	/// </para> </summary>
+	/// @deprecated (3.1) Use <seealso cref="org.tartarus.snowball.ext.DutchStemmer"/> instead, 
+	/// which has the same functionality. This filter will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use org.tartarus.snowball.ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
+	public class DutchStemmer
+	{
+	  private static readonly CultureInfo locale = new CultureInfo("nl-NL");
+
+	  /// <summary>
+	  /// Buffer for the terms while stemming them.
+	  /// </summary>
+	  private StringBuilder sb = new StringBuilder();
+	  private bool _removedE;
+	  private IDictionary _stemDict;
+
+	  private int _R1;
+	  private int _R2;
+
+	  //TODO convert to internal
+	  /*
+	   * Stems the given term to a unique <tt>discriminator</tt>.
+	   *
+	   * @param term The term that should be stemmed.
+	   * @return Discriminator for <tt>term</tt>
+	   */
+	  public virtual string stem(string term)
+	  {
+		term = term.ToLower(locale);
+		if (!isStemmable(term))
+		{
+		  return term;
+		}
+		if (_stemDict != null && _stemDict.Contains(term))
+		{
+		  if (_stemDict[term] is string)
+		  {
+			return (string) _stemDict[term];
+		  }
+		  else
+		  {
+			return null;
+		  }
+		}
+
+		// Reset the StringBuilder.
+		sb.Remove(0, sb.Length);
+		sb.Insert(0, term);
+		// Stemming starts here...
+		substitute(sb);
+		storeYandI(sb);
+		_R1 = getRIndex(sb, 0);
+		_R1 = Math.Max(3, _R1);
+		step1(sb);
+		step2(sb);
+		_R2 = getRIndex(sb, _R1);
+		step3a(sb);
+		step3b(sb);
+		step4(sb);
+		reStoreYandI(sb);
+		return sb.ToString();
+	  }
+
+	  private bool enEnding(StringBuilder sb)
+	  {
+		string[] enend = new string[]{"ene", "en"};
+		for (int i = 0; i < enend.Length; i++)
+		{
+		  string end = enend[i];
+		  string s = sb.ToString();
+		  int index = s.Length - end.Length;
+		  if (s.EndsWith(end, StringComparison.Ordinal) && index >= _R1 && isValidEnEnding(sb, index - 1))
+		  {
+			sb.Remove(index, end.Length);
+			unDouble(sb, index);
+			return true;
+		  }
+		}
+		return false;
+	  }
+
+
+	  private void step1(StringBuilder sb)
+	  {
+		if (_R1 >= sb.Length)
+		{
+		  return;
+		}
+
+		string s = sb.ToString();
+		int lengthR1 = sb.Length - _R1;
+		int index;
+
+		if (s.EndsWith("heden", StringComparison.Ordinal))
+		{
+		  // Capture the tail before mutating, then replace "heden" with "heid".
+		  string tail = sb.ToString(_R1, lengthR1).Replace("heden", "heid");
+		  sb.Remove(_R1, lengthR1).Insert(_R1, tail);
+		  return;
+		}
+
+		if (enEnding(sb))
+		{
+		  return;
+		}
+
+		if (s.EndsWith("se", StringComparison.Ordinal) && (index = s.Length - 2) >= _R1 && isValidSEnding(sb, index - 1))
+		{
+		  sb.Remove(index, 2);
+		  return;
+		}
+		if (s.EndsWith("s", StringComparison.Ordinal) && (index = s.Length - 1) >= _R1 && isValidSEnding(sb, index - 1))
+		{
+		  sb.Remove(index, 1);
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete suffix e if in R1 and
+	  /// preceded by a non-vowel, and then undouble the ending
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step2(StringBuilder sb)
+	  {
+		_removedE = false;
+		if (_R1 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = s.Length - 1;
+		if (index >= _R1 && s.EndsWith("e", StringComparison.Ordinal) && !isVowel(sb[index - 1]))
+		{
+		  sb.Remove(index, 1);
+		  unDouble(sb);
+		  _removedE = true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete "heid"
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step3a(StringBuilder sb)
+	  {
+		if (_R2 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = s.Length - 4;
+		if (s.EndsWith("heid", StringComparison.Ordinal) && index >= _R2 && sb[index - 1] != 'c')
+		{
+		  sb.Remove(index, 4); // remove "heid"
+		  enEnding(sb);
+		}
+	  }
+
+	  /// <summary>
+	  /// <para>A d-suffix, or derivational suffix, enables a new word,
+	  /// often with a different grammatical category, or with a different
+	  /// sense, to be built from another word. Whether a d-suffix can be
+	  /// attached is discovered not from the rules of grammar, but by
+	  /// referring to a dictionary. So in English, ness can be added to
+	  /// certain adjectives to form corresponding nouns (littleness,
+	  /// kindness, foolishness ...) but not to all adjectives
+	  /// (not for example, to big, cruel, wise ...) d-suffixes can be
+	  /// used to change meaning, often in rather exotic ways.</para>
+	  /// Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step3b(StringBuilder sb)
+	  {
+		if (_R2 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = 0;
+
+		if ((s.EndsWith("end", StringComparison.Ordinal) || s.EndsWith("ing", StringComparison.Ordinal)) && (index = s.Length - 3) >= _R2)
+		{
+		  sb.Remove(index, 3);
+		  if (sb[index - 2] == 'i' && sb[index - 1] == 'g')
+		  {
+			if (sb[index - 3] != 'e' && index - 2 >= _R2)
+			{
+			  index -= 2;
+			  sb.Remove(index, 2);
+			}
+		  }
+		  else
+		  {
+			unDouble(sb, index);
+		  }
+		  return;
+		}
+		if (s.EndsWith("ig", StringComparison.Ordinal) && (index = s.Length - 2) >= _R2)
+		{
+		  if (sb[index - 1] != 'e')
+		  {
+			sb.Remove(index, 2);
+		  }
+		  return;
+		}
+		if (s.EndsWith("lijk", StringComparison.Ordinal) && (index = s.Length - 4) >= _R2)
+		{
+		  sb.Remove(index, 4);
+		  step2(sb);
+		  return;
+		}
+		if (s.EndsWith("baar", StringComparison.Ordinal) && (index = s.Length - 4) >= _R2)
+		{
+		  sb.Remove(index, 4);
+		  return;
+		}
+		if (s.EndsWith("bar", StringComparison.Ordinal) && (index = s.Length - 3) >= _R2)
+		{
+		  if (_removedE)
+		  {
+			sb.Remove(index, 3);
+		  }
+		  return;
+		}
+	  }
+
+	  /// <summary>
+	  /// Undouble the vowel.
+	  /// If the word ends in CVD, where C is a non-vowel, D is a non-vowel other than I, and V is a doubled a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step4(StringBuilder sb)
+	  {
+		if (sb.Length < 4)
+		{
+		  return;
+		}
+		string end = sb.ToString(sb.Length - 4, 4);
+		char c = end[0];
+		char v1 = end[1];
+		char v2 = end[2];
+		char d = end[3];
+		if (v1 == v2 && d != 'I' && v1 != 'i' && isVowel(v1) && !isVowel(d) && !isVowel(c))
+		{
+		  sb.Remove(sb.Length - 2, 1); // drop one of the doubled vowels
+		}
+	  }
+
+	  /// <summary>
+	  /// Checks if a term could be stemmed.
+	  /// </summary>
+	  /// <returns> true if, and only if, the given term consists in letters. </returns>
+	  private bool isStemmable(string term)
+	  {
+		for (int c = 0; c < term.Length; c++)
+		{
+		  if (!char.IsLetter(term[c]))
+		  {
+			  return false;
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú
+	  /// </summary>
+	  private void substitute(StringBuilder buffer)
+	  {
+		for (int i = 0; i < buffer.Length; i++)
+		{
+		  switch (buffer[i])
+		  {
+			case 'ä':
+			case 'á':
+			{
+				buffer[i] = 'a';
+				break;
+			}
+			case 'ë':
+			case 'é':
+			{
+				buffer[i] = 'e';
+				break;
+			}
+			case 'ü':
+			case 'ú':
+			{
+				buffer[i] = 'u';
+				break;
+			}
+			case 'ï':
+			case 'í':
+			{
+				buffer[i] = 'i';
+				break;
+			}
+			case 'ö':
+			case 'ó':
+			{
+				buffer[i] = 'o';
+				break;
+			}
+		  }
+		}
+	  }
+
+	  /*private boolean isValidSEnding(StringBuilder sb) {
+	    return isValidSEnding(sb, sb.length() - 1);
+	  }*/
+
+	  private bool isValidSEnding(StringBuilder sb, int index)
+	  {
+		char c = sb[index];
+		if (isVowel(c) || c == 'j')
+		{
+		  return false;
+		}
+		return true;
+	  }
+
+	  /*private boolean isValidEnEnding(StringBuilder sb) {
+	    return isValidEnEnding(sb, sb.length() - 1);
+	  }*/
+
+	  private bool isValidEnEnding(StringBuilder sb, int index)
+	  {
+		char c = sb[index];
+		if (isVowel(c))
+		{
+		  return false;
+		}
+		// The raw port had "if (c < 3)", which can never be true for a letter;
+		// the guard is presumably meant to keep sb[index - 2] below in range.
+		if (index < 3)
+		{
+		  return false;
+		}
+		// ends with "gem"?
+		if (c == 'm' && sb[index - 2] == 'g' && sb[index - 1] == 'e')
+		{
+		  return false;
+		}
+		return true;
+	  }
+
+	  private void unDouble(StringBuilder sb)
+	  {
+		unDouble(sb, sb.Length);
+	  }
+
+	  private void unDouble(StringBuilder sb, int endIndex)
+	  {
+		string s = sb.ToString(0, endIndex);
+		if (s.EndsWith("kk", StringComparison.Ordinal) || s.EndsWith("tt", StringComparison.Ordinal) || s.EndsWith("dd", StringComparison.Ordinal) || s.EndsWith("nn", StringComparison.Ordinal) || s.EndsWith("mm", StringComparison.Ordinal) || s.EndsWith("ff", StringComparison.Ordinal))
+		{
+		  sb.Remove(endIndex - 1, endIndex - endIndex - 1);
+		}
+	  }
+
+	  private int getRIndex(StringBuilder sb, int start)
+	  {
+		if (start == 0)
+		{
+		  start = 1;
+		}
+		int i = start;
+		for (; i < sb.Length; i++)
+		{
+		  //first non-vowel preceded by a vowel
+		  if (!isVowel(sb[i]) && isVowel(sb[i - 1]))
+		  {
+			return i + 1;
+		  }
+		}
+		return i + 1;
+	  }
+
+	  private void storeYandI(StringBuilder sb)
+	  {
+		if (sb[0] == 'y')
+		{
+		  sb[0] = 'Y';
+		}
+
+		int last = sb.Length - 1;
+
+		for (int i = 1; i < last; i++)
+		{
+		  switch (sb[i])
+		  {
+			case 'i':
+			{
+				if (isVowel(sb[i - 1]) && isVowel(sb[i + 1]))
+				{
+				  sb[i] = 'I';
+				}
+				break;
+			}
+			case 'y':
+			{
+				if (isVowel(sb[i - 1]))
+				{
+				  sb[i] = 'Y';
+				}
+				break;
+			}
+		  }
+		}
+		if (last > 0 && sb[last] == 'y' && isVowel(sb[last - 1]))
+		{
+		  sb[last] = 'Y';
+		}
+	  }
+
+	  private void reStoreYandI(StringBuilder sb)
+	  {
+		string tmp = sb.ToString();
+		sb.Remove(0, sb.Length);
+		sb.Insert(0, tmp.Replace('I', 'i').Replace('Y', 'y'));
+	  }
+
+	  private bool isVowel(char c)
+	  {
+		switch (c)
+		{
+		  case 'e':
+		  case 'a':
+		  case 'o':
+		  case 'i':
+		  case 'u':
+		  case 'y':
+		  case 'è':
+		  {
+			  return true;
+		  }
+		}
+		return false;
+	  }
+
+	  internal virtual IDictionary StemDictionary
+	  {
+		  set
+		  {
+			_stemDict = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
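
A usage sketch for the stemmer above. The expected outputs follow the Snowball
Dutch algorithm that the class documents; they have not been verified against this
raw port:

    var stemmer = new DutchStemmer();
    Console.WriteLine(stemmer.stem("boeken"));      // expected: "boek"   (plural "-en" removed)
    Console.WriteLine(stemmer.stem("lichamelijk")); // expected: "licham" ("-lijk", then "-e" removed)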

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
new file mode 100644
index 0000000..5cc9abf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
@@ -0,0 +1,140 @@
+using System;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using NorwegianStemmer = org.tartarus.snowball.ext.NorwegianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Norwegian.
+	/// </summary>
+	public sealed class NorwegianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Norwegian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public NorwegianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new NorwegianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+
+}
\ No newline at end of file
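
A sketch of the usual Lucene 4.x wiring for the analyzer above; the stem exclusion
term is illustrative, and IndexWriterConfig assumes the core classes have also been
ported:

    Analyzer analyzer = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
    // var config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);

    // With an exclusion set, the listed terms bypass the SnowballFilter:
    Analyzer custom = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
        NorwegianAnalyzer.DefaultStopSet,
        new CharArraySet(Version.LUCENE_CURRENT, new[] { "stavanger" }, true));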

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
new file mode 100644
index 0000000..bf10424
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="NorwegianLightStemmer"/> to stem Norwegian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class NorwegianLightStemFilter : TokenFilter
+	{
+	  private readonly NorwegianLightStemmer stemmer;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Calls {@link #NorwegianLightStemFilter(TokenStream, int) 
+	  /// NorwegianLightStemFilter(input, BOKMAAL)}
+	  /// </summary>
+	  public NorwegianLightStemFilter(TokenStream input) : this(input, NorwegianLightStemmer.BOKMAAL)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemFilter </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianLightStemFilter(TokenStream input, int flags) : base(input)
+	  {
+		stemmer = new NorwegianLightStemmer(flags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
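
The keyword-marker protection described in the summary looks like this in practice.
A sketch that assumes the Java-style CharArraySet constructor survives the port,
with `tokenizer` standing in for any upstream TokenStream:

    CharArraySet protectedTerms =
        new CharArraySet(Version.LUCENE_CURRENT, new[] { "huset" }, true);
    TokenStream ts = new NorwegianLightStemFilter(
        new SetKeywordMarkerFilter(tokenizer, protectedTerms));
    // "huset" passes through unstemmed; everything else is light-stemmed.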

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
new file mode 100644
index 0000000..ad94de5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
@@ -0,0 +1,79 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Factory for <seealso cref="NorwegianLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NorwegianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  private readonly int flags;
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemFilterFactory </summary>
+	  public NorwegianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		string variant = get(args, "variant");
+		if (variant == null || "nb".Equals(variant))
+		{
+		  flags = BOKMAAL;
+		}
+		else if ("nn".Equals(variant))
+		{
+		  flags = NYNORSK;
+		}
+		else if ("no".Equals(variant))
+		{
+		  flags = BOKMAAL | NYNORSK;
+		}
+		else
+		{
+		  throw new System.ArgumentException("invalid variant: " + variant);
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new NorwegianLightStemFilter(input, flags);
+	  }
+	}
+
+}
\ No newline at end of file
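
Programmatically, the factory takes the same "variant" argument the Solr snippet
above passes: "nb" (or no argument) selects Bokmål, "nn" Nynorsk, and "no" both.
A sketch:

    var args = new Dictionary<string, string> { { "variant", "no" } };
    var factory = new NorwegianLightStemFilterFactory(args); // flags = BOKMAAL | NYNORSK
    // TokenStream stemmed = factory.create(input);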

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
new file mode 100644
index 0000000..5efa799
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
@@ -0,0 +1,158 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Norwegian.
+	/// <para>
+	/// Parts of this stemmer are adapted from SwedishLightStemFilter, except
+	/// that the Swedish one has a pre-defined rule set and a corresponding
+	/// corpus to validate against, whereas the Norwegian one is hand-crafted.
+	/// </para>
+	/// </summary>
+	public class NorwegianLightStemmer
+	{
+	  /// <summary>
+	  /// Constant to remove Bokmål-specific endings </summary>
+	  public const int BOKMAAL = 1;
+	  /// <summary>
+	  /// Constant to remove Nynorsk-specific endings </summary>
+	  public const int NYNORSK = 2;
+
+	  internal readonly bool useBokmaal;
+	  internal readonly bool useNynorsk;
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemmer </summary>
+	  /// <param name="flags"> set to <seealso cref="#BOKMAAL"/>, <seealso cref="#NYNORSK"/>, or both. </param>
+	  public NorwegianLightStemmer(int flags)
+	  {
+		if (flags <= 0 || flags > BOKMAAL + NYNORSK)
+		{
+		  throw new System.ArgumentException("invalid flags");
+		}
+		useBokmaal = (flags & BOKMAAL) != 0;
+		useNynorsk = (flags & NYNORSK) != 0;
+	  }
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		// Remove possessive -s (bilens -> bilen) and continue checking 
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		// Remove common endings, single-pass
+		if (len > 7 && ((StemmerUtil.EndsWith(s, len, "heter") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "heten") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "heita") && useNynorsk))) // general ending (hemmeleg-heita -> hemmeleg) -  general ending (hemmelig-heten -> hemmelig) -  general ending (hemmelig-heter -> hemmelig)
+		{
+		  return len - 5;
+		}
+
+		// Remove Nynorsk common endings, single-pass
+		if (len > 8 && useNynorsk && (StemmerUtil.EndsWith(s, len, "heiter") || StemmerUtil.EndsWith(s, len, "leiken") || StemmerUtil.EndsWith(s, len, "leikar"))) // general ending (trygg-leikar -> trygg) -  general ending (trygg-leiken -> trygg) -  general ending (hemmeleg-heiter -> hemmeleg)
+		{
+		  return len - 6;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "dom") || (StemmerUtil.EndsWith(s, len, "het") && useBokmaal))) // general ending (hemmelig-het -> hemmelig) -  general ending (kristen-dom -> kristen)
+		{
+		  return len - 3;
+		}
+
+		if (len > 6 && useNynorsk && (StemmerUtil.EndsWith(s, len, "heit") || StemmerUtil.EndsWith(s, len, "semd") || StemmerUtil.EndsWith(s, len, "leik"))) // general ending (trygg-leik -> trygg) -  general ending (verk-semd -> verk) -  general ending (hemmeleg-heit -> hemmeleg)
+		{
+		  return len - 4;
+		}
+
+		if (len > 7 && (StemmerUtil.EndsWith(s, len, "elser") || StemmerUtil.EndsWith(s, len, "elsen"))) // general ending (føl-elsen -> føl) -  general ending (føl-elser -> føl)
+		{
+		  return len - 5;
+		}
+
+		if (len > 6 && ((StemmerUtil.EndsWith(s, len, "ende") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "ande") && useNynorsk) || StemmerUtil.EndsWith(s, len, "else") || (StemmerUtil.EndsWith(s, len, "este") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "aste") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "eren") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "aren") && useNynorsk))) // masc -  masc -  adj (fin-aste -> fin) -  adj (fin-este -> fin) -  general ending (føl-else -> føl) -  (sov-ande -> sov) -  (sov-ende -> sov)
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && ((StemmerUtil.EndsWith(s, len, "ere") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "are") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "est") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "ast") && useNynorsk) || StemmerUtil.EndsWith(s, len, "ene") || (StemmerUtil.EndsWith(s, len, "ane") && useNynorsk))) // masc pl definite (gut-ane) -  masc/fem/neutr pl definite (hus-ene) -  adj (fin-ast -> fin) -  adj (fin-est -> fin) -  adj (fin-are -> fin) -  adj (fin-ere -> fin)
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "et") || (StemmerUtil.EndsWith(s, len, "ar") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "st") && useBokmaal) || StemmerUtil.EndsWith(s, len, "te"))) // adj (billig-st -> billig) -  masc pl indefinite -  neutr definite -  masc/fem definite -  masc/fem indefinite
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a': // fem definite
+			case 'e': // to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
+			case 'n':
+			  return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
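
A worked example against the suffix rules above; stemming happens in place on a
char buffer and the new length is returned:

    var stemmer = new NorwegianLightStemmer(NorwegianLightStemmer.BOKMAAL);
    char[] buf = "huset".ToCharArray();            // "the house"
    int newLen = stemmer.stem(buf, buf.Length);    // len > 4 and ends in "et" -> len - 2
    Console.WriteLine(new string(buf, 0, newLen)); // "hus"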

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
new file mode 100644
index 0000000..670446b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="NorwegianMinimalStemmer"/> to stem Norwegian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class NorwegianMinimalStemFilter : TokenFilter
+	{
+	  private readonly NorwegianMinimalStemmer stemmer;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Calls {@link #NorwegianMinimalStemFilter(TokenStream, int) 
+	  /// NorwegianMinimalStemFilter(input, BOKMAAL)}
+	  /// </summary>
+	  public NorwegianMinimalStemFilter(TokenStream input) : this(input, NorwegianLightStemmer.BOKMAAL)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemFilter </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianMinimalStemFilter(TokenStream input, int flags) : base(input)
+	  {
+		this.stemmer = new NorwegianMinimalStemmer(flags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..1f629bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
@@ -0,0 +1,79 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Factory for <seealso cref="NorwegianMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NorwegianMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  private readonly int flags;
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemFilterFactory </summary>
+	  public NorwegianMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		string variant = get(args, "variant");
+		if (variant == null || "nb".Equals(variant))
+		{
+		  flags = BOKMAAL;
+		}
+		else if ("nn".Equals(variant))
+		{
+		  flags = NYNORSK;
+		}
+		else if ("no".Equals(variant))
+		{
+		  flags = BOKMAAL | NYNORSK;
+		}
+		else
+		{
+		  throw new System.ArgumentException("invalid variant: " + variant);
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new NorwegianMinimalStemFilter(input, flags);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
new file mode 100644
index 0000000..6a4a94a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
@@ -0,0 +1,121 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Minimal Stemmer for Norwegian Bokmål (no-nb) and Nynorsk (no-nn)
+	/// <para>
+	/// Stems known plural forms for Norwegian nouns only, together with the genitive -s.
+	/// </para>
+	/// </summary>
+	public class NorwegianMinimalStemmer
+	{
+	  internal readonly bool useBokmaal;
+	  internal readonly bool useNynorsk;
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemmer </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianMinimalStemmer(int flags)
+	  {
+		if (flags <= 0 || flags > BOKMAAL + NYNORSK)
+		{
+		  throw new System.ArgumentException("invalid flags");
+		}
+		useBokmaal = (flags & BOKMAAL) != 0;
+		useNynorsk = (flags & NYNORSK) != 0;
+	  }
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		// Remove genitiv s
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "ene") || (StemmerUtil.EndsWith(s, len, "ane") && useNynorsk))) // masc pl definite (gut-ane) -  masc/fem/neutr pl definite (hus-ene)
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "et") || (StemmerUtil.EndsWith(s, len, "ar") && useNynorsk))) // masc pl indefinite -  neutr definite -  masc/fem definite -  masc/fem indefinite
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a': // fem definite
+			case 'e': // to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
+			  return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
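
The stemmer mutates the char buffer in place and returns the new logical length; a minimal sketch of that contract (assuming BOKMAAL = 1, as in the Java NorwegianLightStemmer this port mirrors):

    var stemmer = new NorwegianMinimalStemmer(1);  // 1 = BOKMAAL in the Java original
    char[] buf = "husene".ToCharArray();           // "the houses", neuter plural definite
    int len = stemmer.stem(buf, buf.Length);       // strips the "-ene" suffix
    var stem = new string(buf, 0, len);            // "hus"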

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
new file mode 100644
index 0000000..b826cd6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
@@ -0,0 +1,242 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.path
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Tokenizer for path-like hierarchies.
+	/// <para>
+	/// Take something like:
+	/// 
+	/// <pre>
+	///  /something/something/else
+	/// </pre>
+	/// 
+	/// and make:
+	/// 
+	/// <pre>
+	///  /something
+	///  /something/something
+	///  /something/something/else
+	/// </pre>
+	/// </para>
+	/// </summary>
+	public class PathHierarchyTokenizer : Tokenizer
+	{
+
+	  public PathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
+	  {
+		if (bufferSize < 0)
+		{
+		  throw new System.ArgumentException("bufferSize cannot be negative");
+		}
+		if (skip < 0)
+		{
+		  throw new System.ArgumentException("skip cannot be negative");
+		}
+		termAtt.resizeBuffer(bufferSize);
+
+		this.delimiter = delimiter;
+		this.replacement = replacement;
+		this.skip = skip;
+		resultToken = new StringBuilder(bufferSize);
+	  }
+
+	  private const int DEFAULT_BUFFER_SIZE = 1024;
+	  public const char DEFAULT_DELIMITER = '/';
+	  public const int DEFAULT_SKIP = 0;
+
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly int skip;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private int startPosition = 0;
+	  private int skipped = 0;
+	  private bool endDelimiter = false;
+	  private StringBuilder resultToken;
+
+	  private int charsRead = 0;
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		termAtt.append(resultToken);
+		if (resultToken.Length == 0)
+		{
+		  posAtt.PositionIncrement = 1;
+		}
+		else
+		{
+		  posAtt.PositionIncrement = 0;
+		}
+		int length = 0;
+		bool added = false;
+		if (endDelimiter)
+		{
+		  termAtt.append(replacement);
+		  length++;
+		  endDelimiter = false;
+		  added = true;
+		}
+
+		while (true)
+		{
+		  int c = input.read();
+		  if (c >= 0)
+		  {
+			charsRead++;
+		  }
+		  else
+		  {
+			if (skipped > skip)
+			{
+			  length += resultToken.Length;
+			  termAtt.Length = length;
+			   offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
+			  if (added)
+			  {
+				resultToken.Length = 0;
+				resultToken.Append(termAtt.buffer(), 0, length);
+			  }
+			  return added;
+			}
+			else
+			{
+			  return false;
+			}
+		  }
+		  if (!added)
+		  {
+			added = true;
+			skipped++;
+			if (skipped > skip)
+			{
+			  termAtt.append(c == delimiter ? replacement : (char)c);
+			  length++;
+			}
+			else
+			{
+			  startPosition++;
+			}
+		  }
+		  else
+		  {
+			if (c == delimiter)
+			{
+			  if (skipped > skip)
+			  {
+				endDelimiter = true;
+				break;
+			  }
+			  skipped++;
+			  if (skipped > skip)
+			  {
+				termAtt.append(replacement);
+				length++;
+			  }
+			  else
+			  {
+				startPosition++;
+			  }
+			}
+			else
+			{
+			  if (skipped > skip)
+			  {
+				termAtt.append((char)c);
+				length++;
+			  }
+			  else
+			  {
+				startPosition++;
+			  }
+			}
+		  }
+		}
+		length += resultToken.Length;
+		termAtt.Length = length;
+		offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
+		resultToken.Length = 0;
+		resultToken.Append(termAtt.buffer(), 0, length);
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(charsRead);
+		offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		resultToken.Length = 0;
+		charsRead = 0;
+		endDelimiter = false;
+		skipped = 0;
+		startPosition = 0;
+	  }
+	}
+
+}
\ No newline at end of file
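
Per the summary above, each incrementToken() call extends the emitted path by one level, with a position increment of 1 for the first token and 0 for the rest. A hedged usage sketch (the Reader parameter and the attribute accessor are still Java-shaped in this raw port, so the names below are guesses at the eventual .NET surface):

    var tokenizer = new PathHierarchyTokenizer(new StringReader("/usr/local/bin"));
    var term = tokenizer.getAttribute<CharTermAttribute>(); // hypothetical accessor
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        Console.WriteLine(term); // "/usr", then "/usr/local", then "/usr/local/bin"
    }
    tokenizer.end();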

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
new file mode 100644
index 0000000..7dd1e62
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
@@ -0,0 +1,105 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.path
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PathHierarchyTokenizer"/>. 
+	/// <para>
+	/// This factory is typically configured for use only in the <code>index</code> 
+	/// Analyzer (or only in the <code>query</code> Analyzer, but never both).
+	/// </para>
+	/// <para>
+	/// For example, in the configuration below a query for 
+	/// <code>Books/NonFic</code> will match documents indexed with values like 
+	/// <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>, 
+	/// <code>Books/NonFic/Science/Physics</code>, etc. But it will not match 
+	/// documents indexed with values like <code>Books</code>, or 
+	/// <code>Books/Fic</code>...
+	/// </para>
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
+	///   &lt;analyzer type="index"&gt;
+	///     &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
+	///   &lt;/analyzer&gt;
+	///   &lt;analyzer type="query"&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;
+	/// </pre>
+	/// <para>
+	/// In this example, however, we see the opposite configuration, so that a query 
+	/// for <code>Books/NonFic/Science/Physics</code> would match documents 
+	/// containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>, 
+	/// or <code>Books/NonFic/Science/Physics</code>, but not 
+	/// <code>Books/NonFic/Science/Physics/Theory</code> or 
+	/// <code>Books/NonFic/Law</code>.
+	/// </para>
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
+	///   &lt;analyzer type="index"&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
+	///   &lt;/analyzer&gt;
+	///   &lt;analyzer type="query"&gt;
+	///     &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;
+	/// </pre>
+	/// </summary>
+	public class PathHierarchyTokenizerFactory : TokenizerFactory
+	{
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly bool reverse;
+	  private readonly int skip;
+
+	  /// <summary>
+	  /// Creates a new PathHierarchyTokenizerFactory </summary>
+	  public PathHierarchyTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
+		replacement = getChar(args, "replace", delimiter);
+		reverse = getBoolean(args, "reverse", false);
+		skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override Tokenizer create(AttributeFactory factory, Reader input)
+	  {
+		if (reverse)
+		{
+		  return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+		}
+		return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+	  }
+	}
+
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
new file mode 100644
index 0000000..00b5880
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
@@ -0,0 +1,214 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.path
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Tokenizer for domain-like hierarchies.
+	/// <para>
+	/// Take something like:
+	/// 
+	/// <pre>
+	/// www.site.co.uk
+	/// </pre>
+	/// 
+	/// and make:
+	/// 
+	/// <pre>
+	/// www.site.co.uk
+	/// site.co.uk
+	/// co.uk
+	/// uk
+	/// </pre>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ReversePathHierarchyTokenizer : Tokenizer
+	{
+
+	  public ReversePathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+	  {
+	  }
+	  public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
+	  {
+		if (bufferSize < 0)
+		{
+		  throw new System.ArgumentException("bufferSize cannot be negative");
+		}
+		if (skip < 0)
+		{
+		  throw new System.ArgumentException("skip cannot be negative");
+		}
+		termAtt.resizeBuffer(bufferSize);
+		this.delimiter = delimiter;
+		this.replacement = replacement;
+		this.skip = skip;
+		resultToken = new StringBuilder(bufferSize);
+		resultTokenBuffer = new char[bufferSize];
+		delimiterPositions = new List<int?>(bufferSize / 10); // C# has no diamond operator; the field is IList<int?>
+	  }
+
+	  private const int DEFAULT_BUFFER_SIZE = 1024;
+	  public const char DEFAULT_DELIMITER = '/';
+	  public const int DEFAULT_SKIP = 0;
+
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly int skip;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  private int endPosition = 0;
+	  private int finalOffset = 0;
+	  private int skipped = 0;
+	  private StringBuilder resultToken;
+
+	  private IList<int?> delimiterPositions;
+	  private int delimitersCount = -1;
+	  private char[] resultTokenBuffer;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		if (delimitersCount == -1)
+		{
+		  int length = 0;
+		  delimiterPositions.Add(0);
+		  while (true)
+		  {
+			int c = input.read();
+			if (c < 0)
+			{
+			  break;
+			}
+			length++;
+			if (c == delimiter)
+			{
+			  delimiterPositions.Add(length);
+			  resultToken.Append(replacement);
+			}
+			else
+			{
+			  resultToken.Append((char)c);
+			}
+		  }
+		  delimitersCount = delimiterPositions.Count;
+		  if (delimiterPositions[delimitersCount - 1] < length)
+		  {
+			delimiterPositions.Add(length);
+			delimitersCount++;
+		  }
+		  if (resultTokenBuffer.Length < resultToken.Length)
+		  {
+			resultTokenBuffer = new char[resultToken.Length];
+		  }
+		  resultToken.getChars(0, resultToken.Length, resultTokenBuffer, 0);
+		  resultToken.Length = 0;
+		  int idx = delimitersCount - 1 - skip;
+		  if (idx >= 0)
+		  {
+			// otherwise it's ok, because we will skip and return false
+			endPosition = delimiterPositions[idx];
+		  }
+		  finalOffset = correctOffset(length);
+		  posAtt.PositionIncrement = 1;
+		}
+		else
+		{
+		  posAtt.PositionIncrement = 0;
+		}
+
+		while (skipped < delimitersCount - skip - 1)
+		{
+		  int start = delimiterPositions[skipped];
+		  termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
+		  offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
+		  skipped++;
+		  return true;
+		}
+
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		resultToken.Length = 0;
+		finalOffset = 0;
+		endPosition = 0;
+		skipped = 0;
+		delimitersCount = -1;
+		delimiterPositions.Clear();
+	  }
+	}
+
+}
\ No newline at end of file
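
The reverse variant buffers the entire input on the first call, records every delimiter position, and then emits sub-hierarchies from the full string down to the last component. Sketched with '.' as the delimiter (same Reader caveat as above):

    var t = new ReversePathHierarchyTokenizer(new StringReader("www.site.co.uk"), '.', 0);
    // emits "www.site.co.uk", "site.co.uk", "co.uk", "uk";
    // the first token has position increment 1, the rest 0.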

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
new file mode 100644
index 0000000..5b47526
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternCaptureGroupTokenFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptncapturegroup" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.PatternCaptureGroupFilterFactory" pattern="([^a-z])" preserve_original="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	/// <seealso cref= PatternCaptureGroupTokenFilter </seealso>
+	public class PatternCaptureGroupFilterFactory : TokenFilterFactory
+	{
+	  private Pattern pattern;
+	  private bool preserveOriginal = true;
+
+	  public PatternCaptureGroupFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, "pattern");
+		preserveOriginal = args.ContainsKey("preserve_original") ? bool.Parse(args["preserve_original"]) : true;
+	  }
+	  public override PatternCaptureGroupTokenFilter create(TokenStream input)
+	  {
+		return new PatternCaptureGroupTokenFilter(input, preserveOriginal, pattern);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
new file mode 100644
index 0000000..887b749
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
@@ -0,0 +1,227 @@
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// CaptureGroup uses Java regexes to emit multiple tokens - one for each capture
+	/// group in one or more patterns.
+	/// 
+	/// <para>
+	/// For example, a pattern like:
+	/// </para>
+	/// 
+	/// <para>
+	/// <code>"(https?://([a-zA-Z\-_0-9.]+))"</code>
+	/// </para>
+	/// 
+	/// <para>
+	/// when matched against the string "http://www.foo.com/index" would return the
+	/// tokens "https://www.foo.com" and "www.foo.com".
+	/// </para>
+	/// 
+	/// <para>
+	/// If none of the patterns match, or if preserveOriginal is true, the original
+	/// token will be preserved.
+	/// </para>
+	/// <para>
+	/// Each pattern is matched as often as it can be, so the pattern
+	/// <code> "(...)"</code>, when matched against <code>"abcdefghi"</code> would
+	/// produce <code>["abc","def","ghi"]</code>
+	/// </para>
+	/// <para>
+	/// A camelCaseFilter could be written as:
+	/// </para>
+	/// <para>
+	/// <code>
+	///   "([A-Z]{2,})",                                 <br />
+	///   "(?&lt;![A-Z])([A-Z][a-z]+)",                     <br />
+	///   "(?:^|\\b|(?&lt;=[0-9_])|(?&lt;=[A-Z]{2}))([a-z]+)", <br />
+	///   "([0-9]+)"
+	/// </code>
+	/// </para>
+	/// <para>
+	/// plus if <seealso cref="#preserveOriginal"/> is true, it would also return
+	/// <code>"camelCaseFilter</code>
+	/// </para>
+	/// </summary>
+	public sealed class PatternCaptureGroupTokenFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute charTermAttr = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posAttr = addAttribute(typeof(PositionIncrementAttribute));
+	  private State state;
+	  private readonly Matcher[] matchers;
+	  private readonly CharsRef spare = new CharsRef();
+	  private readonly int[] groupCounts;
+	  private readonly bool preserveOriginal;
+	  private int[] currentGroup;
+	  private int currentMatcher;
+
+	  /// <param name="input">
+	  ///          the input <seealso cref="TokenStream"/> </param>
+	  /// <param name="preserveOriginal">
+	  ///          set to true to return the original token even if one of the
+	  ///          patterns matches </param>
+	  /// <param name="patterns">
+	  ///          an array of <seealso cref="Pattern"/> objects to match against each token </param>
+
+	  public PatternCaptureGroupTokenFilter(TokenStream input, bool preserveOriginal, params Pattern[] patterns) : base(input)
+	  {
+		this.preserveOriginal = preserveOriginal;
+		this.matchers = new Matcher[patterns.Length];
+		this.groupCounts = new int[patterns.Length];
+		this.currentGroup = new int[patterns.Length];
+		for (int i = 0; i < patterns.Length; i++)
+		{
+		  this.matchers[i] = patterns[i].matcher("");
+		  this.groupCounts[i] = this.matchers[i].groupCount();
+		  this.currentGroup[i] = -1;
+		}
+	  }
+
+	  private bool nextCapture()
+	  {
+		int min_offset = int.MaxValue;
+		currentMatcher = -1;
+		Matcher matcher;
+
+		for (int i = 0; i < matchers.Length; i++)
+		{
+		  matcher = matchers[i];
+		  if (currentGroup[i] == -1)
+		  {
+			currentGroup[i] = matcher.find() ? 1 : 0;
+		  }
+		  if (currentGroup[i] != 0)
+		  {
+			while (currentGroup[i] < groupCounts[i] + 1)
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matcher.start(currentGroup[i]);
+			  int start = matcher.start(currentGroup[i]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matcher.end(currentGroup[i]);
+			  int end = matcher.end(currentGroup[i]);
+			  if (start == end || preserveOriginal && start == 0 && spare.length == end)
+			  {
+				currentGroup[i]++;
+				continue;
+			  }
+			  if (start < min_offset)
+			  {
+				min_offset = start;
+				currentMatcher = i;
+			  }
+			  break;
+			}
+			if (currentGroup[i] == groupCounts[i] + 1)
+			{
+			  currentGroup[i] = -1;
+			  i--;
+			}
+		  }
+		}
+		return currentMatcher != -1;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+
+		if (currentMatcher != -1 && nextCapture())
+		{
+		  Debug.Assert(state != null);
+		  clearAttributes();
+		  restoreState(state);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+		  int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+		  int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+
+		  posAttr.PositionIncrement = 0;
+		  charTermAttr.copyBuffer(spare.chars, start, end - start);
+		  currentGroup[currentMatcher]++;
+		  return true;
+		}
+
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttr.buffer();
+		int length = charTermAttr.length();
+		spare.copyChars(buffer, 0, length);
+		state = captureState();
+
+		for (int i = 0; i < matchers.Length; i++)
+		{
+		  matchers[i].reset(spare);
+		  currentGroup[i] = -1;
+		}
+
+		if (preserveOriginal)
+		{
+		  currentMatcher = 0;
+		}
+		else if (nextCapture())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+		  int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+		  int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+
+		  // if we start at 0 we can simply set the length and save the copy
+		  if (start == 0)
+		  {
+			charTermAttr.Length = end;
+		  }
+		  else
+		  {
+			charTermAttr.copyBuffer(spare.chars, start, end - start);
+		  }
+		  currentGroup[currentMatcher]++;
+		}
+		return true;
+
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		state = null;
+		currentMatcher = -1;
+	  }
+
+	}
+
+}
\ No newline at end of file
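
Pattern and Matcher above are still java.util.regex types awaiting a .NET mapping, so the following is only a behavioral sketch of the summary, not something compilable against this commit:

    // hypothetical once Pattern is bridged (e.g. onto System.Text.RegularExpressions):
    var filter = new PatternCaptureGroupTokenFilter(stream, true,
        Pattern.compile("(https?://([a-zA-Z\\-_0-9.]+))")); // 'stream' = any upstream TokenStream
    // for an incoming token "http://www.foo.com/index" it emits the original token,
    // then the group-1 capture "http://www.foo.com" and the group-2 capture
    // "www.foo.com", all stacked at the same position (increment 0).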

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
new file mode 100644
index 0000000..886668a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
@@ -0,0 +1,179 @@
+using System;
+using System.Text;
+using BaseCharFilter = Lucene.Net.Analysis.CharFilter.BaseCharFilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+
+	using BaseCharFilter = BaseCharFilter;
+
+	/// <summary>
+	/// CharFilter that uses a regular expression to select the target of the replacement string.
+	/// The pattern match is performed on each "block" in the char stream.
+	/// 
+	/// <para>
+	/// ex1) source="aa&nbsp;&nbsp;bb&nbsp;aa&nbsp;bb", pattern="(aa)\\s+(bb)" replacement="$1#$2"<br/>
+	/// output="aa#bb&nbsp;aa#bb"
+	/// </para>
+	/// 
+	/// NOTE: If the replacement produces text whose length differs from the source
+	/// string, and the field is used to highlight a term of that text, the
+	/// highlighting can end up misaligned.
+	/// 
+	/// <para>
+	/// ex2) source="aa123bb", pattern="(aa)\\d+(bb)" replacement="$1&nbsp;$2"<br/>
+	/// output="aa&nbsp;bb"<br/>
+	/// and you want to search bb and highlight it, you will get<br/>
+	/// highlight snippet="aa1&lt;em&gt;23bb&lt;/em&gt;"
+	/// </para>
+	/// 
+	/// @since Solr 1.5
+	/// </summary>
+	public class PatternReplaceCharFilter : BaseCharFilter
+	{
+	  [Obsolete]
+	  public const int DEFAULT_MAX_BLOCK_CHARS = 10000;
+
+	  private readonly Pattern pattern;
+	  private readonly string replacement;
+	  private Reader transformedInput;
+
+	  public PatternReplaceCharFilter(Pattern pattern, string replacement, Reader @in) : base(@in)
+	  {
+		this.pattern = pattern;
+		this.replacement = replacement;
+	  }
+
+	  [Obsolete]
+	  public PatternReplaceCharFilter(Pattern pattern, string replacement, int maxBlockChars, string blockDelimiter, Reader @in) : this(pattern, replacement, @in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read(char[] cbuf, int off, int len) throws java.io.IOException
+	  public override int read(char[] cbuf, int off, int len)
+	  {
+		// Buffer all input on the first call.
+		if (transformedInput == null)
+		{
+		  fill();
+		}
+
+		return transformedInput.read(cbuf, off, len);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void fill() throws java.io.IOException
+	  private void fill()
+	  {
+		StringBuilder buffered = new StringBuilder();
+		char[] temp = new char [1024];
+		for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp))
+		{
+		  buffered.Append(temp, 0, cnt);
+		}
+		transformedInput = new StringReader(processPattern(buffered).ToString());
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read() throws java.io.IOException
+	  public override int read()
+	  {
+		if (transformedInput == null)
+		{
+		  fill();
+		}
+
+		return transformedInput.read();
+	  }
+
+	  protected internal override int correct(int currentOff)
+	  {
+		return Math.Max(0, base.correct(currentOff));
+	  }
+
+	  /// <summary>
+	  /// Replace pattern in input and mark correction offsets. 
+	  /// </summary>
+	  internal virtual CharSequence processPattern(CharSequence input)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.regex.Matcher m = pattern.matcher(input);
+		Matcher m = pattern.matcher(input);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final StringBuffer cumulativeOutput = new StringBuffer();
+		StringBuilder cumulativeOutput = new StringBuilder();
+		int cumulative = 0;
+		int lastMatchEnd = 0;
+		while (m.find())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int groupSize = m.end() - m.start();
+		  int groupSize = m.end() - m.start();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int skippedSize = m.start() - lastMatchEnd;
+		  int skippedSize = m.start() - lastMatchEnd;
+		  lastMatchEnd = m.end();
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int lengthBeforeReplacement = cumulativeOutput.length() + skippedSize;
+		  int lengthBeforeReplacement = cumulativeOutput.Length + skippedSize;
+		  m.appendReplacement(cumulativeOutput, replacement);
+		  // Matcher doesn't tell us how many characters have been appended before the replacement.
+		  // So we need to calculate it. Skipped characters have been added as part of appendReplacement.
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int replacementSize = cumulativeOutput.length() - lengthBeforeReplacement;
+		  int replacementSize = cumulativeOutput.Length - lengthBeforeReplacement;
+
+		  if (groupSize != replacementSize)
+		  {
+			if (replacementSize < groupSize)
+			{
+			  // The replacement is smaller. 
+			  // Add the 'backskip' to the next index after the replacement (this is possibly
+			  // after the end of string, but it's fine -- it just means the last character
+			  // of the replaced block doesn't reach the end of the original string).
+			  cumulative += groupSize - replacementSize;
+			  int atIndex = lengthBeforeReplacement + replacementSize;
+			  // System.err.println(atIndex + "!" + cumulative);
+			  addOffCorrectMap(atIndex, cumulative);
+			}
+			else
+			{
+			  // The replacement is larger. Every new index needs to point to the last
+			  // element of the original group (if any).
+			  for (int i = groupSize; i < replacementSize; i++)
+			  {
+				addOffCorrectMap(lengthBeforeReplacement + i, --cumulative);
+				// System.err.println((lengthBeforeReplacement + i) + " " + cumulative);
+			  }
+			}
+		  }
+		}
+
+		// Append the remaining output, no further changes to indices.
+		m.appendTail(cumulativeOutput);
+		return cumulativeOutput;
+	  }
+	}
+
+}
\ No newline at end of file
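
Because the filter buffers everything on the first read() and records offset fixups via addOffCorrectMap, ex1 from the summary plays out as below (hedged sketch; Pattern and the Reader parameter are still Java-shaped here):

    var cf = new PatternReplaceCharFilter(
        Pattern.compile("(aa)\\s+(bb)"), "$1#$2", new StringReader("aa  bb aa bb"));
    var chunk = new char[64];
    int n = cf.read(chunk, 0, chunk.Length);
    // new string(chunk, 0, n) == "aa#bb aa#bb"; the first match shrank by one char,
    // so correct() shifts offsets after it back by one toward the longer source.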

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
new file mode 100644
index 0000000..f5aa3cb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternReplaceCharFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.PatternReplaceCharFilterFactory" 
+	///                    pattern="([^a-z])" replacement=""/&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since Solr 3.1
+	/// </summary>
+	public class PatternReplaceCharFilterFactory : CharFilterFactory
+	{
+	  private readonly Pattern pattern;
+	  private readonly string replacement;
+	  private readonly int maxBlockChars;
+	  private readonly string blockDelimiters;
+
+	  /// <summary>
+	  /// Creates a new PatternReplaceCharFilterFactory </summary>
+	  public PatternReplaceCharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, "pattern");
+		replacement = get(args, "replacement", "");
+		// TODO: warn if you set maxBlockChars or blockDelimiters ?
+		maxBlockChars = getInt(args, "maxBlockChars", PatternReplaceCharFilter.DEFAULT_MAX_BLOCK_CHARS);
+		blockDelimiters = get(args, "blockDelimiters"); // Java's args.remove(key) returns the value; the get() helper removes and returns it here
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CharFilter create(Reader input)
+	  {
+		return new PatternReplaceCharFilter(pattern, replacement, maxBlockChars, blockDelimiters, input);
+	  }
+	}
+
+}
\ No newline at end of file


[08/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
new file mode 100644
index 0000000..6753039
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="ClassicTokenizer"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ClassicTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxTokenLength;
+
+	  /// <summary>
+	  /// Creates a new ClassicTokenizerFactory </summary>
+	  public ClassicTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ClassicTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input);
+		tokenizer.MaxTokenLength = maxTokenLength;
+		return tokenizer;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
new file mode 100644
index 0000000..344d817
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -0,0 +1,723 @@
+using System;
+
+/* The following code was generated by JFlex 1.5.1 */
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/*
+	
+	WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
+	      the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
+	
+	*/
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// This class implements the classic Lucene StandardTokenizer, as it existed up until 3.0 
+	/// </summary>
+
+	internal class ClassicTokenizerImpl : StandardTokenizerInterface
+	{
+
+	  /// <summary>
+	  /// This character denotes the end of file </summary>
+	  public const int YYEOF = -1; // end-of-file sentinel; the converter dropped "YYEOF = StandardTokenizerInterface.YYEOF" from the Java original
+
+	  /// <summary>
+	  /// initial size of the lookahead buffer </summary>
+	  private const int ZZ_BUFFERSIZE = 4096;
+
+	  /// <summary>
+	  /// lexical states </summary>
+	  public const int YYINITIAL = 0;
+
+	  /// <summary>
+	  /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+	  /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+	  ///                  at the beginning of a line
+	  /// l is of the form l = 2*k, k a non-negative integer
+	  /// </summary>
+	  private static readonly int[] ZZ_LEXSTATE = { 0, 0 }; // empty in the converted output; values as in the JFlex-generated Java source
+
+	  /// <summary>
+	  /// Translates characters to character classes
+	  /// </summary>
+	  private const string ZZ_CMAP_PACKED = "\x0026\0\x0001\x0005\x0001\x0003\x0004\0\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\0" + "\x0001\x0006\x001A\x000A\x0004\0\x0001\x0008\x0001\0\x001A\x000A\x002F\0\x0001\x000A\x000A\0\x0001\x000A" + "\x0004\0\x0001\x000A\x0005\0\x0017\x000A\x0001\0\x001F\x000A\x0001\0\u0128\x000A\x0002\0\x0012\x000A" + "\x001C\0\x005E\x000A\x0002\0\x0009\x000A\x0002\0\x0007\x000A\x000E\0\x0002\x000A\x000E\0\x0005\x000A" + "\x0009\0\x0001\x000A\x008B\0\x0001\x000A\x000B\0\x0001\x000A\x0001\0\x0003\x000A\x0001\0\x0001\x000A" + "\x0001\0\x0014\x000A\x0001\0\x002C\x000A\x0001\0\x0008\x000A\x0002\0\x001A\x000A\x000C\0\x0082\x000A" + "\x000A\0\x0039\x000A\x0002\0\x0002\x000A\x0002\0\x0002\x000A\x0003\0\x0026\x000A\x0002\0\x0002\x000A" + "\x0037\0\x0026\x000A\x0002\0\x0001\x000A\x0007\0\x0027\x000A\x0048\0\x001B\x000A\x0005\0\x0003\x000A" + "\x002E\0\x001A\x000A\x0005\0\x000B\x000A\x0015\0\x000A\x0002\x0007\0\x0063\x000A\x0001\0\x0001\x000A" + "\x000F\0\x0002\x000A\x0009\0\x000A\x0002\x0003\x000A\x0013\0\x0001\x000A\x0001\0\x001B\x000A\x0053\0" + "\x0026\x000A\u015f\0\x0035\x000A\x0003\0\x0001\x000A\x0012\0\x0001\x000A\x0007\0\x000A\x000A\x0004\0" + "\x000A\x0002\x0015\0\x0008\x000A\x0002\0\x0002\x000A\x0002\0\x0016\x000A\x0001\0\x0007\x000A\x0001\0" + "\x0001\x000A\x0003\0\x0004\x000A\x0022\0\x0002\x000A\x0001\0\x0003\x000A\x0004\0\x000A\x0002\x0002\x000A" + "\x0013\0\x0006\x000A\x0004\0\x0002\x000A\x0002\0\x0016\x000A\x0001\0\x0007\x000A\x0001\0\x0002\x000A" + "\x0001\0\x0002\x000A\x0001\0\x0002\x000A\x001F\0\x0004\x000A\x0001\0\x0001\x000A\x0007\0\x000A\x0002" + "\x0002\0\x0003\x000A\x0010\0\x0007\x000A\x0001\0\x0001\x000A\x0001\0\x0003\x000A\x0001\0\x0016\x000A" + "\x0001\0\x0007\x000A\x0001\0\x0002\x000A\x0001\0\x0005\x000A\x0003\0\x0001\x000A\x0012\0\x0001\x000A" + "\x000F\0\x0001\x000A\x0005\0\x000A\x0002\x0015\0\x0008\x000A\x0002\0\x0002\x000A\x0002\0\x0016\x000A" + "\x0001\0\x0007\x000A\x0001\0\x0002\x000A\x0002\0\x0004\x000A\x0003\0\x0001\x000A\x001E\0\x0002\x000A" + "\x0001\0\x0003\x000A\x0004\0\x000A\x0002\x0015\0\x0006\x000A\x0003\0\x0003\x000A\x0001\0\x0004\x000A" + "\x0003\0\x0002\x000A\x0001\0\x0001\x000A\x0001\0\x0002\x000A\x0003\0\x0002\x000A\x0003\0\x0003\x000A" + "\x0003\0\x0008\x000A\x0001\0\x0003\x000A\x002D\0\x0009\x0002\x0015\0\x0008\x000A\x0001\0\x0003\x000A" + "\x0001\0\x0017\x000A\x0001\0\x000A\x000A\x0001\0\x0005\x000A\x0026\0\x0002\x000A\x0004\0\x000A\x0002" + "\x0015\0\x0008\x000A\x0001\0\x0003\x000A\x0001\0\x0017\x000A\x0001\0\x000A\x000A\x0001\0\x0005\x000A" + "\x0024\0\x0001\x000A\x0001\0\x0002\x000A\x0004\0\x000A\x0002\x0015\0\x0008\x000A\x0001\0\x0003\x000A" + "\x0001\0\x0017\x000A\x0001\0\x0010\x000A\x0026\0\x0002\x000A\x0004\0\x000A\x0002\x0015\0\x0012\x000A" + "\x0003\0\x0018\x000A\x0001\0\x0009\x000A\x0001\0\x0001\x000A\x0002\0\x0007\x000A\x0039\0\x0001\x0001" + "\x0030\x000A\x0001\x0001\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\0\x0002\x000A\x0001\0" + "\x0001\x000A\x0002\0\x0002\x000A\x0001\0\x0001\x000A\x0002\0\x0001\x000A\x0006\0\x0004\x000A\x0001\0" + "\x0007\x000A\x0001\0\x0003\x000A\x0001\0\x0001\x000A\x0001\0\x0001\x000A\x0002\0\x0002\x000A\x0001\0" + "\x0004\x000A\x0001\0\x0002\x000A\x0009\0\x0001\x000A\x0002\0\x0005\x000A\x0001\0\x0001\x000A\x0009\0" + "\x000A\x0002\x0002\0\x0002\x000A\x0022\0\x0001\x000A\x001F\0\x000A\x0002\x0016\0\x0008\x000A\x0001\0" + "\x0022\x000A\x001D\0\x0004\x000A\x0074\0\x0022\x000A\x0001\0\x0005\x000A\x0001\0\x0002\x000A\x0015\0" + "\x000A\x0002\x0006\0\x0006\x000A\x004A\0\x0026\x000A\x000A\0\x0027\x000A\x0009\0\x005A\x000A\x0005\0" + "\x0044\x000A\x0005\0\x0052\x000A\x0006\0\x0007\x000A\x0001\0\x003F\x000A\x0001\0\x0001\x000A\x0001\0" + "\x0004\x000A\x0002\0\x0007\x000A\x0001\0\x0001\x000A\x0001\0\x0004\x000A\x0002\0\x0027\x000A\x0001\0" + "\x0001\x000A\x0001\0\x0004\x000A\x0002\0\x001F\x000A\x0001\0\x0001\x000A\x0001\0\x0004\x000A\x0002\0" + "\x0007\x000A\x0001\0\x0001\x000A\x0001\0\x0004\x000A\x0002\0\x0007\x000A\x0001\0\x0007\x000A\x0001\0" + "\x0017\x000A\x0001\0\x001F\x000A\x0001\0\x0001\x000A\x0001\0\x0004\x000A\x0002\0\x0007\x000A\x0001\0" + "\x0027\x000A\x0001\0\x0013\x000A\x000E\0\x0009\x0002\x002E\0\x0055\x000A\x000C\0\u026c\x000A\x0002\0" + "\x0008\x000A\x000A\0\x001A\x000A\x0005\0\x004B\x000A\x0095\0\x0034\x000A\x002C\0\x000A\x0002\x0026\0" + "\x000A\x0002\x0006\0\x0058\x000A\x0008\0\x0029\x000A\u0557\0\x009C\x000A\x0004\0\x005A\x000A\x0006\0" + "\x0016\x000A\x0002\0\x0006\x000A\x0002\0\x0026\x000A\x0002\0\x0006\x000A\x0002\0\x0008\x000A\x0001\0" + "\x0001\x000A\x0001\0\x0001\x000A\x0001\0\x0001\x000A\x0001\0\x001F\x000A\x0002\0\x0035\x000A\x0001\0" + "\x0007\x000A\x0001\0\x0001\x000A\x0003\0\x0003\x000A\x0001\0\x0007\x000A\x0003\0\x0004\x000A\x0002\0" + "\x0006\x000A\x0004\0\x000D\x000A\x0005\0\x0003\x000A\x0001\0\x0007\x000A\x0082\0\x0001\x000A\x0082\0" + "\x0001\x000A\x0004\0\x0001\x000A\x0002\0\x000A\x000A\x0001\0\x0001\x000A\x0003\0\x0005\x000A\x0006\0" + "\x0001\x000A\x0001\0\x0001\x000A\x0001\0\x0001\x000A\x0001\0\x0004\x000A\x0001\0\x0003\x000A\x0001\0" + "\x0007\x000A\u0ecb\0\x0002\x000A\x002A\0\x0005\x000A\x000A\0\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B" + "\x0002\x000B\x005A\x000B\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\0\x005E\x000A\x0011\0" + "\x0018\x000A\x0038\0\x0010\x000B\u0100\0\x0080\x000B\x0080\0\u19b6\x000B\x000A\x000B\x0040\0\u51a6\x000B" + "\x005A\x000B\u048d\x000A\u0773\0\u2ba4\x000A\u215c\0\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\0\x0005\x000A" + "\x0005\0\x0001\x000A\x0001\0\x000A\x000A\x0001\0\x000D\x000A\x0001\0\x0005\x000A\x0001\0\x0001\x000A" + "\x0001\0\x0002\x000A\x0001\0\x0002\x000A\x0001\0\x006C\x000A\x0021\0\u016b\x000A\x0012\0\x0040\x000A" + "\x0002\0\x0036\x000A\x0028\0\x000C\x000A\x0074\0\x0003\x000A\x0001\0\x0001\x000A\x0001\0\x0087\x000A" + "\x0013\0\x000A\x0002\x0007\0\x001A\x000A\x0006\0\x001A\x000A\x000A\0\x0001\x000B\x003A\x000B\x001F\x000A" + "\x0003\0\x0006\x000A\x0002\0\x0006\x000A\x0002\0\x0006\x000A\x0002\0\x0003\x000A\x0023\0";
+
+	  /// <summary>
+	  /// Translates characters to character classes
+	  /// </summary>
+	  private static readonly char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+	  /// <summary>
+	  /// Translates DFA states to action switch labels.
+	  /// </summary>
+	  private static readonly int[] ZZ_ACTION = zzUnpackAction();
+
+	  private const string ZZ_ACTION_PACKED_0 = "\x0001\0\x0001\x0001\x0003\x0002\x0001\x0003\x000B\0\x0001\x0002\x0003\x0004\x0002\0" + "\x0001\x0005\x0001\0\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004\x0002\x0007" + "\x0001\x0008\x0001\0\x0001\x0008\x0003\0\x0002\x0008\x0001\x0009\x0001\x000A\x0001\x0004";
+
+	  private static int [] zzUnpackAction()
+	  {
+		int[] result = new int[50];
+		int offset = 0;
+		offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackAction(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
+
+
+	  /// <summary>
+	  /// Translates a state to a row index in the transition table
+	  /// </summary>
+	  private static readonly int[] ZZ_ROWMAP = zzUnpackRowMap();
+
+	  private const string ZZ_ROWMAP_PACKED_0 = "\0\0\0\x000C\0\x0018\0\x0024\0\x0030\0\x000C\0\x003C\0\x0048" + "\0\x0054\0\x0060\0\x006C\0\x0078\0\x0084\0\x0090\0\x009C\0\x00A8" + "\0\x00B4\0\x00C0\0\x00CC\0\x00D8\0\x00E4\0\x00F0\0\x00FC\0\u0108" + "\0\u0114\0\u0120\0\u012c\0\u0138\0\u0144\0\u0150\0\u015c\0\u0168" + "\0\u0174\0\u0180\0\u018c\0\u0198\0\u01a4\0\x00A8\0\u01b0\0\u01bc" + "\0\u01c8\0\u01d4\0\u01e0\0\u01ec\0\u01f8\0\x003C\0\x006C\0\u0204" + "\0\u0210\0\u021c";
+
+	  private static int [] zzUnpackRowMap()
+	  {
+		int[] result = new int[50];
+		int offset = 0;
+		offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackRowMap(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int high = packed[i++] << 16;
+		  result[j++] = high | packed[i++];
+		}
+		return j;
+	  }
+
+	  /// <summary>
+	  /// The transition table of the DFA
+	  /// </summary>
+	  private static readonly int[] ZZ_TRANS = zzUnpackTrans();
+
+	  private const string ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x000D\0\x0002\x0003" + "\x0001\0\x0001\x0007\x0001\0\x0001\x0008\x0002\x0009\x0001\x000A\x0001\x0003\x0002\0" + "\x0001\x0003\x0001\x0004\x0001\0\x0001\x000B\x0001\0\x0001\x0008\x0002\x000C\x0001\x000D" + "\x0001\x0004\x0002\0\x0001\x0003\x0001\x0004\x0001\x000E\x0001\x000F\x0001\x0010\x0001\x0011" + "\x0002\x0009\x0001\x000A\x0001\x0012\x0002\0\x0001\x0013\x0001\x0014\x0007\0\x0001\x0015" + "\x0002\0\x0002\x0016\x0007\0\x0001\x0016\x0002\0\x0001\x0017\x0001\x0018\x0007\0" + "\x0001\x0019\x0003\0\x0001\x001A\x0007\0\x0001\x000A\x0002\0\x0001\x001B\x0001\x001C" + "\x0007\0\x0001\x001D\x0002\0\x0001\x001E\x0001\x001F\x0007\0\x0001\x0020\x0002\0" + "\x0001\x0021\x0001\x0022\x0007\0\x0001\x0023\x000B\0\x0001\x0024\x0002\0\x0001\x0013" + "\x0001\x0014\x0007\0\x0001\x0025\x000B\0\x0001\x0026\x0002\0\x0002\x0016\x0007\0" + "\x0001\x0027\x0002\0\x0001\x0003\x0001\x0004\x0001
 \x000E\x0001\x0007\x0001\x0010\x0001\x0011" + "\x0002\x0009\x0001\x000A\x0001\x0012\x0002\0\x0002\x0013\x0001\0\x0001\x0028\x0001\0" + "\x0001\x0008\x0002\x0029\x0001\0\x0001\x0013\x0002\0\x0001\x0013\x0001\x0014\x0001\0" + "\x0001\x002A\x0001\0\x0001\x0008\x0002\x002B\x0001\x002C\x0001\x0014\x0002\0\x0001\x0013" + "\x0001\x0014\x0001\0\x0001\x0028\x0001\0\x0001\x0008\x0002\x0029\x0001\0\x0001\x0015" + "\x0002\0\x0002\x0016\x0001\0\x0001\x002D\x0002\0\x0001\x002D\x0002\0\x0001\x0016" + "\x0002\0\x0002\x0017\x0001\0\x0001\x0029\x0001\0\x0001\x0008\x0002\x0029\x0001\0" + "\x0001\x0017\x0002\0\x0001\x0017\x0001\x0018\x0001\0\x0001\x002B\x0001\0\x0001\x0008" + "\x0002\x002B\x0001\x002C\x0001\x0018\x0002\0\x0001\x0017\x0001\x0018\x0001\0\x0001\x0029" + "\x0001\0\x0001\x0008\x0002\x0029\x0001\0\x0001\x0019\x0003\0\x0001\x001A\x0001\0" + "\x0001\x002C\x0002\0\x0003\x002C\x0001\x001A\x0002\0\x0002\x001B\x0001\0\x0001\x002E" + "\x0001\0\x0001\x0008\x0002\x0009\x0001\x000A\x0001\x001B\x0002\0
 \x0001\x001B\x0001\x001C" + "\x0001\0\x0001\x002F\x0001\0\x0001\x0008\x0002\x000C\x0001\x000D\x0001\x001C\x0002\0" + "\x0001\x001B\x0001\x001C\x0001\0\x0001\x002E\x0001\0\x0001\x0008\x0002\x0009\x0001\x000A" + "\x0001\x001D\x0002\0\x0002\x001E\x0001\0\x0001\x0009\x0001\0\x0001\x0008\x0002\x0009" + "\x0001\x000A\x0001\x001E\x0002\0\x0001\x001E\x0001\x001F\x0001\0\x0001\x000C\x0001\0" + "\x0001\x0008\x0002\x000C\x0001\x000D\x0001\x001F\x0002\0\x0001\x001E\x0001\x001F\x0001\0" + "\x0001\x0009\x0001\0\x0001\x0008\x0002\x0009\x0001\x000A\x0001\x0020\x0002\0\x0002\x0021" + "\x0001\0\x0001\x000A\x0002\0\x0003\x000A\x0001\x0021\x0002\0\x0001\x0021\x0001\x0022" + "\x0001\0\x0001\x000D\x0002\0\x0003\x000D\x0001\x0022\x0002\0\x0001\x0021\x0001\x0022" + "\x0001\0\x0001\x000A\x0002\0\x0003\x000A\x0001\x0023\x0004\0\x0001\x000E\x0006\0" + "\x0001\x0024\x0002\0\x0001\x0013\x0001\x0014\x0001\0\x0001\x0030\x0001\0\x0001\x0008" + "\x0002\x0029\x0001\0\x0001\x0015\x0002\0\x0002\x0016\x0001\0\x0001\x00
 2D\x0002\0" + "\x0001\x002D\x0002\0\x0001\x0027\x0002\0\x0002\x0013\x0007\0\x0001\x0013\x0002\0" + "\x0002\x0017\x0007\0\x0001\x0017\x0002\0\x0002\x001B\x0007\0\x0001\x001B\x0002\0" + "\x0002\x001E\x0007\0\x0001\x001E\x0002\0\x0002\x0021\x0007\0\x0001\x0021\x0002\0" + "\x0002\x0031\x0007\0\x0001\x0031\x0002\0\x0002\x0013\x0007\0\x0001\x0032\x0002\0" + "\x0002\x0031\x0001\0\x0001\x002D\x0002\0\x0001\x002D\x0002\0\x0001\x0031\x0002\0" + "\x0002\x0013\x0001\0\x0001\x0030\x0001\0\x0001\x0008\x0002\x0029\x0001\0\x0001\x0013" + "\x0001\0";
+
+	  private static int [] zzUnpackTrans()
+	  {
+		int[] result = new int[552];
+		int offset = 0;
+		offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackTrans(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  value--;
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
+
+
+	  /* error codes */
+	  private const int ZZ_UNKNOWN_ERROR = 0;
+	  private const int ZZ_NO_MATCH = 1;
+	  private const int ZZ_PUSHBACK_2BIG = 2;
+
+	  /* error messages for the codes above */
+	  private static readonly string[] ZZ_ERROR_MSG = { "Unknown internal scanner error", "Error: could not match input", "Error: pushback value was too large" };
+
+	  /// <summary>
+	  /// ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+	  /// </summary>
+	  private static readonly int[] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+	  private const string ZZ_ATTRIBUTE_PACKED_0 = "\x0001\0\x0001\x0009\x0003\x0001\x0001\x0009\x000B\0\x0004\x0001\x0002\0\x0001\x0001" + "\x0001\0\x000F\x0001\x0001\0\x0001\x0001\x0003\0\x0005\x0001";
+
+	  private static int [] zzUnpackAttribute()
+	  {
+		int[] result = new int[50];
+		int offset = 0;
+		offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackAttribute(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
+
+	  /// <summary>
+	  /// the input device </summary>
+	  private Reader zzReader;
+
+	  /// <summary>
+	  /// the current state of the DFA </summary>
+	  private int zzState;
+
+	  /// <summary>
+	  /// the current lexical state </summary>
+	  private int zzLexicalState = YYINITIAL;
+
+	  /// <summary>
+	  /// this buffer contains the current text to be matched and is
+	  ///    the source of the yytext() string 
+	  /// </summary>
+	  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
+
+	  /// <summary>
+	  /// the text position at the last accepting state </summary>
+	  private int zzMarkedPos;
+
+	  /// <summary>
+	  /// the current text position in the buffer </summary>
+	  private int zzCurrentPos;
+
+	  /// <summary>
+	  /// startRead marks the beginning of the yytext() string in the buffer </summary>
+	  private int zzStartRead;
+
+	  /// <summary>
+	  /// endRead marks the last character in the buffer that has been read
+	  ///    from input 
+	  /// </summary>
+	  private int zzEndRead;
+
+	  /// <summary>
+	  /// number of newlines encountered up to the start of the matched text </summary>
+	  private int yyline;
+
+	  /// <summary>
+	  /// the number of characters up to the start of the matched text </summary>
+	  private int yychar_Renamed;
+
+	  /// <summary>
+	  /// the number of characters from the last newline up to the start of the 
+	  /// matched text
+	  /// </summary>
+	  private int yycolumn;
+
+	  /// <summary>
+	  /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+	  /// </summary>
+	  private bool zzAtBOL = true;
+
+	  /// <summary>
+	  /// zzAtEOF == true <=> the scanner is at the EOF </summary>
+	  private bool zzAtEOF;
+
+	  /// <summary>
+	  /// denotes if the user-EOF-code has already been executed </summary>
+	  private bool zzEOFDone;
+
+	  /* user code: */
+
+	public const int ALPHANUM = StandardTokenizer.ALPHANUM;
+	public const int APOSTROPHE = StandardTokenizer.APOSTROPHE;
+	public const int ACRONYM = StandardTokenizer.ACRONYM;
+	public const int COMPANY = StandardTokenizer.COMPANY;
+	public const int EMAIL = StandardTokenizer.EMAIL;
+	public const int HOST = StandardTokenizer.HOST;
+	public const int NUM = StandardTokenizer.NUM;
+	public const int CJ = StandardTokenizer.CJ;
+	public const int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
+
+	public static readonly string[] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
+
+	public int yychar()
+	{
+		return yychar_Renamed;
+	}
+
+	/// <summary>
+	/// Fills CharTermAttribute with the current token text.
+	/// </summary>
+	public void getText(CharTermAttribute t)
+	{
+	  t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	}
+
+
+
+	  /// <summary>
+	  /// Creates a new scanner
+	  /// </summary>
+	  /// <param name="in">  the java.io.Reader to read input from. </param>
+	  internal ClassicTokenizerImpl(Reader @in)
+	  {
+		this.zzReader = @in;
+	  }
+
+
+	  /// <summary>
+	  /// Unpacks the compressed character translation table.
+	  /// </summary>
+	  /// <param name="packed">   the packed character translation table </param>
+	  /// <returns>         the unpacked character translation table </returns>
+	  private static char [] zzUnpackCMap(string packed)
+	  {
+		char[] map = new char[0x10000];
+		int i = 0; // index in packed string
+		int j = 0; // index in unpacked array
+		while (i < 1138)
+		{
+		  int count = packed[i++];
+		  char value = packed[i++];
+		  do
+		  {
+			  map[j++] = value;
+		  } while (--count > 0);
+		}
+		return map;
+	  }
+
+
+	  /// <summary>
+	  /// Refills the input buffer.
+	  /// </summary>
+	  /// <returns>      <code>false</code>, iff there was new input.
+	  /// </returns>
+	  /// <exception cref="java.io.IOException">  if any I/O-Error occurs </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private boolean zzRefill() throws java.io.IOException
+	  private bool zzRefill()
+	  {
+
+		/* first: make room (if you can) */
+		if (zzStartRead > 0)
+		{
+		  Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+
+		  /* translate stored positions */
+		  zzEndRead -= zzStartRead;
+		  zzCurrentPos -= zzStartRead;
+		  zzMarkedPos -= zzStartRead;
+		  zzStartRead = 0;
+		}
+
+		/* is the buffer big enough? */
+		if (zzCurrentPos >= zzBuffer.Length)
+		{
+		  /* if not: blow it up */
+		  char[] newBuffer = new char[zzCurrentPos * 2];
+		  Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
+		  zzBuffer = newBuffer;
+		}
+
+		/* finally: fill the buffer with new input */
+		int numRead = zzReader.read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+
+		if (numRead > 0)
+		{
+		  zzEndRead += numRead;
+		  return false;
+		}
+		// unlikely but not impossible: read 0 characters, but not at end of stream    
+		if (numRead == 0)
+		{
+		  int c = zzReader.read();
+		  if (c == -1)
+		  {
+			return true;
+		  }
+		  else
+		  {
+			zzBuffer[zzEndRead++] = (char) c;
+			return false;
+		  }
+		}
+
+		// numRead < 0
+		return true;
+	  }
+
+
+	  /// <summary>
+	  /// Closes the input stream.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public final void yyclose() throws java.io.IOException
+	  public void yyclose()
+	  {
+		zzAtEOF = true; // indicate end of file
+		zzEndRead = zzStartRead; // invalidate buffer
+
+		if (zzReader != null)
+		{
+		  zzReader.close();
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Resets the scanner to read from a new input stream.
+	  /// Does not close the old reader.
+	  /// 
+	  /// All internal variables are reset, the old input stream 
+	  /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+	  /// Lexical state is set to <tt>YYINITIAL</tt>.
+	  /// 
+	  /// Internal scan buffer is resized down to its initial length, if it has grown.
+	  /// </summary>
+	  /// <param name="reader">   the new input stream  </param>
+	  public void yyreset(Reader reader)
+	  {
+		zzReader = reader;
+		zzAtBOL = true;
+		zzAtEOF = false;
+		zzEOFDone = false;
+		zzEndRead = zzStartRead = 0;
+		zzCurrentPos = zzMarkedPos = 0;
+		yyline = yychar_Renamed = yycolumn = 0;
+		zzLexicalState = YYINITIAL;
+		if (zzBuffer.Length > ZZ_BUFFERSIZE)
+		{
+		  zzBuffer = new char[ZZ_BUFFERSIZE];
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Returns the current lexical state.
+	  /// </summary>
+	  public int yystate()
+	  {
+		return zzLexicalState;
+	  }
+
+
+	  /// <summary>
+	  /// Enters a new lexical state
+	  /// </summary>
+	  /// <param name="newState"> the new lexical state </param>
+	  public void yybegin(int newState)
+	  {
+		zzLexicalState = newState;
+	  }
+
+
+	  /// <summary>
+	  /// Returns the text matched by the current regular expression.
+	  /// </summary>
+	  public string yytext()
+	  {
+		return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	  }
+
+
+	  /// <summary>
+	  /// Returns the character at position <tt>pos</tt> from the 
+	  /// matched text. 
+	  /// 
+	  /// It is equivalent to yytext().charAt(pos), but faster
+	  /// </summary>
+	  /// <param name="pos"> the position of the character to fetch. 
+	  ///            A value from 0 to yylength()-1.
+	  /// </param>
+	  /// <returns> the character at position pos </returns>
+	  public char yycharat(int pos)
+	  {
+		return zzBuffer[zzStartRead + pos];
+	  }
+
+
+	  /// <summary>
+	  /// Returns the length of the matched text region.
+	  /// </summary>
+	  public int yylength()
+	  {
+		return zzMarkedPos - zzStartRead;
+	  }
+
+
+	  /// <summary>
+	  /// Reports an error that occurred while scanning.
+	  /// 
+	  /// In a well-formed scanner (no or only correct usage of 
+	  /// yypushback(int) and a match-all fallback rule) this method 
+	  /// will only be called with things that "Can't Possibly Happen".
+	  /// If this method is called, something is seriously wrong
+	  /// (e.g. a JFlex bug producing a faulty scanner etc.).
+	  /// 
+	  /// Usual syntax/scanner level error handling should be done
+	  /// in error fallback rules.
+	  /// </summary>
+	  /// <param name="errorCode">  the code of the errormessage to display </param>
+	  private void zzScanError(int errorCode)
+	  {
+		string message;
+		try
+		{
+		  message = ZZ_ERROR_MSG[errorCode];
+		}
+		catch (System.IndexOutOfRangeException)
+		{
+		  message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+		}
+
+		throw new Exception(message);
+	  }
+
+
+	  /// <summary>
+	  /// Pushes the specified number of characters back into the input stream.
+	  /// 
+	  /// They will be read again by the next call of the scanning method.
+	  /// </summary>
+	  /// <param name="number">  the number of characters to be read again.
+	  ///                This number must not be greater than yylength()! </param>
+	  public virtual void yypushback(int number)
+	  {
+		if (number > yylength())
+		{
+		  zzScanError(ZZ_PUSHBACK_2BIG);
+		}
+
+		zzMarkedPos -= number;
+	  }
+
+
+	  /// <summary>
+	  /// Resumes scanning until the next regular expression is matched,
+	  /// the end of input is encountered or an I/O-Error occurs.
+	  /// </summary>
+	  /// <returns>      the next token </returns>
+	  /// <exception cref="java.io.IOException">  if any I/O-Error occurs </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public int getNextToken() throws java.io.IOException
+	  public virtual int NextToken
+	  {
+		  get
+		  {
+			int zzInput;
+			int zzAction;
+    
+			// cached fields:
+			int zzCurrentPosL;
+			int zzMarkedPosL;
+			int zzEndReadL = zzEndRead;
+			char[] zzBufferL = zzBuffer;
+			char[] zzCMapL = ZZ_CMAP;
+    
+			int[] zzTransL = ZZ_TRANS;
+			int[] zzRowMapL = ZZ_ROWMAP;
+			int[] zzAttrL = ZZ_ATTRIBUTE;
+    
+			while (true)
+			{
+			  zzMarkedPosL = zzMarkedPos;
+    
+			  yychar_Renamed += zzMarkedPosL - zzStartRead;
+    
+			  zzAction = -1;
+    
+			  zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+    
+			  zzState = ZZ_LEXSTATE[zzLexicalState];
+    
+			  // set up zzAction for empty match case:
+			  int zzAttributes = zzAttrL[zzState];
+			  if ((zzAttributes & 1) == 1)
+			  {
+				zzAction = zzState;
+			  }
+    
+    
+			  {
+				while (true)
+				{
+    
+				  if (zzCurrentPosL < zzEndReadL)
+				  {
+					zzInput = zzBufferL[zzCurrentPosL++];
+				  }
+				  else if (zzAtEOF)
+				  {
+					zzInput = StandardTokenizerInterface_Fields.YYEOF;
+					goto zzForActionBreak;
+				  }
+				  else
+				  {
+					// store back cached positions
+					zzCurrentPos = zzCurrentPosL;
+					zzMarkedPos = zzMarkedPosL;
+					bool eof = zzRefill();
+					// get translated positions and possibly new buffer
+					zzCurrentPosL = zzCurrentPos;
+					zzMarkedPosL = zzMarkedPos;
+					zzBufferL = zzBuffer;
+					zzEndReadL = zzEndRead;
+					if (eof)
+					{
+					  zzInput = StandardTokenizerInterface_Fields.YYEOF;
+					  goto zzForActionBreak;
+					}
+					else
+					{
+					  zzInput = zzBufferL[zzCurrentPosL++];
+					}
+				  }
+				  int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
+				  if (zzNext == -1)
+				  {
+					  goto zzForActionBreak;
+				  }
+				  zzState = zzNext;
+    
+				  zzAttributes = zzAttrL[zzState];
+				  if ((zzAttributes & 1) == 1)
+				  {
+					zzAction = zzState;
+					zzMarkedPosL = zzCurrentPosL;
+					if ((zzAttributes & 8) == 8)
+					{
+						goto zzForActionBreak;
+					}
+				  }
+    
+				}
+			  }
+			  zzForActionBreak:
+    
+			  // store back cached position
+			  zzMarkedPos = zzMarkedPosL;
+    
+			  switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
+			  {
+				case 1:
+				{
+				  // Break so we don't hit fall-through warning:
+				  break; // ignore
+				}
+				case 11:
+					break;
+				case 2:
+				{
+					  return ALPHANUM;
+				}
+				case 12:
+					break;
+				case 3:
+				{
+					  return CJ;
+				}
+				case 13:
+					break;
+				case 4:
+				{
+					  return HOST;
+				}
+				case 14:
+					break;
+				case 5:
+				{
+					  return NUM;
+				}
+				case 15:
+					break;
+				case 6:
+				{
+					  return APOSTROPHE;
+				}
+				case 16:
+					break;
+				case 7:
+				{
+					  return COMPANY;
+				}
+				case 17:
+					break;
+				case 8:
+				{
+					  return ACRONYM_DEP;
+				}
+				case 18:
+					break;
+				case 9:
+				{
+					  return ACRONYM;
+				}
+				case 19:
+					break;
+				case 10:
+				{
+					  return EMAIL;
+				}
+				case 20:
+					break;
+				default:
+				  if (zzInput == StandardTokenizerInterface_Fields.YYEOF && zzStartRead == zzCurrentPos)
+				  {
+					zzAtEOF = true;
+					return StandardTokenizerInterface_Fields.YYEOF;
+				  }
+				  else
+				  {
+					zzScanError(ZZ_NO_MATCH);
+				  }
+			  break;
+			  }
+			}
+		  }
+	  }
+
+
+	}
+
+}
\ No newline at end of file
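
A note on the generated tables above: zzUnpackAction, zzUnpackRowMap, zzUnpackTrans and zzUnpackAttribute all decode the same run-length encoding, where the packed string is a sequence of (count, value) character pairs; zzUnpackTrans additionally shifts each value down by one, so a 0 in the packed form becomes the -1 "no transition" sentinel the scanner loop checks for. Scanning then walks the DFA with zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]. A minimal standalone sketch of the decoding, assuming every count is at least 1 (UnpackRle is a hypothetical name, not part of this port):

    // Decodes the JFlex (count, value) pair encoding used by the
    // zzUnpack* methods above.
    static int[] UnpackRle(string packed, int totalLength)
    {
        int[] result = new int[totalLength];
        int i = 0; // index in the packed string
        int j = 0; // index in the unpacked array
        while (i < packed.Length)
        {
            int count = packed[i++]; // run length, always >= 1
            int value = packed[i++]; // value to repeat
            while (count-- > 0)
            {
                result[j++] = value;
            }
        }
        return result;
    }

    // "\x0001\0\x0001\x0009" unpacks to { 0, 9 }: one 0 followed by one 9,
    // matching the first two pairs of ZZ_ATTRIBUTE_PACKED_0 above.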

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
new file mode 100644
index 0000000..73d16e3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -0,0 +1,162 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using LowerCaseFilter = LowerCaseFilter;
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>, {@link
+	/// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
+	/// English stop words.
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating StandardAnalyzer:
+	/// <ul>
+	///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+	///        from their combining characters. If you use a previous version number,
+	///        you get the exact broken behavior for backwards compatibility.
+	///   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
+	///        and StopFilter correctly handles Unicode 4.0 supplementary characters
+	///        in stopwords.  <seealso cref="ClassicTokenizer"/> and <seealso cref="ClassicAnalyzer"/> 
+	///        are the pre-3.1 implementations of StandardTokenizer and
+	///        StandardAnalyzer.
+	///   <li> As of 2.9, StopFilter preserves position increments
+	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class StandardAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Default maximum allowed token length </summary>
+	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// An unmodifiable set containing some common English words that are usually not
+	  /// useful for searching. 
+	  /// </summary>
+	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. </summary>
+	  /// <param name="matchVersion"> Lucene version to match See {@link
+	  /// <a href="#version">above</a>} </param>
+	  /// <param name="stopWords"> stop words  </param>
+	  public StandardAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="STOP_WORDS_SET"/>). </summary>
+	  /// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+	  public StandardAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given reader. </summary>
+	  /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
+	  /// <param name="matchVersion"> Lucene version to match See {@link
+	  /// <a href="#version">above</a>} </param>
+	  /// <param name="stopwords"> Reader to read stop words from  </param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public StandardAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
+	  public StandardAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Set maximum allowed token length.  If a token is seen
+	  /// that exceeds this length then it is discarded.  This
+	  /// setting only takes effect the next time tokenStream is called.
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+		StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+		src.MaxTokenLength = maxTokenLength;
+		TokenStream tok = new StandardFilter(matchVersion, src);
+		tok = new LowerCaseFilter(matchVersion, tok);
+		tok = new StopFilter(matchVersion, tok, stopwords);
+		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+	  }
+
+	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+	  {
+		  private readonly StandardAnalyzer outerInstance;
+
+		  private Reader reader;
+		  private org.apache.lucene.analysis.standard.StandardTokenizer src;
+
+		  public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, org.apache.lucene.analysis.standard.StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.reader = reader;
+			  this.src = src;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+		  protected internal override Reader Reader
+		  {
+			  set
+			  {
+				src.MaxTokenLength = outerInstance.maxTokenLength;
+				base.Reader = value;
+			  }
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
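
For orientation, a rough consumer-side sketch of the analyzer above. This is not part of the commit: the Java-style member names (tokenStream, getAttribute, incrementToken) follow the raw-port spelling in this patch and may be renamed later, and the field name and sample text are invented.

    var analyzer = new StandardAnalyzer(Version.LUCENE_47);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("The Quick brown fox"));
    var term = ts.getAttribute(typeof(CharTermAttribute));
    ts.reset();
    while (ts.incrementToken())
    {
        // prints "quick", "brown", "fox": "The" is dropped by StopFilter,
        // and LowerCaseFilter has already lower-cased the rest
        Console.WriteLine(term);
    }
    ts.end();
    ts.close();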

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
new file mode 100644
index 0000000..20e4f64
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
@@ -0,0 +1,100 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Normalizes tokens extracted with <seealso cref="StandardTokenizer"/>.
+	/// </summary>
+	public class StandardFilter : TokenFilter
+	{
+	  private readonly Version matchVersion;
+
+	  public StandardFilter(Version matchVersion, TokenStream @in) : base(@in)
+	  {
+		this.matchVersion = matchVersion;
+	  }
+
+	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+
+	  // this filter inspects the type and term attributes
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  return input.incrementToken(); // TODO: add some niceties for the new grammar
+		}
+		else
+		{
+		  return incrementTokenClassic();
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public final boolean incrementTokenClassic() throws java.io.IOException
+	  public bool incrementTokenClassic()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		char[] buffer = termAtt.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int bufferLength = termAtt.length();
+		int bufferLength = termAtt.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String type = typeAtt.type();
+		string type = typeAtt.type();
+
+		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+		{
+		  // Strip last 2 characters off
+		  termAtt.Length = bufferLength - 2;
+		} // remove dots
+		else if (type == ACRONYM_TYPE)
+		{
+		  int upto = 0;
+		  for (int i = 0;i < bufferLength;i++)
+		  {
+			char c = buffer[i];
+			if (c != '.')
+			{
+			  buffer[upto++] = c;
+			}
+		  }
+		  termAtt.Length = upto;
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
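
The two pre-3.1 rewrites in incrementTokenClassic are easiest to see on concrete tokens: an APOSTROPHE-typed "Bob's" is truncated to "Bob" (length minus 2), and an ACRONYM-typed "I.B.M." has its dots squeezed out in place. A standalone sketch of the dot-squeezing branch, outside the attribute machinery:

    char[] buffer = "I.B.M.".ToCharArray();
    int upto = 0;
    for (int i = 0; i < buffer.Length; i++)
    {
        char c = buffer[i];
        if (c != '.')
        {
            buffer[upto++] = c; // compact the non-dot chars to the front
        }
    }
    string normalized = new string(buffer, 0, upto); // "IBM"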

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
new file mode 100644
index 0000000..eab0156
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="StandardFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.StandardFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class StandardFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new StandardFilterFactory </summary>
+	  public StandardFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override StandardFilter create(TokenStream input)
+	  {
+		return new StandardFilter(luceneMatchVersion, input);
+	  }
+	}
+
+}
\ No newline at end of file
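
The Solr-style XML in the doc comment above corresponds roughly to this programmatic chain; Version.LUCENE_47 and the reader are stand-ins, not values mandated by the commit:

    // tokenizer class="solr.StandardTokenizerFactory" ...
    Tokenizer src = new StandardTokenizer(Version.LUCENE_47, reader);
    // filter class="solr.StandardFilterFactory" ...
    TokenStream tok = new StandardFilter(Version.LUCENE_47, src);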

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
new file mode 100644
index 0000000..afde960
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -0,0 +1,257 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.standard
+{
+
+
+	using StandardTokenizerImpl31 = org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
+	using StandardTokenizerImpl34 = org.apache.lucene.analysis.standard.std34.StandardTokenizerImpl34;
+	using StandardTokenizerImpl40 = org.apache.lucene.analysis.standard.std40.StandardTokenizerImpl40;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A grammar-based tokenizer constructed with JFlex.
+	/// <para>
+	/// As of Lucene version 3.1, this class implements the Word Break rules from the
+	/// Unicode Text Segmentation algorithm, as specified in 
+	/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+	/// <p/>
+	/// </para>
+	/// <para>Many applications have specific tokenizer needs.  If this tokenizer does
+	/// not suit your application, please consider copying this source code
+	/// directory to your project and maintaining your own grammar-based tokenizer.
+	/// 
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating StandardTokenizer:
+	/// <ul>
+	///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+	///   from their combining characters. If you use a previous version number,
+	///   you get the exact broken behavior for backwards compatibility.
+	///   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
+	///   If you use a previous version number, you get the exact behavior of
+	///   <seealso cref="ClassicTokenizer"/> for backwards compatibility.
+	/// </ul>
+	/// </para>
+	/// </summary>
+
+	public sealed class StandardTokenizer : Tokenizer
+	{
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner </summary>
+	  private StandardTokenizerInterface scanner;
+
+	  public const int ALPHANUM = 0;
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int APOSTROPHE = 1;
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int ACRONYM = 2;
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int COMPANY = 3;
+	  public const int EMAIL = 4;
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int HOST = 5;
+	  public const int NUM = 6;
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int CJ = 7;
+
+	  /// @deprecated (3.1) 
+	  [Obsolete("(3.1)")]
+	  public const int ACRONYM_DEP = 8;
+
+	  public const int SOUTHEAST_ASIAN = 9;
+	  public const int IDEOGRAPHIC = 10;
+	  public const int HIRAGANA = 11;
+	  public const int KATAKANA = 12;
+	  public const int HANGUL = 13;
+
+	  /// <summary>
+	  /// String token types that correspond to token type int constants </summary>
+	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>", "<SOUTHEAST_ASIAN>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>", "<HANGUL>"};
+
+	  private int skippedPositions;
+
+	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// Set the max allowed token length.  Any token longer
+	  ///  than this is skipped. 
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			if (value < 1)
+			{
+			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
+			}
+			this.maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizer"/>.  Attaches
+	  /// the <code>input</code> to the newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The input reader
+	  /// 
+	  /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
+	  public StandardTokenizer(Version matchVersion, Reader input) : base(input)
+	  {
+		init(matchVersion);
+	  }
+
+	  /// <summary>
+	  /// Creates a new StandardTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
+	  /// </summary>
+	  public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
+	  {
+		init(matchVersion);
+	  }
+
+	  private void init(Version matchVersion)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_47))
+		{
+		  this.scanner = new StandardTokenizerImpl(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_40))
+		{
+		  this.scanner = new StandardTokenizerImpl40(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_34))
+		{
+		  this.scanner = new StandardTokenizerImpl34(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  this.scanner = new StandardTokenizerImpl31(input);
+		}
+		else
+		{
+		  this.scanner = new ClassicTokenizerImpl(input);
+		}
+	  }
+
+	  // this tokenizer generates three attributes:
+	  // term offset, positionIncrement and type
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+	  /*
+	   * (non-Javadoc)
+	   *
+	   * @see org.apache.lucene.analysis.TokenStream#next()
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		skippedPositions = 0;
+
+		while (true)
+		{
+		  int tokenType = scanner.NextToken;
+
+		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+		  {
+			return false;
+		  }
+
+		  if (scanner.yylength() <= maxTokenLength)
+		  {
+			posIncrAtt.PositionIncrement = skippedPositions + 1;
+			scanner.getText(termAtt);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = scanner.yychar();
+			int start = scanner.yychar();
+			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+			// This 'if' should be removed in the next release. For now, it converts
+			// invalid acronyms to HOST. When removed, only the 'else' part should
+			// remain.
+			if (tokenType == StandardTokenizer.ACRONYM_DEP)
+			{
+			  typeAtt.Type = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST];
+			  termAtt.Length = termAtt.length() - 1; // remove extra '.'
+			}
+			else
+			{
+			  typeAtt.Type = StandardTokenizer.TOKEN_TYPES[tokenType];
+			}
+			return true;
+		  }
+		  else
+			// When we skip a too-long term, we still increment the
+			// position increment
+		  {
+			skippedPositions++;
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		offsetAtt.setOffset(finalOffset, finalOffset);
+		// adjust any skipped tokens
+		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		skippedPositions = 0;
+	  }
+	}
+
+}
\ No newline at end of file
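
A consumer-side sketch of the incrementToken contract above; note that a too-long token is never returned, but still bumps the next token's position increment. As with the other sketches, the Java-style member names track this raw port and the sample text is invented.

    var tokenizer = new StandardTokenizer(Version.LUCENE_47, new StringReader("foo supercalifragilistic bar"));
    tokenizer.MaxTokenLength = 10;
    var term = tokenizer.getAttribute(typeof(CharTermAttribute));
    var posIncr = tokenizer.getAttribute(typeof(PositionIncrementAttribute));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        // "foo" arrives with increment 1, "bar" with increment 2, because
        // the 20-char token between them was skipped but still counted
        Console.WriteLine("{0} (+{1})", term, posIncr.PositionIncrement);
    }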

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
new file mode 100644
index 0000000..f1fc9da
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="StandardTokenizer"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="255"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class StandardTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxTokenLength;
+
+	  /// <summary>
+	  /// Creates a new StandardTokenizerFactory </summary>
+	  public StandardTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override StandardTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
+		tokenizer.MaxTokenLength = maxTokenLength;
+		return tokenizer;
+	  }
+	}
+
+}
\ No newline at end of file
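
The tokenizer factory follows the same args contract as StandardFilterFactory, plus one typed parameter. A sketch of driving it directly; the "luceneMatchVersion" key is an assumption carried over from the Java factory contract, where the base class consumes it before assureMatchVersion() runs:

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_47" },
        { "maxTokenLength", "100" }
    };
    var factory = new StandardTokenizerFactory(args);
    // getInt() consumed "maxTokenLength", so args is empty again and the
    // "Unknown parameters" guard passes; create() then applies the limit
    // to each tokenizer it hands out.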


[13/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
new file mode 100644
index 0000000..0b6dc5a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
@@ -0,0 +1,266 @@
+using System;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using ReverseStringFilter = org.apache.lucene.analysis.reverse.ReverseStringFilter;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizes the given token into n-grams of given size(s).
+	/// <para>
+	/// This <seealso cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
+	/// </para>
+	/// <para><a name="version"/>As of Lucene 4.4, this filter does not support
+	/// <seealso cref="Side#BACK"/> (you can use <seealso cref="ReverseStringFilter"/> up-front and
+	/// afterward to get the same behavior), handles supplementary characters
+	/// correctly and does not update offsets anymore.
+	/// </para>
+	/// </summary>
+	public sealed class EdgeNGramTokenFilter : TokenFilter
+	{
+	  public const Side DEFAULT_SIDE = Side.FRONT;
+	  public const int DEFAULT_MAX_GRAM_SIZE = 1;
+	  public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+	  /// <summary>
+	  /// Specifies which side of the input the n-gram should be generated from </summary>
+	  public enum Side
+	  {
+		/// <summary>
+		/// Get the n-gram from the front of the input </summary>
+		FRONT,
+
+		/// <summary>
+		/// Get the n-gram from the end of the input </summary>
+		[System.Obsolete]
+		BACK
+	  }
+
+	  // Java enum constant bodies cannot be converted to C#, so the
+	  // per-constant getLabel() methods are folded into a static helper.
+	  private static string getLabel(Side side)
+	  {
+		return side == Side.FRONT ? "front" : "back";
+	  }
+
+	  // Get the appropriate Side from a string; unknown labels map to an
+	  // out-of-range value so the constructor can reject them (the Java
+	  // original returned null here).
+	  public static Side getSide(string sideName)
+	  {
+		if (getLabel(Side.FRONT).Equals(sideName))
+		{
+		  return Side.FRONT;
+		}
+		if (getLabel(Side.BACK).Equals(sideName))
+		{
+		  return Side.BACK;
+		}
+		return (Side)(-1);
+	  }
+
+	  private readonly Version version;
+	  private readonly CharacterUtils charUtils;
+	  private readonly int minGram;
+	  private readonly int maxGram;
+	  private Side side;
+	  private char[] curTermBuffer;
+	  private int curTermLength;
+	  private int curCodePointCount;
+	  private int curGramSize;
+	  private int tokStart;
+	  private int tokEnd; // only used if the length changed before this filter
+	  private bool updateOffsets; // never if the length changed before this filter
+	  private int savePosIncr;
+	  private int savePosLen;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
+	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public EdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) : base(input)
+	  {
+
+		if (version == null)
+		{
+		  throw new System.ArgumentException("version must not be null");
+		}
+
+		if (version.onOrAfter(Version.LUCENE_44) && side == Side.BACK)
+		{
+		  throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
+		}
+
+		if (side != Side.FRONT && side != Side.BACK)
+		{
+		  throw new System.ArgumentException("sideLabel must be either front or back");
+		}
+
+		if (minGram < 1)
+		{
+		  throw new System.ArgumentException("minGram must be greater than zero");
+		}
+
+		if (minGram > maxGram)
+		{
+		  throw new System.ArgumentException("minGram must not be greater than maxGram");
+		}
+
+		this.version = version;
+		this.charUtils = version.onOrAfter(Version.LUCENE_44) ? CharacterUtils.getInstance(version) : CharacterUtils.Java4Instance;
+		this.minGram = minGram;
+		this.maxGram = maxGram;
+		this.side = side;
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
+	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public EdgeNGramTokenFilter(Version version, TokenStream input, string sideLabel, int minGram, int maxGram) : this(version, input, getSide(sideLabel), minGram, maxGram)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public EdgeNGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) : this(version, input, Side.FRONT, minGram, maxGram)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (curTermBuffer == null)
+		  {
+			if (!input.incrementToken())
+			{
+			  return false;
+			}
+			else
+			{
+			  curTermBuffer = termAtt.buffer().clone();
+			  curTermLength = termAtt.length();
+			  curCodePointCount = charUtils.codePointCount(termAtt);
+			  curGramSize = minGram;
+			  tokStart = offsetAtt.startOffset();
+			  tokEnd = offsetAtt.endOffset();
+			  if (version.onOrAfter(Version.LUCENE_44))
+			  {
+				// Never update offsets
+				updateOffsets = false;
+			  }
+			  else
+			  {
+				// if length by start + end offsets doesn't match the term text then assume
+				// this is a synonym and don't adjust the offsets.
+				updateOffsets = (tokStart + curTermLength) == tokEnd;
+			  }
+			  savePosIncr += posIncrAtt.PositionIncrement;
+			  savePosLen = posLenAtt.PositionLength;
+			}
+		  }
+		  if (curGramSize <= maxGram) // if we have hit the end of our n-gram size range, quit
+		  {
+			if (curGramSize <= curCodePointCount) // if the remaining input is too short, we can't generate any n-grams
+			{
+			  // grab gramSize chars from front or back
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
+			  int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+			  int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+			  clearAttributes();
+			  if (updateOffsets)
+			  {
+				offsetAtt.setOffset(tokStart + start, tokStart + end);
+			  }
+			  else
+			  {
+				offsetAtt.setOffset(tokStart, tokEnd);
+			  }
+			  // first ngram gets increment, others don't
+			  if (curGramSize == minGram)
+			  {
+				posIncrAtt.PositionIncrement = savePosIncr;
+				savePosIncr = 0;
+			  }
+			  else
+			  {
+				posIncrAtt.PositionIncrement = 0;
+			  }
+			  posLenAtt.PositionLength = savePosLen;
+			  termAtt.copyBuffer(curTermBuffer, start, end - start);
+			  curGramSize++;
+			  return true;
+			}
+		  }
+		  curTermBuffer = null;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		curTermBuffer = null;
+		savePosIncr = 0;
+	  }
+}
+
+}
\ No newline at end of file
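For orientation, a minimal and hypothetical usage sketch of the filter above, assuming the Java-style member names in this diff plus WhitespaceTokenizer and a StringReader-compatible Reader shim from the port:

// Hypothetical sketch (not part of this commit): front edge n-grams of "apache".
Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("apache"));
TokenStream ts = new EdgeNGramTokenFilter(Version.LUCENE_44, source, 1, 3);
CharTermAttribute termAtt = (CharTermAttribute)ts.addAttribute(typeof(CharTermAttribute));
ts.reset();
while (ts.incrementToken())
{
    Console.WriteLine(termAtt.ToString()); // prints "a", "ap", "apa"
}
ts.end();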

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
new file mode 100644
index 0000000..c8d36f6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
@@ -0,0 +1,71 @@
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizes the input from an edge into n-grams of given size(s).
+	/// <para>
+	/// This <seealso cref="Tokenizer"/> creates n-grams from the beginning edge or ending edge of an input token.
+	/// </para>
+	/// <para><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
+	/// <li>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage
+	/// <li>doesn't trim the input,
+	/// <li>sets all position increments to 1 (instead of 1 for the first token and 0 for all following ones),
+	/// <li>doesn't support backward n-grams anymore.
+	/// <li>supports <seealso cref="#isTokenChar(int) pre-tokenization"/>,
+	/// <li>correctly handles supplementary characters.
+	/// </ul>
+	/// </para>
+	/// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
+	/// to use the old behavior through <seealso cref="Lucene43EdgeNGramTokenizer"/>.
+	/// </para>
+	/// </summary>
+	public class EdgeNGramTokenizer : NGramTokenizer
+	{
+	  public const int DEFAULT_MAX_GRAM_SIZE = 1;
+	  public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) : base(version, input, minGram, maxGram, true)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : base(version, factory, input, minGram, maxGram, true)
+	  {
+	  }
+
+	}
+
+}
\ No newline at end of file
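The 4.4 tokenizer above reads straight from a Reader; a hypothetical sketch under the same assumptions (StringReader shim, Java-style member names):

// Hypothetical sketch: front edge n-grams read directly from the input.
Tokenizer ngrams = new EdgeNGramTokenizer(Version.LUCENE_44, new StringReader("lucene"), 2, 4);
CharTermAttribute termAtt = (CharTermAttribute)ngrams.addAttribute(typeof(CharTermAttribute));
ngrams.reset();
while (ngrams.incrementToken())
{
    Console.WriteLine(termAtt.ToString()); // prints "lu", "luc", "luce"
}
ngrams.end();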

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
new file mode 100644
index 0000000..195a6e1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
@@ -0,0 +1,74 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Creates new instances of <seealso cref="EdgeNGramTokenizer"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="1" maxGramSize="1"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class EdgeNGramTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxGramSize;
+	  private readonly int minGramSize;
+	  private readonly string side;
+
+	  /// <summary>
+	  /// Creates a new EdgeNGramTokenizerFactory </summary>
+	  public EdgeNGramTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		minGramSize = getInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
+		maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
+		side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.Label);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override Tokenizer create(AttributeFactory factory, Reader input)
+	  {
+		if (luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+		{
+		  if (!EdgeNGramTokenFilter.Side.FRONT.Label.Equals(side))
+		  {
+			throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
+		  }
+		  return new EdgeNGramTokenizer(luceneMatchVersion, input, minGramSize, maxGramSize);
+		}
+		else
+		{
+		  return new Lucene43EdgeNGramTokenizer(luceneMatchVersion, input, side, minGramSize, maxGramSize);
+		}
+	  }
+	}
+
+}
\ No newline at end of file
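The factory above consumes its args map and rejects leftovers; a hedged sketch of driving it directly (the luceneMatchVersion key and DEFAULT_ATTRIBUTE_FACTORY name are assumptions carried over from the Java analysis factories):

// Hypothetical sketch: unknown keys left in args raise ArgumentException.
var args = new Dictionary<string, string>
{
    { "luceneMatchVersion", "LUCENE_44" }, // assumed key, as in the Java analysis factories
    { "minGramSize", "1" },
    { "maxGramSize", "2" }
};
var factory = new EdgeNGramTokenizerFactory(args);
Tokenizer tok = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, new StringReader("lucene"));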

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
new file mode 100644
index 0000000..9809ccf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -0,0 +1,328 @@
+using System;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Old version of <seealso cref="EdgeNGramTokenizer"/> which doesn't correctly handle
+	/// supplementary characters.
+	/// </summary>
+	[Obsolete]
+	public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
+	{
+	  public const Side DEFAULT_SIDE = Side.FRONT;
+	  public const int DEFAULT_MAX_GRAM_SIZE = 1;
+	  public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  /// <summary>
+	  /// Specifies which side of the input the n-gram should be generated from </summary>
+	  public enum Side
+	  {
+		/// <summary>
+		/// Get the n-gram from the front of the input </summary>
+		FRONT = 1,
+
+		/// <summary>
+		/// Get the n-gram from the end of the input </summary>
+		BACK = 2
+	  }
+
+	  /// <summary>
+	  /// Returns the label ("front" or "back") of the given <seealso cref="Side"/>. </summary>
+	  public static string GetLabel(Side side)
+	  {
+		return side == Side.FRONT ? "front" : "back";
+	  }
+
+	  /// <summary>
+	  /// Gets the appropriate <seealso cref="Side"/> from a string, or 0 (no side) when the label is unknown. </summary>
+	  public static Side GetSide(string sideName)
+	  {
+		if ("front".Equals(sideName))
+		{
+		  return Side.FRONT;
+		}
+		if ("back".Equals(sideName))
+		{
+		  return Side.BACK;
+		}
+		return 0; // unknown side label
+	  }
+
+	  private int minGram;
+	  private int maxGram;
+	  private int gramSize;
+	  private Side side;
+	  private bool started;
+	  private int inLen; // length of the input AFTER trim()
+	  private int charsRead; // length of the input
+	  private string inStr;
+
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, Side side, int minGram, int maxGram) : base(input)
+	  {
+		init(version, side, minGram, maxGram);
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="side"> the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, Side side, int minGram, int maxGram) : base(factory, input)
+	  {
+		init(version, side, minGram, maxGram);
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, string sideLabel, int minGram, int maxGram) : this(version, input, GetSide(sideLabel), minGram, maxGram)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="sideLabel"> the name of the <seealso cref="Side"/> from which to chop off an n-gram </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  [Obsolete]
+	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, string sideLabel, int minGram, int maxGram) : this(version, factory, input, GetSide(sideLabel), minGram, maxGram)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public Lucene43EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) : this(version, input, Side.FRONT, minGram, maxGram)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+	  /// </summary>
+	  /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public Lucene43EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : this(version, factory, input, Side.FRONT, minGram, maxGram)
+	  {
+	  }
+
+	  private void init(Version version, Side side, int minGram, int maxGram)
+	  {
+		if (version == null)
+		{
+		  throw new System.ArgumentException("version must not be null");
+		}
+
+		if (side != Side.FRONT && side != Side.BACK)
+		{
+		  throw new System.ArgumentException("sideLabel must be either front or back");
+		}
+
+		if (minGram < 1)
+		{
+		  throw new System.ArgumentException("minGram must be greater than zero");
+		}
+
+		if (minGram > maxGram)
+		{
+		  throw new System.ArgumentException("minGram must not be greater than maxGram");
+		}
+
+		if (version.onOrAfter(Version.LUCENE_44))
+		{
+		  if (side == Side.BACK)
+		  {
+			throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
+		  }
+		}
+		else
+		{
+		  maxGram = Math.Min(maxGram, 1024);
+		}
+
+		this.minGram = minGram;
+		this.maxGram = maxGram;
+		this.side = side;
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token in the stream; returns false at end of stream. </summary>
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		// if we are just starting, read the whole input
+		if (!started)
+		{
+		  started = true;
+		  gramSize = minGram;
+		  int limit = side == Side.FRONT ? maxGram : 1024;
+		  char[] chars = new char[Math.Min(1024, limit)];
+		  charsRead = 0;
+		  // TODO: refactor to a shared readFully somewhere:
+		  bool exhausted = false;
+		  while (charsRead < limit)
+		  {
+			int inc = input.read(chars, charsRead, chars.Length - charsRead);
+			if (inc == -1)
+			{
+			  exhausted = true;
+			  break;
+			}
+			charsRead += inc;
+			if (charsRead == chars.Length && charsRead < limit)
+			{
+			  chars = ArrayUtil.grow(chars);
+			}
+		  }
+
+		  inStr = new string(chars, 0, charsRead);
+		  inStr = inStr.Trim();
+
+		  if (!exhausted)
+		  {
+			// Read extra throwaway chars so that on end() we
+			// report the correct offset:
+			char[] throwaway = new char[1024];
+			while (true)
+			{
+			  int inc = input.read(throwaway, 0, throwaway.Length);
+			  if (inc == -1)
+			  {
+				break;
+			  }
+			  charsRead += inc;
+			}
+		  }
+
+		  inLen = inStr.Length;
+		  if (inLen == 0)
+		  {
+			return false;
+		  }
+		  posIncrAtt.PositionIncrement = 1;
+		}
+		else
+		{
+		  posIncrAtt.PositionIncrement = 0;
+		}
+
+		// if the remaining input is too short, we can't generate any n-grams
+		if (gramSize > inLen)
+		{
+		  return false;
+		}
+
+		// if we have hit the end of our n-gram size range, quit
+		if (gramSize > maxGram || gramSize > inLen)
+		{
+		  return false;
+		}
+
+		// grab gramSize chars from front or back
+		int start = side == Side.FRONT ? 0 : inLen - gramSize;
+		int end = start + gramSize;
+		termAtt.setEmpty().append(inStr, start, end);
+		offsetAtt.setOffset(correctOffset(start), correctOffset(end));
+		gramSize++;
+		return true;
+	  }
+
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(charsRead);
+		this.offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		started = false;
+	  }
+}
+
+}
\ No newline at end of file
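To make the Side semantics above concrete, a hypothetical sketch using a pre-4.4 match version (Version.LUCENE_43 and the StringReader shim are assumptions):

// Hypothetical sketch: FRONT vs. BACK grams of "abcde" with minGram=1, maxGram=3.
var front = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, new StringReader("abcde"),
    Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3); // emits "a", "ab", "abc"
var back = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, new StringReader("abcde"),
    Lucene43EdgeNGramTokenizer.Side.BACK, 1, 3);  // emits "e", "de", "cde"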

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
new file mode 100644
index 0000000..b0756a6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
@@ -0,0 +1,182 @@
+using System;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+	/// <summary>
+	/// Old broken version of <seealso cref="NGramTokenizer"/>.
+	/// </summary>
+	[Obsolete]
+	public sealed class Lucene43NGramTokenizer : Tokenizer
+	{
+	  public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+	  public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+	  private int minGram, maxGram;
+	  private int gramSize;
+	  private int pos;
+	  private int inLen; // length of the input AFTER trim()
+	  private int charsRead; // length of the input
+	  private string inStr;
+	  private bool started;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public Lucene43NGramTokenizer(Reader input, int minGram, int maxGram) : base(input)
+	  {
+		init(minGram, maxGram);
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public Lucene43NGramTokenizer(AttributeFactory factory, Reader input, int minGram, int maxGram) : base(factory, input)
+	  {
+		init(minGram, maxGram);
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with default min and max n-grams. </summary>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  public Lucene43NGramTokenizer(Reader input) : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+	  {
+	  }
+
+	  private void init(int minGram, int maxGram)
+	  {
+		if (minGram < 1)
+		{
+		  throw new System.ArgumentException("minGram must be greater than zero");
+		}
+		if (minGram > maxGram)
+		{
+		  throw new System.ArgumentException("minGram must not be greater than maxGram");
+		}
+		this.minGram = minGram;
+		this.maxGram = maxGram;
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token in the stream; returns false at end of stream. </summary>
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		if (!started)
+		{
+		  started = true;
+		  gramSize = minGram;
+		  char[] chars = new char[1024];
+		  charsRead = 0;
+		  // TODO: refactor to a shared readFully somewhere:
+		  while (charsRead < chars.Length)
+		  {
+			int inc = input.read(chars, charsRead, chars.Length - charsRead);
+			if (inc == -1)
+			{
+			  break;
+			}
+			charsRead += inc;
+		  }
+		  inStr = (new string(chars, 0, charsRead)).Trim(); // remove any leading/trailing whitespace
+
+		  if (charsRead == chars.Length)
+		  {
+			// Read extra throwaway chars so that on end() we
+			// report the correct offset:
+			char[] throwaway = new char[1024];
+			while (true)
+			{
+			  int inc = input.read(throwaway, 0, throwaway.Length);
+			  if (inc == -1)
+			  {
+				break;
+			  }
+			  charsRead += inc;
+			}
+		  }
+
+		  inLen = inStr.Length;
+		  if (inLen == 0)
+		  {
+			return false;
+		  }
+		}
+
+		if (pos + gramSize > inLen) // if we hit the end of the string
+		{
+		  pos = 0; // reset to beginning of string
+		  gramSize++; // increase n-gram size
+		  if (gramSize > maxGram) // we are done
+		  {
+			return false;
+		  }
+		  if (pos + gramSize > inLen)
+		  {
+			return false;
+		  }
+		}
+
+		int oldPos = pos;
+		pos++;
+		termAtt.setEmpty().append(inStr, oldPos, oldPos + gramSize);
+		offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos + gramSize));
+		return true;
+	  }
+
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(charsRead);
+		this.offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		started = false;
+		pos = 0;
+	  }
+	}
+
+}
\ No newline at end of file
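Unlike the 4.4 tokenizer, the old implementation above emits all grams of one size before moving to the next; a hypothetical sketch:

// Hypothetical sketch: for "abcde" with minGram=1, maxGram=2 the old order is
// "a", "b", "c", "d", "e", "ab", "bc", "cd", "de" (and only the first 1024 chars are read).
var old = new Lucene43NGramTokenizer(new StringReader("abcde"), 1, 2);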

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
new file mode 100644
index 0000000..132f3bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
@@ -0,0 +1,59 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Factory for <seealso cref="NGramTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NGramFilterFactory : TokenFilterFactory
+	{
+	  private readonly int maxGramSize;
+	  private readonly int minGramSize;
+
+	  /// <summary>
+	  /// Creates a new NGramFilterFactory </summary>
+	  public NGramFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		minGramSize = getInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
+		maxGramSize = getInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override NGramTokenFilter create(TokenStream input)
+	  {
+		return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
new file mode 100644
index 0000000..3e7012c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -0,0 +1,251 @@
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CodepointCountFilter = org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizes the input into n-grams of the given size(s).
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/> compatibility when
+	/// creating a <seealso cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filter:<ul>
+	/// <li>handles supplementary characters correctly,</li>
+	/// <li>emits all n-grams for the same token at the same position,</li>
+	/// <li>does not modify offsets,</li>
+	/// <li>sorts n-grams by their offset in the original token first, then
+	/// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
+	/// "c").</li></ul>
+	/// </para>
+	/// <para>You can make this filter use the old behavior by providing a version &lt;
+	/// <seealso cref="Version#LUCENE_44"/> in the constructor but this is not recommended as
+	/// it will lead to broken <seealso cref="TokenStream"/>s that will cause highlighting
+	/// bugs.
+	/// </para>
+	/// <para>If you were using this <seealso cref="TokenFilter"/> to perform partial highlighting,
+	/// this won't work anymore since this filter doesn't update offsets. You should
+	/// modify your analysis chain to use <seealso cref="NGramTokenizer"/>, and potentially
+	/// override <seealso cref="NGramTokenizer#isTokenChar(int)"/> to perform pre-tokenization.
+	/// </para>
+	/// </summary>
+	public sealed class NGramTokenFilter : TokenFilter
+	{
+	  public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+	  public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+	  private readonly int minGram, maxGram;
+
+	  private char[] curTermBuffer;
+	  private int curTermLength;
+	  private int curCodePointCount;
+	  private int curGramSize;
+	  private int curPos;
+	  private int curPosInc, curPosLen;
+	  private int tokStart;
+	  private int tokEnd;
+	  private bool hasIllegalOffsets; // only if the length changed before this filter
+
+	  private readonly Version version;
+	  private readonly CharacterUtils charUtils;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt;
+	  private readonly PositionLengthAttribute posLenAtt;
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  /// <summary>
+	  /// Creates NGramTokenFilter with given min and max n-grams. </summary>
+	  /// <param name="version"> Lucene version to enable correct position increments.
+	  ///                See <a href="#version">above</a> for details. </param>
+	  /// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
+	  {
+		this.version = version;
+		this.charUtils = version.onOrAfter(Version.LUCENE_44) ? CharacterUtils.getInstance(version) : CharacterUtils.Java4Instance;
+		if (minGram < 1)
+		{
+		  throw new System.ArgumentException("minGram must be greater than zero");
+		}
+		if (minGram > maxGram)
+		{
+		  throw new System.ArgumentException("minGram must not be greater than maxGram");
+		}
+		this.minGram = minGram;
+		this.maxGram = maxGram;
+		if (version.onOrAfter(Version.LUCENE_44))
+		{
+		  posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+		  posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+		}
+		else
+		{
+		  posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
+		  posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
+		}
+	  }
+
+	  private class PositionIncrementAttributeAnonymousInnerClassHelper : PositionIncrementAttribute
+	  {
+		  private readonly NGramTokenFilter outerInstance;
+
+		  public PositionIncrementAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override int PositionIncrement
+		  {
+			  set
+			  {
+			  }
+			  get
+			  {
+				return 0;
+			  }
+		  }
+	  }
+
+	  private class PositionLengthAttributeAnonymousInnerClassHelper : PositionLengthAttribute
+	  {
+		  private readonly NGramTokenFilter outerInstance;
+
+		  public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override int PositionLength
+		  {
+			  set
+			  {
+			  }
+			  get
+			  {
+				return 0;
+			  }
+		  }
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenFilter with default min and max n-grams. </summary>
+	  /// <param name="version"> Lucene version to enable correct position increments.
+	  ///                See <a href="#version">above</a> for details. </param>
+	  /// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
+	  public NGramTokenFilter(Version version, TokenStream input) : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token in the stream; returns false at end of stream. </summary>
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (curTermBuffer == null)
+		  {
+			if (!input.incrementToken())
+			{
+			  return false;
+			}
+			else
+			{
+			  curTermBuffer = termAtt.buffer().clone();
+			  curTermLength = termAtt.length();
+			  curCodePointCount = charUtils.codePointCount(termAtt);
+			  curGramSize = minGram;
+			  curPos = 0;
+			  curPosInc = posIncAtt.PositionIncrement;
+			  curPosLen = posLenAtt.PositionLength;
+			  tokStart = offsetAtt.startOffset();
+			  tokEnd = offsetAtt.endOffset();
+			  // if length by start + end offsets doesn't match the term text then assume
+			  // this is a synonym and don't adjust the offsets.
+			  hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
+			}
+		  }
+		  if (version.onOrAfter(Version.LUCENE_44))
+		  {
+			if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount)
+			{
+			  ++curPos;
+			  curGramSize = minGram;
+			}
+			if ((curPos + curGramSize) <= curCodePointCount)
+			{
+			  clearAttributes();
+			  int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
+			  int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+			  termAtt.copyBuffer(curTermBuffer, start, end - start);
+			  posIncAtt.PositionIncrement = curPosInc;
+			  curPosInc = 0;
+			  posLenAtt.PositionLength = curPosLen;
+			  offsetAtt.setOffset(tokStart, tokEnd);
+			  curGramSize++;
+			  return true;
+			}
+		  }
+		  else
+		  {
+			while (curGramSize <= maxGram)
+			{
+			  while (curPos + curGramSize <= curTermLength) // while there is input
+			  {
+				clearAttributes();
+				termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
+				if (hasIllegalOffsets)
+				{
+				  offsetAtt.setOffset(tokStart, tokEnd);
+				}
+				else
+				{
+				  offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
+				}
+				curPos++;
+				return true;
+			  }
+			  curGramSize++; // increase n-gram size
+			  curPos = 0;
+			}
+		  }
+		  curTermBuffer = null;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		curTermBuffer = null;
+	  }
+	}
+
+}
\ No newline at end of file
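Relating the ordering described in the doc comment above to concrete output, a hypothetical sketch (WhitespaceTokenizer and StringReader shim assumed):

// Hypothetical sketch: n-grams of each token, sorted by start offset, then length.
Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("abc"));
TokenStream ts = new NGramTokenFilter(Version.LUCENE_44, source, 1, 2);
// for the single token "abc" this emits "a", "ab", "b", "bc", "c",
// all with the original token's offsets and position.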

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
new file mode 100644
index 0000000..b782e94
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -0,0 +1,278 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizes the input into n-grams of the given size(s).
+	/// <para>Unlike <seealso cref="NGramTokenFilter"/>, this class sets offsets so
+	/// that characters between startOffset and endOffset in the original stream are
+	/// the same as the term chars.
+	/// </para>
+	/// <para>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
+	/// <table>
+	/// <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
+	/// <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
+	/// <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
+	/// <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
+	/// </table>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
+	/// <li>tokenize in a streaming fashion to support streams which are larger
+	/// than 1024 chars (limit of the previous version),
+	/// <li>count grams based on unicode code points instead of java chars (and
+	/// never split in the middle of surrogate pairs),
+	/// <li>give the ability to <seealso cref="#isTokenChar(int) pre-tokenize"/> the stream
+	/// before computing n-grams.</ul>
+	/// </para>
+	/// <para>Additionally, this class doesn't trim trailing whitespace and emits
+	/// tokens in a different order: tokens are now emitted by increasing start
+	/// offset, while they used to be emitted by increasing length (which prevented
+	/// large input streams from being supported).
+	/// </para>
+	/// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
+	/// to use the old behavior through <seealso cref="Lucene43NGramTokenizer"/>.
+	/// </para>
+	/// </summary>
+	// non-final to allow for overriding isTokenChar, but all other methods should be final
+	public class NGramTokenizer : Tokenizer
+	{
+	  public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+	  public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+	  private CharacterUtils charUtils;
+	  private CharacterUtils.CharacterBuffer charBuffer;
+	  private int[] buffer; // like charBuffer, but converted to code points
+	  private int bufferStart, bufferEnd; // remaining slice in buffer
+	  private int offset;
+	  private int gramSize;
+	  private int minGram, maxGram;
+	  private bool exhausted;
+	  private int lastCheckedChar; // last offset in the buffer that we checked
+	  private int lastNonTokenChar; // last offset that we found to not be a token char
+	  private bool edgesOnly; // leading edges n-grams only
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  internal NGramTokenizer(Version version, Reader input, int minGram, int maxGram, bool edgesOnly) : base(input)
+	  {
+		init(version, minGram, maxGram, edgesOnly);
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
+	  /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public NGramTokenizer(Version version, Reader input, int minGram, int maxGram) : this(version, input, minGram, maxGram, false)
+	  {
+	  }
+
+	  internal NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, bool edgesOnly) : base(factory, input)
+	  {
+		init(version, minGram, maxGram, edgesOnly);
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with given min and max n-grams. </summary>
+	  /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+	  /// <param name="factory"> <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  /// <param name="minGram"> the smallest n-gram to generate </param>
+	  /// <param name="maxGram"> the largest n-gram to generate </param>
+	  public NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) : this(version, factory, input, minGram, maxGram, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates NGramTokenizer with default min and max n-grams. </summary>
+	  /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+	  /// <param name="input"> <seealso cref="Reader"/> holding the input to be tokenized </param>
+	  public NGramTokenizer(Version version, Reader input) : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+	  {
+	  }
+
+	  private void init(Version version, int minGram, int maxGram, bool edgesOnly)
+	  {
+		if (!version.onOrAfter(Version.LUCENE_44))
+		{
+		  throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
+		}
+		charUtils = version.onOrAfter(Version.LUCENE_44) ? CharacterUtils.getInstance(version) : CharacterUtils.Java4Instance;
+		if (minGram < 1)
+		{
+		  throw new System.ArgumentException("minGram must be greater than zero");
+		}
+		if (minGram > maxGram)
+		{
+		  throw new System.ArgumentException("minGram must not be greater than maxGram");
+		}
+		this.minGram = minGram;
+		this.maxGram = maxGram;
+		this.edgesOnly = edgesOnly;
+		charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
+		buffer = new int[charBuffer.Buffer.Length];
+		// Make the term att large enough
+		termAtt.resizeBuffer(2 * maxGram);
+	  }
+
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+
+		// termination of this loop is guaranteed by the fact that every iteration
+		// either advances the buffer (calls consume()) or increases gramSize
+		while (true)
+		{
+		  // compact
+		  if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted)
+		  {
+			Array.Copy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
+			bufferEnd -= bufferStart;
+			lastCheckedChar -= bufferStart;
+			lastNonTokenChar -= bufferStart;
+			bufferStart = 0;
+
+			// fill in remaining space
+			exhausted = !charUtils.fill(charBuffer, input, buffer.Length - bufferEnd);
+			// convert to code points
+			bufferEnd += charUtils.toCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
+		  }
+
+		  // should we go to the next offset?
+		  if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd)
+		  {
+			if (bufferStart + 1 + minGram > bufferEnd)
+			{
+			  Debug.Assert(exhausted);
+			  return false;
+			}
+			consume();
+			gramSize = minGram;
+		  }
+
+		  updateLastNonTokenChar();
+
+		  // retry if the token to be emitted was going to not only contain token chars
+		  bool termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
+		  bool isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
+		  if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar)
+		  {
+			consume();
+			gramSize = minGram;
+			continue;
+		  }
+
+		  int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
+		  termAtt.Length = length;
+		  posIncAtt.PositionIncrement = 1;
+		  posLenAtt.PositionLength = 1;
+		  offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
+		  ++gramSize;
+		  return true;
+		}
+	  }
+
+	  private void updateLastNonTokenChar()
+	  {
+		int termEnd = bufferStart + gramSize - 1;
+		if (termEnd > lastCheckedChar)
+		{
+		  for (int i = termEnd; i > lastCheckedChar; --i)
+		  {
+			if (!isTokenChar(buffer[i]))
+			{
+			  lastNonTokenChar = i;
+			  break;
+			}
+		  }
+		  lastCheckedChar = termEnd;
+		}
+	  }
+
+	  /// <summary>
+	  /// Consume one code point. </summary>
+	  private void consume()
+	  {
+		offset += Char.ConvertFromUtf32(buffer[bufferStart++]).Length; // 1 char for BMP code points, 2 for supplementary
+	  }
+
+	  /// <summary>
+	  /// Only collect characters which satisfy this condition. </summary>
+	  protected internal virtual bool isTokenChar(int chr)
+	  {
+		return true;
+	  }
+
+	  public override void end()
+	  {
+		base.end();
+		Debug.Assert(bufferStart <= bufferEnd);
+		int endOffset = offset;
+		for (int i = bufferStart; i < bufferEnd; ++i)
+		{
+		  endOffset += Char.ConvertFromUtf32(buffer[i]).Length; // count chars, not code points
+		}
+		endOffset = correctOffset(endOffset);
+		// set final offset
+		offsetAtt.setOffset(endOffset, endOffset);
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		bufferStart = bufferEnd = buffer.Length;
+		lastNonTokenChar = lastCheckedChar = bufferStart - 1;
+		offset = 0;
+		gramSize = minGram;
+		exhausted = false;
+		charBuffer.reset();
+	  }
+	}
+
+}
\ No newline at end of file
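Since isTokenChar above is virtual precisely to allow pre-tokenization, a hypothetical subclass could restrict grams to letter runs:

// Hypothetical sketch: n-grams over letter runs only, via the isTokenChar hook.
public class LetterNGramTokenizer : NGramTokenizer
{
    public LetterNGramTokenizer(Version version, Reader input, int minGram, int maxGram)
        : base(version, input, minGram, maxGram)
    {
    }

    protected internal override bool isTokenChar(int chr)
    {
        // chr is a code point; the cast is fine for the BMP, and a full
        // implementation would also classify supplementary code points.
        return chr <= 0xFFFF && char.IsLetter((char)chr);
    }
}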

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
new file mode 100644
index 0000000..d90b0ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
@@ -0,0 +1,70 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.ngram
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="NGramTokenizer"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NGramTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxGramSize;
+	  private readonly int minGramSize;
+
+	  /// <summary>
+	  /// Creates a new NGramTokenizerFactory </summary>
+	  public NGramTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		minGramSize = getInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
+		maxGramSize = getInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Creates the <seealso cref="TokenStream"/> of n-grams from the given <seealso cref="Reader"/> and <seealso cref="AttributeFactory"/>. </summary>
+	  public override Tokenizer create(AttributeFactory factory, Reader input)
+	  {
+		if (luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+		{
+		  return new NGramTokenizer(luceneMatchVersion, factory, input, minGramSize, maxGramSize);
+		}
+		else
+		{
+		  return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
new file mode 100644
index 0000000..a1e5d3f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
@@ -0,0 +1,231 @@
+using System;
+
+namespace org.apache.lucene.analysis.nl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+	using StemmerOverrideFilter = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+	using org.apache.lucene.analysis.util;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for the Dutch language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all), an external list of exclusions (words that will
+	/// not be stemmed, but indexed) and an external list of word-stem pairs that overrule
+	/// the algorithm (dictionary stemming).
+	/// A default set of stopwords is used unless an alternative list is specified, but the
+	/// exclusion list is empty by default.
+	/// </para>
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating DutchAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, <seealso cref="#DutchAnalyzer(Version, CharArraySet)"/> and
+	///        <seealso cref="#DutchAnalyzer(Version, CharArraySet, CharArraySet)"/> also populate
+	///        the default entries for the stem override dictionary
+	///   <li> As of 3.1, Snowball stemming is done with SnowballFilter, 
+	///        LowerCaseFilter is used prior to StopFilter, and Snowball 
+	///        stopwords are used by default.
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class DutchAnalyzer : Analyzer
+	{
+
+	  /// <summary>
+	  /// File containing default Dutch stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "dutch_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+		internal static readonly CharArrayMap<string> DEFAULT_STEM_DICT;
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+
+		  DEFAULT_STEM_DICT = new CharArrayMap<string>(Version.LUCENE_CURRENT, 4, false);
+		  DEFAULT_STEM_DICT.put("fiets", "fiets"); //otherwise fiet
+		  DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); //otherwise bromfiet
+		  DEFAULT_STEM_DICT.put("ei", "eier");
+		  DEFAULT_STEM_DICT.put("kind", "kinder");
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Contains the stopwords used with the StopFilter.
+	  /// </summary>
+	  private readonly CharArraySet stoptable;
+
+	  /// <summary>
+	  /// Contains words that should be indexed but not stemmed.
+	  /// </summary>
+	  private CharArraySet excltable = CharArraySet.EMPTY_SET;
+
+	  private readonly StemmerOverrideMap stemdict;
+
+	  // null if on 3.1 or later - only for bw compat
+	  private readonly CharArrayMap<string> origStemdict;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>) 
+	  /// and a few default entries for the stem exclusion table.
+	  /// 
+	  /// </summary>
+	  public DutchAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT)
+	  {
+		// historically, only this ctor populated the stem dict!!!!!
+	  }
+
+	  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STEM_DICT : CharArrayMap.emptyMap<string>())
+	  {
+		// historically, this ctor never populated the stem dict!!!!!
+		// so we populate it only for >= 3.6
+	  }
+
+	  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) : this(matchVersion, stopwords, stemExclusionTable, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STEM_DICT : CharArrayMap.emptyMap<string>())
+	  {
+		// historically, this ctor never populated the stem dict!!!!!
+		// so we populate it only for >= 3.6
+	  }
+
+	  public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
+	  {
+		this.matchVersion = matchVersion;
+		this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+		this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
+		if (stemOverrideDict.Empty || !matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  this.stemdict = null;
+		  this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
+		}
+		else
+		{
+		  this.origStemdict = null;
+		  // we don't need to ignore case here since we lowercase in this analyzer anyway
+		  StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
+		  CharArrayMap<string>.EntryIterator iter = stemOverrideDict.entrySet().GetEnumerator();
+		  CharsRef spare = new CharsRef();
+		  while (iter.hasNext())
+		  {
+			char[] nextKey = iter.nextKey();
+			spare.copyChars(nextKey, 0, nextKey.Length);
+			builder.add(spare, iter.currentValue());
+		  }
+		  try
+		  {
+			this.stemdict = builder.build();
+		  }
+		  catch (IOException ex)
+		  {
+			throw new Exception("can not build stem dict", ex);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns a (possibly reused) <seealso cref="TokenStream"/> which tokenizes all the 
+	  /// text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A <seealso cref="TokenStream"/> built from a <seealso cref="StandardTokenizer"/>
+	  ///   filtered with <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, 
+	  ///   <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is provided,
+	  ///   <seealso cref="StemmerOverrideFilter"/>, and <seealso cref="SnowballFilter"/> </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader aReader)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, aReader);
+		  Tokenizer source = new StandardTokenizer(matchVersion, aReader);
+		  TokenStream result = new StandardFilter(matchVersion, source);
+		  result = new LowerCaseFilter(matchVersion, result);
+		  result = new StopFilter(matchVersion, result, stoptable);
+		  if (!excltable.Empty)
+		  {
+			result = new SetKeywordMarkerFilter(result, excltable);
+		  }
+		  if (stemdict != null)
+		  {
+			result = new StemmerOverrideFilter(result, stemdict);
+		  }
+		  result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
+		  return new TokenStreamComponents(source, result);
+		}
+		else
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, aReader);
+		  Tokenizer source = new StandardTokenizer(matchVersion, aReader);
+		  TokenStream result = new StandardFilter(matchVersion, source);
+		  result = new StopFilter(matchVersion, result, stoptable);
+		  if (!excltable.Empty)
+		  {
+			result = new SetKeywordMarkerFilter(result, excltable);
+		  }
+		  result = new DutchStemFilter(result, origStemdict);
+		  return new TokenStreamComponents(source, result);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

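The four-argument constructor above is where the version gating happens: for matchVersion >= LUCENE_31 the override entries are compiled into a StemmerOverrideMap (applied by StemmerOverrideFilter ahead of SnowballFilter), while older versions keep the raw CharArrayMap for the deprecated DutchStemFilter path. A minimal sketch of supplying custom overrides, assuming the ported CharArrayMap keeps the Java-style put() used in DefaultSetHolder above:

    // Hypothetical stem-override dictionary: surface form -> stem to use
    // instead of the Snowball output.
    var overrides = new CharArrayMap<string>(Version.LUCENE_CURRENT, 2, false);
    overrides.put("fiets", "fiets"); // keep as-is; Snowball would yield "fiet"
    overrides.put("ei", "eier");

    var analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT,
                                     DutchAnalyzer.DefaultStopSet,
                                     CharArraySet.EMPTY_SET,
                                     overrides);
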
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
new file mode 100644
index 0000000..50d1ce1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
@@ -0,0 +1,129 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.nl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that stems Dutch words. 
+	/// <para>
+	/// It supports a table of words that should
+	/// not be stemmed at all. The stemmer used can be changed at runtime after the
+	/// filter object is created (as long as it is a <seealso cref="DutchStemmer"/>).
+	/// </para>
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="KeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= KeywordMarkerFilter </seealso>
+	/// @deprecated (3.1) Use <seealso cref="SnowballFilter"/> with 
+	/// <seealso cref="org.tartarus.snowball.ext.DutchStemmer"/> instead, which has the
+	/// same functionality. This filter will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use SnowballFilter with org.tartarus.snowball.ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
+	public sealed class DutchStemFilter : TokenFilter
+	{
+	  /// <summary>
+	  /// The stemmer used to stem tokens in the input stream.
+	  /// </summary>
+	  private DutchStemmer stemmer = new DutchStemmer();
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public DutchStemFilter(TokenStream _in) : base(_in)
+	  {
+	  }
+
+	  /// <param name="stemdictionary"> Dictionary of word stem pairs, that overrule the algorithm </param>
+	  public DutchStemFilter(TokenStream _in, org.apache.lucene.analysis.util.CharArrayMap<string> stemdictionary) : this(_in)
+	  {
+		stemmer.StemDictionary = stemdictionary;
+	  }
+
+	  /// <summary>
+	  /// Returns the next token in the stream, or null at EOS
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String term = termAtt.toString();
+		  string term = termAtt.ToString();
+
+		  // Check the exclusion table.
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String s = stemmer.stem(term);
+			string s = stemmer.stem(term);
+			// If not stemmed, don't waste the time adjusting the token.
+			if ((s != null) && !s.Equals(term))
+			{
+			  termAtt.setEmpty().append(s);
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Set an alternative/custom <seealso cref="DutchStemmer"/> for this filter.
+	  /// </summary>
+	  public DutchStemmer Stemmer
+	  {
+		  set
+		  {
+			if (value != null)
+			{
+			  this.stemmer = value;
+			}
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set the dictionary for stemming. This dictionary overrules the algorithm,
+	  /// so you can correct for a particular unwanted word-stem pair.
+	  /// </summary>
+	  public org.apache.lucene.analysis.util.CharArrayMap<string> StemDictionary
+	  {
+		  set
+		  {
+			if (stemmer != null)
+			{
+			  stemmer.StemDictionary = value;
+			}
+		  }
+	  }
+	}
+}
\ No newline at end of file

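To spell out the migration the [Obsolete] note asks for: DutchAnalyzer's 3.1+ branch earlier in this commit already uses the replacement chain, so the swap is mechanical. A sketch under that assumption, where tokens is any upstream TokenStream:

    // Deprecated (pre-3.1 behavior):
    TokenStream stemmed = new DutchStemFilter(tokens);

    // Replacement with the same stemming, as DutchAnalyzer.createComponents
    // does for matchVersion >= LUCENE_31:
    TokenStream stemmed2 = new SnowballFilter(tokens,
        new org.tartarus.snowball.ext.DutchStemmer());

Terms flagged via KeywordAttribute (for example by a preceding SetKeywordMarkerFilter) are skipped by DutchStemFilter above, and the Java SnowballFilter honors the same attribute, so keyword protection should carry over unchanged.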

[28/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilter.cs
new file mode 100644
index 0000000..6073eca
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilter.cs
@@ -0,0 +1,135 @@
+namespace org.apache.lucene.analysis.el
+{
+
+	/// <summary>
+	/// Copyright 2005 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Normalizes token text to lower case, removes some Greek diacritics,
+	/// and standardizes final sigma to sigma. 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating GreekLowerCaseFilter:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are properly lowercased.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class GreekLowerCaseFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly CharacterUtils charUtils;
+
+	  /// <summary>
+	  /// Create a GreekLowerCaseFilter that normalizes Greek token text.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene compatibility version, 
+	  ///   See <a href="#version">above</a> </param>
+	  /// <param name="in"> TokenStream to filter </param>
+	  public GreekLowerCaseFilter(Version matchVersion, TokenStream @in) : base(@in)
+	  {
+		this.charUtils = CharacterUtils.getInstance(matchVersion);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  char[] chArray = termAtt.buffer();
+		  int chLen = termAtt.length();
+		  for (int i = 0; i < chLen;)
+		  {
+			i += char.toChars(lowerCase(charUtils.codePointAt(chArray, i, chLen)), chArray, i);
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  private int lowerCase(int codepoint)
+	  {
+		switch (codepoint)
+		{
+		  /* There are two lowercase forms of sigma:
+		   *   U+03C2: small final sigma (end of word)
+		   *   U+03C3: small sigma (otherwise)
+		   *   
+		   * Standardize both to U+03C3
+		   */
+		  case '\u03C2': // small final sigma
+			return '\u03C3'; // small sigma
+
+		  /* Some Greek characters contain diacritics.
+		   * This filter removes these, converting to the lowercase base form.
+		   */
+
+		  case '\u0386': // capital alpha with tonos
+		  case '\u03AC': // small alpha with tonos
+			return '\u03B1'; // small alpha
+
+		  case '\u0388': // capital epsilon with tonos
+		  case '\u03AD': // small epsilon with tonos
+			return '\u03B5'; // small epsilon
+
+		  case '\u0389': // capital eta with tonos
+		  case '\u03AE': // small eta with tonos
+			return '\u03B7'; // small eta
+
+		  case '\u038A': // capital iota with tonos
+		  case '\u03AA': // capital iota with dialytika
+		  case '\u03AF': // small iota with tonos
+		  case '\u03CA': // small iota with dialytika
+		  case '\u0390': // small iota with dialytika and tonos
+			return '\u03B9'; // small iota
+
+		  case '\u038E': // capital upsilon with tonos
+		  case '\u03AB': // capital upsilon with dialytika
+		  case '\u03CD': // small upsilon with tonos
+		  case '\u03CB': // small upsilon with dialytika
+		  case '\u03B0': // small upsilon with dialytika and tonos
+			return '\u03C5'; // small upsilon
+
+		  case '\u038C': // capital omicron with tonos
+		  case '\u03CC': // small omicron with tonos
+			return '\u03BF'; // small omicron
+
+		  case '\u038F': // capital omega with tonos
+		  case '\u03CE': // small omega with tonos
+			return '\u03C9'; // small omega
+
+		  /* The previous implementation did the conversion below.
+		   * Only implemented for backwards compatibility with old indexes.
+		   */
+
+		  case '\u03A2': // reserved
+			return '\u03C2'; // small final sigma
+
+		  default:
+			return char.ToLower(codepoint);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

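The lowerCase() switch above is the entire normalization table: each accented vowel family collapses to its plain lowercase form, and both sigma variants are standardized to U+03C3. A standalone per-character illustration of the effect (a hypothetical helper, not the filter API; the real filter works on code points through CharacterUtils so supplementary characters are handled for 3.1+):

    // Mirrors a few cases of the mapping, BMP characters only:
    static char NormalizeGreek(char c)
    {
        switch (c)
        {
            case '\u03C2': return '\u03C3'; // final sigma -> sigma
            case '\u0386':
            case '\u03AC': return '\u03B1'; // alpha with tonos -> plain alpha
            case '\u038C':
            case '\u03CC': return '\u03BF'; // omicron with tonos -> plain omicron
            default: return char.ToLowerInvariant(c);
        }
    }

Applied character by character, "ΚΑΛΌΣ" becomes "καλοσ": the tonos is dropped and the final capital sigma ends up as plain small sigma.
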
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..d6f084d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekLowerCaseFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.el
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GreekLowerCaseFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GreekLowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new GreekLowerCaseFilterFactory </summary>
+	  public GreekLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override GreekLowerCaseFilter create(TokenStream @in)
+	  {
+		return new GreekLowerCaseFilter(luceneMatchVersion, @in);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+
+}
\ No newline at end of file

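One detail worth calling out: MultiTermComponent returns the factory itself, which tells the analysis framework that the same lowercasing may be applied to wildcard and prefix query fragments. A sketch, where factory is assumed to be an already-configured GreekLowerCaseFilterFactory:

    // Hypothetical: the multi-term hook hands back the same factory, so
    // query-time fragments get the same Greek lowercasing as indexed tokens.
    AbstractAnalysisFactory mt = factory.MultiTermComponent;
    System.Diagnostics.Debug.Assert(ReferenceEquals(mt, factory));

Lowercasing qualifies for multi-term use because it never splits, merges, or repositions tokens.
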
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilter.cs
new file mode 100644
index 0000000..90912da
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilter.cs
@@ -0,0 +1,72 @@
+namespace org.apache.lucene.analysis.el
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="GreekStemmer"/> to stem Greek
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// <para>
+	/// NOTE: Input is expected to be casefolded for Greek (including folding of final
+	/// sigma to sigma), and with diacritics removed. This can be achieved by using 
+	/// either <seealso cref="GreekLowerCaseFilter"/> or ICUFoldingFilter before GreekStemFilter.
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	public sealed class GreekStemFilter : TokenFilter
+	{
+	  private readonly GreekStemmer stemmer = new GreekStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public GreekStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

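Because this stemming is destructive, the documented way to protect individual terms is to mark them before the filter, as the summary says. A sketch of such a chain, assuming the ported SetKeywordMarkerFilter and StandardTokenizer keep the constructor shapes seen elsewhere in this commit (reader is any TextReader over the input):

    // Hypothetical chain protecting one term from Greek stemming:
    var keep = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("γεγονοτα"), false);
    TokenStream ts = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    ts = new GreekLowerCaseFilter(Version.LUCENE_CURRENT, ts); // required casefolding
    ts = new SetKeywordMarkerFilter(ts, keep); // sets KeywordAttribute on matches
    ts = new GreekStemFilter(ts);              // keywordAttr.Keyword short-circuits stemming
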
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilterFactory.cs
new file mode 100644
index 0000000..95229f2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.el
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GreekStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.GreekLowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GreekStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GreekStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GreekStemFilterFactory </summary>
+	  public GreekStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GreekStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
new file mode 100644
index 0000000..e9d98c5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
@@ -0,0 +1,799 @@
+namespace org.apache.lucene.analysis.el
+{
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A stemmer for Greek words, according to: <i>Development of a Stemmer for the
+	/// Greek Language.</i> Georgios Ntais
+	/// <para>
+	/// NOTE: Input is expected to be casefolded for Greek (including folding of final
+	/// sigma to sigma), and with diacritics removed. This can be achieved with 
+	/// either <seealso cref="GreekLowerCaseFilter"/> or ICUFoldingFilter.
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	public class GreekStemmer
+	{
+
+	 /// <summary>
+	 /// Stems a word contained in the leading portion of a char[] array.
+	 /// The word is passed through a number of rules that modify its length.
+	 /// </summary>
+	 /// <param name="s"> A char[] array that contains the word to be stemmed. </param>
+	 /// <param name="len"> The length of the char[] array. </param>
+	 /// <returns> The new length of the stemmed word. </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 4) // too short
+		{
+		  return len;
+		}
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int origLen = len;
+		int origLen = len;
+		// "short rules": if it hits one of these, it skips the "long list"
+		len = rule0(s, len);
+		len = rule1(s, len);
+		len = rule2(s, len);
+		len = rule3(s, len);
+		len = rule4(s, len);
+		len = rule5(s, len);
+		len = rule6(s, len);
+		len = rule7(s, len);
+		len = rule8(s, len);
+		len = rule9(s, len);
+		len = rule10(s, len);
+		len = rule11(s, len);
+		len = rule12(s, len);
+		len = rule13(s, len);
+		len = rule14(s, len);
+		len = rule15(s, len);
+		len = rule16(s, len);
+		len = rule17(s, len);
+		len = rule18(s, len);
+		len = rule19(s, len);
+		len = rule20(s, len);
+		// "long list"
+		if (len == origLen)
+		{
+		  len = rule21(s, len);
+		}
+
+		return rule22(s, len);
+	  }
+
+	  private int rule0(char[] s, int len)
+	  {
+		if (len > 9 && (endsWith(s, len, "καθεστωτοσ") || endsWith(s, len, "καθεστωτων")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 8 && (endsWith(s, len, "γεγονοτοσ") || endsWith(s, len, "γεγονοτων")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 8 && endsWith(s, len, "καθεστωτα"))
+		{
+		  return len - 3;
+		}
+
+		if (len > 7 && (endsWith(s, len, "τατογιου") || endsWith(s, len, "τατογιων")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 7 && endsWith(s, len, "γεγονοτα"))
+		{
+		  return len - 3;
+		}
+
+		if (len > 7 && endsWith(s, len, "καθεστωσ"))
+		{
+		  return len - 2;
+		}
+
+		if (len > 6 && (endsWith(s, len, "σκαγιου")) || endsWith(s, len, "σκαγιων") || endsWith(s, len, "ολογιου") || endsWith(s, len, "ολογιων") || endsWith(s, len, "κρεατοσ") || endsWith(s, len, "κρεατων") || endsWith(s, len, "περατοσ") || endsWith(s, len, "περατων") || endsWith(s, len, "τερατοσ") || endsWith(s, len, "τερατων"))
+		{
+		  return len - 4;
+		}
+
+		if (len > 6 && endsWith(s, len, "τατογια"))
+		{
+		  return len - 3;
+		}
+
+		if (len > 6 && endsWith(s, len, "γεγονοσ"))
+		{
+		  return len - 2;
+		}
+
+		if (len > 5 && (endsWith(s, len, "φαγιου") || endsWith(s, len, "φαγιων") || endsWith(s, len, "σογιου") || endsWith(s, len, "σογιων")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (endsWith(s, len, "σκαγια") || endsWith(s, len, "ολογια") || endsWith(s, len, "κρεατα") || endsWith(s, len, "περατα") || endsWith(s, len, "τερατα")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (endsWith(s, len, "φαγια") || endsWith(s, len, "σογια") || endsWith(s, len, "φωτοσ") || endsWith(s, len, "φωτων")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (endsWith(s, len, "κρεασ") || endsWith(s, len, "περασ") || endsWith(s, len, "τερασ")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3 && endsWith(s, len, "φωτα"))
+		{
+		  return len - 2;
+		}
+
+		if (len > 2 && endsWith(s, len, "φωσ"))
+		{
+		  return len - 1;
+		}
+
+		return len;
+	  }
+
+	  private int rule1(char[] s, int len)
+	  {
+		if (len > 4 && (endsWith(s, len, "αδεσ") || endsWith(s, len, "αδων")))
+		{
+		  len -= 4;
+		  if (!(endsWith(s, len, "οκ") || endsWith(s, len, "μαμ") || endsWith(s, len, "μαν") || endsWith(s, len, "μπαμπ") || endsWith(s, len, "πατερ") || endsWith(s, len, "γιαγι") || endsWith(s, len, "νταντ") || endsWith(s, len, "κυρ") || endsWith(s, len, "θει") || endsWith(s, len, "πεθερ")))
+		  {
+			len += 2; // add back -αδ
+		  }
+		}
+		return len;
+	  }
+
+	  private int rule2(char[] s, int len)
+	  {
+		if (len > 4 && (endsWith(s, len, "εδεσ") || endsWith(s, len, "εδων")))
+		{
+		  len -= 4;
+		  if (endsWith(s, len, "οπ") || endsWith(s, len, "ιπ") || endsWith(s, len, "εμπ") || endsWith(s, len, "υπ") || endsWith(s, len, "γηπ") || endsWith(s, len, "δαπ") || endsWith(s, len, "κρασπ") || endsWith(s, len, "μιλ"))
+		  {
+			len += 2; // add back -εδ
+		  }
+		}
+		return len;
+	  }
+
+	  private int rule3(char[] s, int len)
+	  {
+		if (len > 5 && (endsWith(s, len, "ουδεσ") || endsWith(s, len, "ουδων")))
+		{
+		  len -= 5;
+		  if (endsWith(s, len, "αρκ") || endsWith(s, len, "καλιακ") || endsWith(s, len, "πεταλ") || endsWith(s, len, "λιχ") || endsWith(s, len, "πλεξ") || endsWith(s, len, "σκ") || endsWith(s, len, "σ") || endsWith(s, len, "φλ") || endsWith(s, len, "φρ") || endsWith(s, len, "βελ") || endsWith(s, len, "λουλ") || endsWith(s, len, "χν") || endsWith(s, len, "σπ") || endsWith(s, len, "τραγ") || endsWith(s, len, "φε"))
+		  {
+			len += 3; // add back -ουδ
+		  }
+		}
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc4 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"), false);
+
+	  private int rule4(char[] s, int len)
+	  {
+		if (len > 3 && (endsWith(s, len, "εωσ") || endsWith(s, len, "εων")))
+		{
+		  len -= 3;
+		  if (exc4.contains(s, 0, len))
+		  {
+			len++; // add back -ε
+		  }
+		}
+		return len;
+	  }
+
+	  private int rule5(char[] s, int len)
+	  {
+		if (len > 2 && endsWith(s, len, "ια"))
+		{
+		  len -= 2;
+		  if (endsWithVowel(s, len))
+		  {
+			len++; // add back -ι
+		  }
+		}
+		else if (len > 3 && (endsWith(s, len, "ιου") || endsWith(s, len, "ιων")))
+		{
+		  len -= 3;
+		  if (endsWithVowel(s, len))
+		  {
+			len++; // add back -ι
+		  }
+		}
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc6 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ", "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ", "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ", "πετσ", "πιτσ", "πικαντ", "πλιατσ", "ποστελν", "πρωτοδ", "σερτ", "συναδ", "τσαμ", "υποδ", "φιλον", "φυλοδ", "χασ"), false);
+
+	  private int rule6(char[] s, int len)
+	  {
+		bool removed = false;
+		if (len > 3 && (endsWith(s, len, "ικα") || endsWith(s, len, "ικο")))
+		{
+		  len -= 3;
+		  removed = true;
+		}
+		else if (len > 4 && (endsWith(s, len, "ικου") || endsWith(s, len, "ικων")))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+
+		if (removed)
+		{
+		  if (endsWithVowel(s, len) || exc6.contains(s, 0, len))
+		  {
+			len += 2; // add back -ικ
+		  }
+		}
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc7 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ", "πεθ", "πικρ", "ποτ", "σιχ", "χ"), false);
+
+	  private int rule7(char[] s, int len)
+	  {
+		if (len == 5 && endsWith(s, len, "αγαμε"))
+		{
+		  return len - 1;
+		}
+
+		if (len > 7 && endsWith(s, len, "ηθηκαμε"))
+		{
+		  len -= 7;
+		}
+		else if (len > 6 && endsWith(s, len, "ουσαμε"))
+		{
+		  len -= 6;
+		}
+		else if (len > 5 && (endsWith(s, len, "αγαμε") || endsWith(s, len, "ησαμε") || endsWith(s, len, "ηκαμε")))
+		{
+		  len -= 5;
+		}
+
+		if (len > 3 && endsWith(s, len, "αμε"))
+		{
+		  len -= 3;
+		  if (exc7.contains(s, 0, len))
+		  {
+			len += 2; // add back -αμ
+		  }
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc8a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("τρ", "τσ"), false);
+
+	  private static readonly CharArraySet exc8b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ", "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ", "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ", "τσαρλατ", "ορφ", "τσιγγ", "τσοπ", "φωτοστεφ", "χ", "ψυχοπλ", "αγ", "ορφ", "γαλ", "γερ", "δεκ", "διπλ", "αμερικαν", "ουρ", "πιθ", "πουριτ", "σ", "ζωντ", "ικ", "καστ", "κοπ", "λιχ", "λουθηρ", "μαιντ", "μελ", "σιγ", "σπ", "στεγ", "τραγ", "τσαγ", "φ", "ερ", "αδαπ", "αθιγγ", "αμηχ", "ανικ", "ανοργ", "απηγ", "απιθ", "ατσιγγ", "βασ", "βασκ", "βαθυγαλ", "βιομηχ", "βραχυκ", "διατ", "διαφ", "ενοργ", "θυσ", "καπνοβιομηχ", "καταγαλ", "κλιβ", "κοιλαρφ", "λιβ", "μεγλοβιομηχ", "μικροβιομηχ", "νταβ", "ξηροκλιβ", "ολιγοδαμ", "ολογαλ", "πενταρφ", "περηφ", "περιτρ", "πλατ", "πολυδαπ", "πολυμηχ", "στεφ", "ταβ", "τετ", "υπερηφ", "υποκοπ", "χαμηλοδαπ", "ψηλοταβ"), false);
+
+	  private int rule8(char[] s, int len)
+	  {
+		bool removed = false;
+
+		if (len > 8 && endsWith(s, len, "ιουντανε"))
+		{
+		  len -= 8;
+		  removed = true;
+		}
+		else if (len > 7 && endsWith(s, len, "ιοντανε") || endsWith(s, len, "ουντανε") || endsWith(s, len, "ηθηκανε"))
+		{
+		  len -= 7;
+		  removed = true;
+		}
+		else if (len > 6 && endsWith(s, len, "ιοτανε") || endsWith(s, len, "οντανε") || endsWith(s, len, "ουσανε"))
+		{
+		  len -= 6;
+		  removed = true;
+		}
+		else if (len > 5 && endsWith(s, len, "αγανε") || endsWith(s, len, "ησανε") || endsWith(s, len, "οτανε") || endsWith(s, len, "ηκανε"))
+		{
+		  len -= 5;
+		  removed = true;
+		}
+
+		if (removed && exc8a.contains(s, 0, len))
+		{
+		  // add -αγαν (we removed > 4 chars so it's safe)
+		  len += 4;
+		  s[len - 4] = 'α';
+		  s[len - 3] = 'γ';
+		  s[len - 2] = 'α';
+		  s[len - 1] = 'ν';
+		}
+
+		if (len > 3 && endsWith(s, len, "ανε"))
+		{
+		  len -= 3;
+		  if (endsWithVowelNoY(s, len) || exc8b.contains(s, 0, len))
+		  {
+			len += 2; // add back -αν
+		  }
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc9 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ", "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ", "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), false);
+
+	  private int rule9(char[] s, int len)
+	  {
+		if (len > 5 && endsWith(s, len, "ησετε"))
+		{
+		  len -= 5;
+		}
+
+		if (len > 3 && endsWith(s, len, "ετε"))
+		{
+		  len -= 3;
+		  if (exc9.contains(s, 0, len) || endsWithVowelNoY(s, len) || endsWith(s, len, "οδ") || endsWith(s, len, "αιρ") || endsWith(s, len, "φορ") || endsWith(s, len, "ταθ") || endsWith(s, len, "διαθ") || endsWith(s, len, "σχ") || endsWith(s, len, "ενδ") || endsWith(s, len, "ευρ") || endsWith(s, len, "τιθ") || endsWith(s, len, "υπερθ") || endsWith(s, len, "ραθ") || endsWith(s, len, "ενθ") || endsWith(s, len, "ροθ") || endsWith(s, len, "σθ") || endsWith(s, len, "πυρ") || endsWith(s, len, "αιν") || endsWith(s, len, "συνδ") || endsWith(s, len, "συν") || endsWith(s, len, "συνθ") || endsWith(s, len, "χωρ") || endsWith(s, len, "πον") || endsWith(s, len, "βρ") || endsWith(s, len, "καθ") || endsWith(s, len, "ευθ") || endsWith(s, len, "εκθ") || endsWith(s, len, "νετ") || endsWith(s, len, "ρον") || endsWith(s, len, "αρκ") || endsWith(s, len, "βαρ") || endsWith(s, len, "βολ") || endsWith(s, len, "ωφελ"))
+		  {
+			len += 2; // add back -ετ
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int rule10(char[] s, int len)
+	  {
+		if (len > 5 && (endsWith(s, len, "οντασ") || endsWith(s, len, "ωντασ")))
+		{
+		  len -= 5;
+		  if (len == 3 && endsWith(s, len, "αρχ"))
+		  {
+			len += 3; // add back *ντ
+			s[len - 3] = 'ο';
+		  }
+		  if (endsWith(s, len, "κρε"))
+		  {
+			len += 3; // add back *ντ
+			s[len - 3] = 'ω';
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int rule11(char[] s, int len)
+	  {
+		if (len > 6 && endsWith(s, len, "ομαστε"))
+		{
+		  len -= 6;
+		  if (len == 2 && endsWith(s, len, "ον"))
+		  {
+			len += 5; // add back -ομαστ
+		  }
+		}
+		else if (len > 7 && endsWith(s, len, "ιομαστε"))
+		{
+		  len -= 7;
+		  if (len == 2 && endsWith(s, len, "ον"))
+		  {
+			len += 5;
+			s[len - 5] = 'ο';
+			s[len - 4] = 'μ';
+			s[len - 3] = 'α';
+			s[len - 2] = 'σ';
+			s[len - 1] = 'τ';
+		  }
+		}
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc12a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"), false);
+
+	  private static readonly CharArraySet exc12b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"), false);
+
+	  private int rule12(char[] s, int len)
+	  {
+		if (len > 5 && endsWith(s, len, "ιεστε"))
+		{
+		  len -= 5;
+		  if (exc12a.contains(s, 0, len))
+		  {
+			len += 4; // add back -ιεστ
+		  }
+		}
+
+		if (len > 4 && endsWith(s, len, "εστε"))
+		{
+		  len -= 4;
+		  if (exc12b.contains(s, 0, len))
+		  {
+			len += 3; // add back -εστ
+		  }
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc13 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"), false);
+
+	  private int rule13(char[] s, int len)
+	  {
+		if (len > 6 && endsWith(s, len, "ηθηκεσ"))
+		{
+		  len -= 6;
+		}
+		else if (len > 5 && (endsWith(s, len, "ηθηκα") || endsWith(s, len, "ηθηκε")))
+		{
+		  len -= 5;
+		}
+
+		bool removed = false;
+
+		if (len > 4 && endsWith(s, len, "ηκεσ"))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+		else if (len > 3 && (endsWith(s, len, "ηκα") || endsWith(s, len, "ηκε")))
+		{
+		  len -= 3;
+		  removed = true;
+		}
+
+		if (removed && (exc13.contains(s, 0, len) || endsWith(s, len, "σκωλ") || endsWith(s, len, "σκουλ") || endsWith(s, len, "ναρθ") || endsWith(s, len, "σφ") || endsWith(s, len, "οθ") || endsWith(s, len, "πιθ")))
+		{
+		  len += 2; // add back the -ηκ
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc14 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ", "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ", "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε", "τσα"), false);
+
+	  private int rule14(char[] s, int len)
+	  {
+		bool removed = false;
+
+		if (len > 5 && endsWith(s, len, "ουσεσ"))
+		{
+		  len -= 5;
+		  removed = true;
+		}
+		else if (len > 4 && (endsWith(s, len, "ουσα") || endsWith(s, len, "ουσε")))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+
+		if (removed && (exc14.contains(s, 0, len) || endsWithVowel(s, len) || endsWith(s, len, "ποδαρ") || endsWith(s, len, "βλεπ") || endsWith(s, len, "πανταχ") || endsWith(s, len, "φρυδ") || endsWith(s, len, "μαντιλ") || endsWith(s, len, "μαλλ") || endsWith(s, len, "κυματ") || endsWith(s, len, "λαχ") || endsWith(s, len, "ληγ") || endsWith(s, len, "φαγ") || endsWith(s, len, "ομ") || endsWith(s, len, "πρωτ")))
+		{
+		  len += 3; // add back -ουσ
+		}
+
+	   return len;
+	  }
+
+	  private static readonly CharArraySet exc15a = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ", "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ", "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ", "συντ", "τ", "υποτ", "χαρ", "αειπ", "αιμοστ", "ανυπ", "αποτ", "αρτιπ", "διατ", "εν", "επιτ", "κροκαλοπ", "σιδηροπ", "λ", "ναυ", "ουλαμ", "ουρ", "π", "τρ", "μ"), false);
+
+	  private static readonly CharArraySet exc15b = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ψοφ", "ναυλοχ"), false);
+
+	  private int rule15(char[] s, int len)
+	  {
+		bool removed = false;
+		if (len > 4 && endsWith(s, len, "αγεσ"))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+		else if (len > 3 && (endsWith(s, len, "αγα") || endsWith(s, len, "αγε")))
+		{
+		  len -= 3;
+		  removed = true;
+		}
+
+		if (removed)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean cond1 = exc15a.contains(s, 0, len) || endsWith(s, len, "οφ") || endsWith(s, len, "πελ") || endsWith(s, len, "χορτ") || endsWith(s, len, "λλ") || endsWith(s, len, "σφ") || endsWith(s, len, "ρπ") || endsWith(s, len, "φρ") || endsWith(s, len, "πρ") || endsWith(s, len, "λοχ") || endsWith(s, len, "σμην");
+		  bool cond1 = exc15a.contains(s, 0, len) || endsWith(s, len, "οφ") || endsWith(s, len, "πελ") || endsWith(s, len, "χορτ") || endsWith(s, len, "λλ") || endsWith(s, len, "σφ") || endsWith(s, len, "ρπ") || endsWith(s, len, "φρ") || endsWith(s, len, "πρ") || endsWith(s, len, "λοχ") || endsWith(s, len, "σμην");
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean cond2 = exc15b.contains(s, 0, len) || endsWith(s, len, "κολλ");
+		  bool cond2 = exc15b.contains(s, 0, len) || endsWith(s, len, "κολλ");
+
+		  if (cond1 && !cond2)
+		  {
+			len += 2; // add back -αγ
+		  }
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc16 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"), false);
+
+	  private int rule16(char[] s, int len)
+	  {
+		bool removed = false;
+		if (len > 4 && endsWith(s, len, "ησου"))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+		else if (len > 3 && (endsWith(s, len, "ησε") || endsWith(s, len, "ησα")))
+		{
+		  len -= 3;
+		  removed = true;
+		}
+
+		if (removed && exc16.contains(s, 0, len))
+		{
+		  len += 2; // add back -ησ
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc17 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"), false);
+
+	  private int rule17(char[] s, int len)
+	  {
+		if (len > 4 && endsWith(s, len, "ηστε"))
+		{
+		  len -= 4;
+		  if (exc17.contains(s, 0, len))
+		  {
+			len += 3; // add back the -ηστ
+		  }
+		}
+
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc18 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"), false);
+
+	  private int rule18(char[] s, int len)
+	  {
+		bool removed = false;
+
+		if (len > 6 && (endsWith(s, len, "ησουνε") || endsWith(s, len, "ηθουνε")))
+		{
+		  len -= 6;
+		  removed = true;
+		}
+		else if (len > 4 && endsWith(s, len, "ουνε"))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+
+		if (removed && exc18.contains(s, 0, len))
+		{
+		  len += 3;
+		  s[len - 3] = 'ο';
+		  s[len - 2] = 'υ';
+		  s[len - 1] = 'ν';
+		}
+		return len;
+	  }
+
+	  private static readonly CharArraySet exc19 = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"), false);
+
+	  private int rule19(char[] s, int len)
+	  {
+		bool removed = false;
+
+		if (len > 6 && (endsWith(s, len, "ησουμε") || endsWith(s, len, "ηθουμε")))
+		{
+		  len -= 6;
+		  removed = true;
+		}
+		else if (len > 4 && endsWith(s, len, "ουμε"))
+		{
+		  len -= 4;
+		  removed = true;
+		}
+
+		if (removed && exc19.contains(s, 0, len))
+		{
+		  len += 3;
+		  s[len - 3] = 'ο';
+		  s[len - 2] = 'υ';
+		  s[len - 1] = 'μ';
+		}
+		return len;
+	  }
+
+	  private int rule20(char[] s, int len)
+	  {
+		if (len > 5 && (endsWith(s, len, "ματων") || endsWith(s, len, "ματοσ")))
+		{
+		  len -= 3;
+		}
+		else if (len > 4 && endsWith(s, len, "ματα"))
+		{
+		  len -= 2;
+		}
+		return len;
+	  }
+
+	  private int rule21(char[] s, int len)
+	  {
+		if (len > 9 && endsWith(s, len, "ιοντουσαν"))
+		{
+		  return len - 9;
+		}
+
+		if (len > 8 && (endsWith(s, len, "ιομασταν") || endsWith(s, len, "ιοσασταν") || endsWith(s, len, "ιουμαστε") || endsWith(s, len, "οντουσαν")))
+		{
+		  return len - 8;
+		}
+
+		if (len > 7 && (endsWith(s, len, "ιεμαστε") || endsWith(s, len, "ιεσαστε") || endsWith(s, len, "ιομουνα") || endsWith(s, len, "ιοσαστε") || endsWith(s, len, "ιοσουνα") || endsWith(s, len, "ιουνται") || endsWith(s, len, "ιουνταν") || endsWith(s, len, "ηθηκατε") || endsWith(s, len, "ομασταν") || endsWith(s, len, "οσασταν") || endsWith(s, len, "ουμαστε")))
+		{
+		  return len - 7;
+		}
+
+		if (len > 6 && (endsWith(s, len, "ιομουν") || endsWith(s, len, "ιονταν") || endsWith(s, len, "ιοσουν") || endsWith(s, len, "ηθειτε") || endsWith(s, len, "ηθηκαν") || endsWith(s, len, "ομουνα") || endsWith(s, len, "οσαστε") || endsWith(s, len, "οσουνα") || endsWith(s, len, "ουνται") || endsWith(s, len, "ουνταν") || endsWith(s, len, "ουσατε")))
+		{
+		  return len - 6;
+		}
+
+		if (len > 5 && (endsWith(s, len, "αγατε") || endsWith(s, len, "ιεμαι") || endsWith(s, len, "ιεται") || endsWith(s, len, "ιεσαι") || endsWith(s, len, "ιοταν") || endsWith(s, len, "ιουμα") || endsWith(s, len, "ηθεισ") || endsWith(s, len, "ηθουν") || endsWith(s, len, "ηκατε") || endsWith(s, len, "ησατε") || endsWith(s, len, "ησουν") || endsWith(s, len, "ομουν") || endsWith(s, len, "ονται") || endsWith(s, len, "ονταν") || endsWith(s, len, "οσουν") || endsWith(s, len, "ουμαι") || endsWith(s, len, "ουσαν")))
+		{
+		  return len - 5;
+		}
+
+		if (len > 4 && (endsWith(s, len, "αγαν") || endsWith(s, len, "αμαι") || endsWith(s, len, "ασαι") || endsWith(s, len, "αται") || endsWith(s, len, "ειτε") || endsWith(s, len, "εσαι") || endsWith(s, len, "εται") || endsWith(s, len, "ηδεσ") || endsWith(s, len, "ηδων") || endsWith(s, len, "ηθει") || endsWith(s, len, "ηκαν") || endsWith(s, len, "ησαν") || endsWith(s, len, "ησει") || endsWith(s, len, "ησεσ") || endsWith(s, len, "ομαι") || endsWith(s, len, "οταν")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 3 && (endsWith(s, len, "αει") || endsWith(s, len, "εισ") || endsWith(s, len, "ηθω") || endsWith(s, len, "ησω") || endsWith(s, len, "ουν") || endsWith(s, len, "ουσ")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 2 && (endsWith(s, len, "αν") || endsWith(s, len, "ασ") || endsWith(s, len, "αω") || endsWith(s, len, "ει") || endsWith(s, len, "εσ") || endsWith(s, len, "ησ") || endsWith(s, len, "οι") || endsWith(s, len, "οσ") || endsWith(s, len, "ου") || endsWith(s, len, "υσ") || endsWith(s, len, "ων")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 1 && endsWithVowel(s, len))
+		{
+		  return len - 1;
+		}
+
+		return len;
+	  }
+
+	  private int rule22(char[] s, int len)
+	  {
+		if (endsWith(s, len, "εστερ") || endsWith(s, len, "εστατ"))
+		{
+		  return len - 5;
+		}
+
+		if (endsWith(s, len, "οτερ") || endsWith(s, len, "οτατ") || endsWith(s, len, "υτερ") || endsWith(s, len, "υτατ") || endsWith(s, len, "ωτερ") || endsWith(s, len, "ωτατ"))
+		{
+		  return len - 4;
+		}
+
+		return len;
+	  }
+
+	 /// <summary>
+	 /// Checks if the word contained in the leading portion of the char[] array
+	 /// ends with the given suffix.
+	 /// </summary>
+	 /// <param name="s"> A char[] array that represents a word. </param>
+	 /// <param name="len"> The length of the char[] array. </param>
+	 /// <param name="suffix"> The suffix to check the word against. </param>
+	 /// <returns> True if the word ends with the given suffix, false otherwise. </returns>
+	  private bool endsWith(char[] s, int len, string suffix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int suffixLen = suffix.length();
+		int suffixLen = suffix.Length;
+		if (suffixLen > len)
+		{
+		  return false;
+		}
+		for (int i = suffixLen - 1; i >= 0; i--)
+		{
+		  if (s[len - (suffixLen - i)] != suffix[i])
+		  {
+			return false;
+		  }
+		}
+
+		return true;
+	  }
+
+	 /// <summary>
+	 /// Checks if the word contained in the leading portion of the char[] array
+	 /// ends with a Greek vowel.
+	 /// </summary>
+	 /// <param name="s"> A char[] array that represents a word. </param>
+	 /// <param name="len"> The length of the char[] array. </param>
+	 /// <returns> True if the word contained in the leading portion of the char[] array
+	 /// ends with a vowel, false otherwise. </returns>
+	  private bool endsWithVowel(char[] s, int len)
+	  {
+		if (len == 0)
+		{
+		  return false;
+		}
+		switch (s[len - 1])
+		{
+		  case 'α':
+		  case 'ε':
+		  case 'η':
+		  case 'ι':
+		  case 'ο':
+		  case 'υ':
+		  case 'ω':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+
+	 /// <summary>
+	 /// Checks if the word contained in the leading portion of the char[] array
+	 /// ends with a Greek vowel other than upsilon (υ).
+	 /// </summary>
+	 /// <param name="s"> A char[] array that represents a word. </param>
+	 /// <param name="len"> The length of the char[] array. </param>
+	 /// <returns> True if the word contained in the leading portion of the char[] array
+	 /// ends with a vowel other than upsilon, false otherwise. </returns>
+	  private bool endsWithVowelNoY(char[] s, int len)
+	  {
+		if (len == 0)
+		{
+		  return false;
+		}
+		switch (s[len - 1])
+		{
+		  case 'α':
+		  case 'ε':
+		  case 'η':
+		  case 'ι':
+		  case 'ο':
+		  case 'ω':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

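Putting the GreekStemmer cascade together: rules 0 through 20 are the "short rules", rule21 (the long suffix list) fires only when none of them changed the length, and rule22 trims comparative/superlative endings last. A sketch of calling the stemmer directly on a buffer, the same way GreekStemFilter does:

    // In-place stemming over the leading portion of a char[]:
    var stemmer = new GreekStemmer();
    char[] buf = "γεγονοτα".ToCharArray();
    int newLen = stemmer.stem(buf, buf.Length);
    // rule0 matches the whole-word suffix and returns len - 3, so the
    // stemmed term is the first five characters:
    string stemmed = new string(buf, 0, newLen); // "γεγον"
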
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
new file mode 100644
index 0000000..ec3c8bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
@@ -0,0 +1,121 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for English.
+	/// </summary>
+	public sealed class EnglishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#getDefaultStopSet"/>.
+	  /// </summary>
+	  public EnglishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="EnglishPossessiveFilter"/>, 
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="PorterStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader); // 'final' in the original Java
+		TokenStream result = new StandardFilter(matchVersion, source);
+		// Prior to 3.1, StandardFilter strips English possessives itself (the classic behavior).
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  result = new EnglishPossessiveFilter(matchVersion, result);
+		}
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new PorterStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
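
For orientation, this is how the finished analyzer would be consumed. It is a
sketch against the Java 4.x API shape the port follows (tokenStream, addAttribute
and Version.LUCENE_48 are assumptions until the surrounding types are ported):

    var analyzer = new EnglishAnalyzer(Version.LUCENE_48);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("The quick brown fox's jumps"));
    var term = ts.addAttribute(typeof(CharTermAttribute));
    ts.reset();
    while (ts.incrementToken())
    {
        Console.WriteLine(term); // quick, brown, fox, jump
    }
    ts.end();                    // "the" is dropped as a stopword, "fox's" loses its possessive,
    ts.close();                  // and PorterStemFilter reduces "jumps" to "jump"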

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilter.cs
new file mode 100644
index 0000000..3e99c4c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="EnglishMinimalStemmer"/> to stem 
+	/// English words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class EnglishMinimalStemFilter : TokenFilter
+	{
+	  private readonly EnglishMinimalStemmer stemmer = new EnglishMinimalStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public EnglishMinimalStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken() // Java: throws IOException (no C# equivalent)
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); // 'final' in the original Java
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
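
A sketch of the filter in a hand-built chain, assuming the same 4.x API shape as
the Java original:

    Tokenizer tok = new StandardTokenizer(Version.LUCENE_48, new StringReader("dogs ponies glass"));
    TokenStream ts = new EnglishMinimalStemFilter(new LowerCaseFilter(Version.LUCENE_48, tok));
    // After reset()/incrementToken() the terms come out as: dog, pony, glass
    // ("glass" survives because the stemmer never touches words ending in "ss").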

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..96d75e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="EnglishMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.EnglishMinimalStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class EnglishMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new EnglishMinimalStemFilterFactory </summary>
+	  public EnglishMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new EnglishMinimalStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
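
Solr normally instantiates the factory from the schema above, but it can also be
driven directly. A hedged sketch - the args dictionary must be empty once the base
constructor has consumed its standard keys, or the constructor throws:

    var factory = new EnglishMinimalStemFilterFactory(new Dictionary<string, string>());
    TokenStream stemmed = factory.create(input); // wraps 'input' in an EnglishMinimalStemFilter

    // new EnglishMinimalStemFilterFactory(new Dictionary<string, string> { { "foo", "bar" } })
    // would throw: ArgumentException("Unknown parameters: ...")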

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemmer.cs
new file mode 100644
index 0000000..945c795
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishMinimalStemmer.cs
@@ -0,0 +1,61 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Minimal plural stemmer for English.
+	/// <para>
+	/// This stemmer implements the "S-Stemmer" from
+	/// <i>How Effective Is Suffixing?</i>
+	/// Donna Harman.
+	/// </para>
+	/// </summary>
+	public class EnglishMinimalStemmer
+	{
+	  // The original Java method is annotated @SuppressWarnings("fallthrough"); see the goto below.
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 3 || s[len - 1] != 's')
+		{
+		  return len;
+		}
+
+		switch (s[len - 2])
+		{
+		  case 'u':
+		  case 's':
+			  return len;
+		  case 'e':
+			if (len > 3 && s[len - 3] == 'i' && s[len - 4] != 'a' && s[len - 4] != 'e')
+			{
+			  s[len - 3] = 'y';
+			  return len - 2;
+			}
+			if (s[len - 3] == 'i' || s[len - 3] == 'a' || s[len - 3] == 'o' || s[len - 3] == 'e')
+			{
+			  return len;
+			}
+			goto default; // the Java original relies on intentional switch fallthrough here
+		  default:
+			  return len - 1;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
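
To make the three branches concrete, here is what the stemmer does to a few
inputs (it edits the buffer in place and returns the new logical length):

    var stemmer = new EnglishMinimalStemmer();
    char[] a = "ponies".ToCharArray(); int n = stemmer.stem(a, 6); // "ies" -> "y":  n == 4, word is "pony"
    char[] b = "types".ToCharArray();  n = stemmer.stem(b, 5);     // bare "-s" cut: n == 4, word is "type"
    char[] c = "goes".ToCharArray();   n = stemmer.stem(c, 4);     // vowel + "es":  unchanged ("goes")
    char[] d = "glass".ToCharArray();  n = stemmer.stem(d, 5);     // "-ss"/"-us":   unchanged ("glass")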

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
new file mode 100644
index 0000000..aad7bb9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
@@ -0,0 +1,79 @@
+using System;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// TokenFilter that removes possessives (trailing 's) from words.
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating EnglishPossessiveFilter:
+	/// <ul>
+	///    <li> As of 3.6, U+2019 RIGHT SINGLE QUOTATION MARK and 
+	///         U+FF07 FULLWIDTH APOSTROPHE are also treated as
+	///         quotation marks.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class EnglishPossessiveFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private Version matchVersion;
+
+	  /// @deprecated Use <seealso cref="#EnglishPossessiveFilter(Version, TokenStream)"/> instead.
+	  [Obsolete("Use EnglishPossessiveFilter(Version, TokenStream) instead.")]
+	  public EnglishPossessiveFilter(TokenStream input) : this(Version.LUCENE_35, input)
+	  {
+	  }
+
+	  public EnglishPossessiveFilter(Version version, TokenStream input) : base(input)
+	  {
+		this.matchVersion = version;
+	  }
+
+	  public override bool incrementToken() // Java: throws IOException (no C# equivalent)
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = termAtt.buffer();    // 'final' in the original Java
+		int bufferLength = termAtt.length(); // likewise 'final'
+
+		// A possessive is an apostrophe (or, from 3.6 on, U+2019 / U+FF07) followed by a final 's' or 'S'
+		if (bufferLength >= 2 && (buffer[bufferLength - 2] == '\'' || (matchVersion.onOrAfter(Version.LUCENE_36) && (buffer[bufferLength - 2] == '\u2019' || buffer[bufferLength - 2] == '\uFF07'))) && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
+		{
+		  termAtt.Length = bufferLength - 2; // Strip the last 2 characters off
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
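
Concretely, for a single-token stream (KeywordTokenizer is assumed to be ported
alongside this filter):

    TokenStream ts = new EnglishPossessiveFilter(Version.LUCENE_48,
        new KeywordTokenizer(new StringReader("Bill's")));
    // After reset()/incrementToken() the term attribute holds "Bill".
    // "Bill's" written with U+2019 is stripped too, but only when matchVersion >= LUCENE_36,
    // and a bare trailing apostrophe ("Bills'") is left untouched.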

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilterFactory.cs
new file mode 100644
index 0000000..632fd53
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="EnglishPossessiveFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.EnglishPossessiveFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class EnglishPossessiveFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new EnglishPossessiveFilterFactory </summary>
+	  public EnglishPossessiveFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new EnglishPossessiveFilter(luceneMatchVersion, input);
+	  }
+	}
+
+}
\ No newline at end of file
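
Unlike the minimal-stem factory, this one calls assureMatchVersion(), so direct
construction has to supply the luceneMatchVersion entry that Solr injects
automatically (the key name follows the Java AbstractAnalysisFactory):

    var args = new Dictionary<string, string> { { "luceneMatchVersion", "4.8" } };
    var factory = new EnglishPossessiveFilterFactory(args); // the base class drains args
    TokenStream filtered = factory.create(input);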


[07/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
new file mode 100644
index 0000000..c92b86e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -0,0 +1,733 @@
+using System;
+
+/* The following code was generated by JFlex 1.5.1 */
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// This class implements Word Break rules from the Unicode Text Segmentation 
+	/// algorithm, as specified in 
+	/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>. 
+	/// <p/>
+	/// Tokens produced are of the following types:
+	/// <ul>
+	///   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
+	///   <li>&lt;NUM&gt;: A number</li>
+	///   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
+	///       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+	///   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
+	///   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
+	///   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
+	///   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+	/// </ul>
+	/// </summary>
+
+	public sealed class StandardTokenizerImpl : StandardTokenizerInterface
+	{
+
+	  /// <summary>
+	  /// This character denotes the end of file </summary>
+	  public const int YYEOF = -1;
+
+	  /// <summary>
+	  /// initial size of the lookahead buffer </summary>
+	  private const int ZZ_BUFFERSIZE = 4096;
+
+	  /// <summary>
+	  /// lexical states </summary>
+	  public const int YYINITIAL = 0;
+
+	  /// <summary>
+	  /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+	  /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+	  ///                  at the beginning of a line
+	  /// l is of the form l = 2*k, k a non-negative integer
+	  /// </summary>
+	  private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
+
+	  /// <summary>
+	  /// Translates characters to character classes
+	  /// </summary>
+	  private const string ZZ_CMAP_PACKED = "\x0022\0\x0001\x008B\x0004\0\x0001\x008A\x0004\0\x0001\x0083\x0001\0\x0001\x0084\x0001\0\x000A\x0080" + "\x0001\x0082\x0001\x0083\x0005\0\x001A\x007E\x0004\0\x0001\x0085\x0001\0\x001A\x007E\x002F\0\x0001\x007E" + "\x0002\0\x0001\x007F\x0007\0\x0001\x007E\x0001\0\x0001\x0082\x0002\0\x0001\x007E\x0005\0\x0017\x007E" + "\x0001\0\x001F\x007E\x0001\0\u01ca\x007E\x0004\0\x000C\x007E\x0005\0\x0001\x0082\x0008\0\x0005\x007E" + "\x0007\0\x0001\x007E\x0001\0\x0001\x007E\x0011\0\x0070\x007F\x0005\x007E\x0001\0\x0002\x007E\x0002\0" + "\x0004\x007E\x0001\x0083\x0007\0\x0001\x007E\x0001\x0082\x0003\x007E\x0001\0\x0001\x007E\x0001\0\x0014\x007E" + "\x0001\0\x0053\x007E\x0001\0\x008B\x007E\x0001\0\x0007\x007F\x009E\x007E\x0009\0\x0026\x007E\x0002\0" + "\x0001\x007E\x0007\0\x0027\x007E\x0001\0\x0001\x0083\x0007\0\x002D\x007F\x0001\0\x0001\x007F\x0001\0" + "\x0002\x007F\x0001\0\x0002\x007F\x0001\0\x0001\x007F\x0008\0\x001B\x008C\x0005\0\x0003\x008C\x0001\x007E" + "\x0001\x0082\x000B\0\x0005\x007F\x0007\0\x0002\x0083\x0002\0\x000B\x007F\x0001\0\x0001\x007F\x0003\0" + "\x002B\x007E\x0015\x007F\x000A\x0080\x0001\0\x0001\x0080\x0001\x0083\x0001\0\x0002\x007E\x0001\x007F\x0063\x007E" + "\x0001\0\x0001\x007E\x0007\x007F\x0001\x007F\x0001\0\x0006\x007F\x0002\x007E\x0002\x007F\x0001\0\x0004\x007F" + "\x0002\x007E\x000A\x0080\x0003\x007E\x0002\0\x0001\x007E\x000F\0\x0001\x007F\x0001\x007E\x0001\x007F\x001E\x007E" + "\x001B\x007F\x0002\0\x0059\x007E\x000B\x007F\x0001\x007E\x000E\0\x000A\x0080\x0021\x007E\x0009\x007F\x0002\x007E" + "\x0002\0\x0001\x0083\x0001\0\x0001\x007E\x0005\0\x0016\x007E\x0004\x007F\x0001\x007E\x0009\x007F\x0001\x007E" + "\x0003\x007F\x0001\x007E\x0005\x007F\x0012\0\x0019\x007E\x0003\x007F\x0044\0\x0001\x007E\x0001\0\x000B\x007E" + "\x0037\0\x001B\x007F\x0001\0\x0004\x007F\x0036\x007E\x0003\x007F\x0001\x007E\x0012\x007F\x0001\x007E\x0007\x007F" + "\x000A\x007E\x0002\x007F\x0002\0\x000A\x0080\x0001\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0003\x007F" + "\x0001\0\x0008\x007E\x0002\0\x0002\x007E\x0002\0\x0016\x007E\x0001\0\x0007\x007E\x0001\0\x0001\x007E" + "\x0003\0\x0004\x007E\x0002\0\x0001\x007F\x0001\x007E\x0007\x007F\x0002\0\x0002\x007F\x0002\0\x0003\x007F" + "\x0001\x007E\x0008\0\x0001\x007F\x0004\0\x0002\x007E\x0001\0\x0003\x007E\x0002\x007F\x0002\0\x000A\x0080" + "\x0002\x007E\x000F\0\x0003\x007F\x0001\0\x0006\x007E\x0004\0\x0002\x007E\x0002\0\x0016\x007E\x0001\0" + "\x0007\x007E\x0001\0\x0002\x007E\x0001\0\x0002\x007E\x0001\0\x0002\x007E\x0002\0\x0001\x007F\x0001\0" + "\x0005\x007F\x0004\0\x0002\x007F\x0002\0\x0003\x007F\x0003\0\x0001\x007F\x0007\0\x0004\x007E\x0001\0" + "\x0001\x007E\x0007\0\x000A\x0080\x0002\x007F\x0003\x007E\x0001\x007F\x000B\0\x0003\x007F\x0001\0\x0009\x007E" + "\x0001\0\x0003\x007E\x0001\0\x0016\x007E\x0001\0\x0007\x007E\x0001\0\x0002\x007E\x0001\0\x0005\x007E" + "\x0002\0\x0001\x007F\x0001\x007E\x0008\x007F\x0001\0\x0003\x007F\x0001\0\x0003\x007F\x0002\0\x0001\x007E" + "\x000F\0\x0002\x007E\x0002\x007F\x0002\0\x000A\x0080\x0011\0\x0003\x007F\x0001\0\x0008\x007E\x0002\0" + "\x0002\x007E\x0002\0\x0016\x007E\x0001\0\x0007\x007E\x0001\0\x0002\x007E\x0001\0\x0005\x007E\x0002\0" + "\x0001\x007F\x0001\x007E\x0007\x007F\x0002\0\x0002\x007F\x0002\0\x0003\x007F\x0008\0\x0002\x007F\x0004\0" + "\x0002\x007E\x0001\0\x0003\x007E\x0002\x007F\x0002\0\x000A\x0080\x0001\0\x0001\x007E\x0010\0\x0001\x007F" + "\x0001\x007E\x0001\0\x0006\x007E\x0003\0\x0003\x007E\x0001\0\x0004\x007E\x0003\0\x0002\x007E\x0001\0" + "\x0001\x007E\x0001\0\x0002\x007E\x0003\0\x0002\x007E\x0003\0\x0003\x007E\x0003\0\x000C\x007E\x0004\0" + "\x0005\x007F\x0003\0\x0003\x007F\x0001\0\x0004\x007F\x0002\0\x0001\x007E\x0006\0\x0001\x007F\x000E\0" + "\x000A\x0080\x0011\0\x0003\x007F\x0001\0\x0008\x007E\x0001\0\x0003\x007E\x0001\0\x0017\x007E\x0001\0" + "\x000A\x007E\x0001\0\x0005\x007E\x0003\0\x0001\x007E\x0007\x007F\x0001\0\x0003\x007F\x0001\0\x0004\x007F" + "\x0007\0\x0002\x007F\x0001\0\x0002\x007E\x0006\0\x0002\x007E\x0002\x007F\x0002\0\x000A\x0080\x0012\0" + "\x0002\x007F\x0001\0\x0008\x007E\x0001\0\x0003\x007E\x0001\0\x0017\x007E\x0001\0\x000A\x007E\x0001\0" + "\x0005\x007E\x0002\0\x0001\x007F\x0001\x007E\x0007\x007F\x0001\0\x0003\x007F\x0001\0\x0004\x007F\x0007\0" + "\x0002\x007F\x0007\0\x0001\x007E\x0001\0\x0002\x007E\x0002\x007F\x0002\0\x000A\x0080\x0001\0\x0002\x007E" + "\x000F\0\x0002\x007F\x0001\0\x0008\x007E\x0001\0\x0003\x007E\x0001\0\x0029\x007E\x0002\0\x0001\x007E" + "\x0007\x007F\x0001\0\x0003\x007F\x0001\0\x0004\x007F\x0001\x007E\x0008\0\x0001\x007F\x0008\0\x0002\x007E" + "\x0002\x007F\x0002\0\x000A\x0080\x000A\0\x0006\x007E\x0002\0\x0002\x007F\x0001\0\x0012\x007E\x0003\0" + "\x0018\x007E\x0001\0\x0009\x007E\x0001\0\x0001\x007E\x0002\0\x0007\x007E\x0003\0\x0001\x007F\x0004\0" + "\x0006\x007F\x0001\0\x0001\x007F\x0001\0\x0008\x007F\x0012\0\x0002\x007F\x000D\0\x0030\x0086\x0001\x0087" + "\x0002\x0086\x0007\x0087\x0005\0\x0007\x0086\x0008\x0087\x0001\0\x000A\x0080\x0027\0\x0002\x0086\x0001\0" + "\x0001\x0086\x0002\0\x0002\x0086\x0001\0\x0001\x0086\x0002\0\x0001\x0086\x0006\0\x0004\x0086\x0001\0" + "\x0007\x0086\x0001\0\x0003\x0086\x0001\0\x0001\x0086\x0001\0\x0001\x0086\x0002\0\x0002\x0086\x0001\0" + "\x0004\x0086\x0001\x0087\x0002\x0086\x0006\x0087\x0001\0\x0002\x0087\x0001\x0086\x0002\0\x0005\x0086\x0001\0" + "\x0001\x0086\x0001\0\x0006\x0087\x0002\0\x000A\x0080\x0002\0\x0004\x0086\x0020\0\x0001\x007E\x0017\0" + "\x0002\x007F\x0006\0\x000A\x0080\x000B\0\x0001\x007F\x0001\0\x0001\x007F\x0001\0\x0001\x007F\x0004\0" + "\x0002\x007F\x0008\x007E\x0001\0\x0024\x007E\x0004\0\x0014\x007F\x0001\0\x0002\x007F\x0005\x007E\x000B\x007F" + "\x0001\0\x0024\x007F\x0009\0\x0001\x007F\x0039\0\x002B\x0086\x0014\x0087\x0001\x0086\x000A\x0080\x0006\0" + "\x0006\x0086\x0004\x0087\x0004\x0086\x0003\x0087\x0001\x0086\x0003\x0087\x0002\x0086\x0007\x0087\x0003\x0086\x0004\x0087" + "\x000D\x0086\x000C\x0087\x0001\x0086\x0001\x0087\x000A\x0080\x0004\x0087\x0002\x0086\x0026\x007E\x0001\0\x0001\x007E" + "\x0005\0\x0001\x007E\x0002\0\x002B\x007E\x0001\0\x0004\x007E\u0100\x008D\x0049\x007E\x0001\0\x0004\x007E" + "\x0002\0\x0007\x007E\x0001\0\x0001\x007E\x0001\0\x0004\x007E\x0002\0\x0029\x007E\x0001\0\x0004\x007E" + "\x0002\0\x0021\x007E\x0001\0\x0004\x007E\x0002\0\x0007\x007E\x0001\0\x0001\x007E\x0001\0\x0004\x007E" + "\x0002\0\x000F\x007E\x0001\0\x0039\x007E\x0001\0\x0004\x007E\x0002\0\x0043\x007E\x0002\0\x0003\x007F" + "\x0020\0\x0010\x007E\x0010\0\x0055\x007E\x000C\0\u026c\x007E\x0002\0\x0011\x007E\x0001\0\x001A\x007E" + "\x0005\0\x004B\x007E\x0003\0\x0003\x007E\x000F\0\x000D\x007E\x0001\0\x0004\x007E\x0003\x007F\x000B\0" + "\x0012\x007E\x0003\x007F\x000B\0\x0012\x007E\x0002\x007F\x000C\0\x000D\x007E\x0001\0\x0003\x007E\x0001\0" + "\x0002\x007F\x000C\0\x0034\x0086\x0020\x0087\x0003\0\x0001\x0086\x0004\0\x0001\x0086\x0001\x0087\x0002\0" + "\x000A\x0080\x0021\0\x0003\x007F\x0001\x007F\x0001\0\x000A\x0080\x0006\0\x0058\x007E\x0008\0\x0029\x007E" + "\x0001\x007F\x0001\x007E\x0005\0\x0046\x007E\x000A\0\x001D\x007E\x0003\0\x000C\x007F\x0004\0\x000C\x007F" + "\x000A\0\x000A\x0080\x001E\x0086\x0002\0\x0005\x0086\x000B\0\x002C\x0086\x0004\0\x0011\x0087\x0007\x0086" + "\x0002\x0087\x0006\0\x000A\x0080\x0001\x0086\x0003\0\x0002\x0086\x0020\0\x0017\x007E\x0005\x007F\x0004\0" + "\x0035\x0086\x000A\x0087\x0001\0\x001D\x0087\x0002\0\x0001\x007F\x000A\x0080\x0006\0\x000A\x0080\x0006\0" + "\x000E\x0086\x0052\0\x0005\x007F\x002F\x007E\x0011\x007F\x0007\x007E\x0004\0\x000A\x0080\x0011\0\x0009\x007F" + "\x000C\0\x0003\x007F\x001E\x007E\x000D\x007F\x0002\x007E\x000A\x0080\x002C\x007E\x000E\x007F\x000C\0\x0024\x007E" + "\x0014\x007F\x0008\0\x000A\x0080\x0003\0\x0003\x007E\x000A\x0080\x0024\x007E\x0052\0\x0003\x007F\x0001\0" + "\x0015\x007F\x0004\x007E\x0001\x007F\x0004\x007E\x0003\x007F\x0002\x007E\x0009\0\x00C0\x007E\x0027\x007F\x0015\0" + "\x0004\x007F\u0116\x007E\x0002\0\x0006\x007E\x0002\0\x0026\x007E\x0002\0\x0006\x007E\x0002\0\x0008\x007E" + "\x0001\0\x0001\x007E\x0001\0\x0001\x007E\x0001\0\x0001\x007E\x0001\0\x001F\x007E\x0002\0\x0035\x007E" + "\x0001\0\x0007\x007E\x0001\0\x0001\x007E\x0003\0\x0003\x007E\x0001\0\x0007\x007E\x0003\0\x0004\x007E" + "\x0002\0\x0006\x007E\x0004\0\x000D\x007E\x0005\0\x0003\x007E\x0001\0\x0007\x007E\x000F\0\x0002\x007F" + "\x0002\x007F\x0008\0\x0002\x0084\x000A\0\x0001\x0084\x0002\0\x0001\x0082\x0002\0\x0005\x007F\x0010\0" + "\x0002\x0085\x0003\0\x0001\x0083\x000F\0\x0001\x0085\x000B\0\x0005\x007F\x0001\0\x000A\x007F\x0001\0" + "\x0001\x007E\x000D\0\x0001\x007E\x0010\0\x000D\x007E\x0033\0\x0021\x007F\x0011\0\x0001\x007E\x0004\0" + "\x0001\x007E\x0002\0\x000A\x007E\x0001\0\x0001\x007E\x0003\0\x0005\x007E\x0006\0\x0001\x007E\x0001\0" + "\x0001\x007E\x0001\0\x0001\x007E\x0001\0\x0004\x007E\x0001\0\x000B\x007E\x0002\0\x0004\x007E\x0005\0" + "\x0005\x007E\x0004\0\x0001\x007E\x0011\0\x0029\x007E\u032d\0\x0034\x007E\u0716\0\x002F\x007E\x0001\0" + "\x002F\x007E\x0001\0\x0085\x007E\x0006\0\x0004\x007E\x0003\x007F\x0002\x007E\x000C\0\x0026\x007E\x0001\0" + "\x0001\x007E\x0005\0\x0001\x007E\x0002\0\x0038\x007E\x0007\0\x0001\x007E\x000F\0\x0001\x007F\x0017\x007E" + "\x0009\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0007\x007E" + "\x0001\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0007\x007E\x0001\0\x0020\x007F\x002F\0\x0001\x007E" + "\x0050\0\x001A\x0088\x0001\0\x0059\x0088\x000C\0\x00D6\x0088\x002F\0\x0001\x007E\x0001\0\x0001\x0088" + "\x0019\0\x0009\x0088\x0006\x007F\x0001\0\x0005\x0081\x0002\0\x0003\x0088\x0001\x007E\x0001\x007E\x0004\0" + "\x0056\x0089\x0002\0\x0002\x007F\x0002\x0081\x0003\x0089\x005B\x0081\x0001\0\x0004\x0081\x0005\0\x0029\x007E" + "\x0003\0\x005E\x008D\x0011\0\x001B\x007E\x0035\0\x0010\x0081\x00D0\0\x002F\x0081\x0001\0\x0058\x0081" + "\x00A8\0\u19b6\x0088\x004A\0\u51cd\x0088\x0033\0\u048d\x007E\x0043\0\x002E\x007E\x0002\0\u010d\x007E" + "\x0003\0\x0010\x007E\x000A\x0080\x0002\x007E\x0014\0\x002F\x007E\x0004\x007F\x0001\0\x000A\x007F\x0001\0" + "\x0019\x007E\x0007\0\x0001\x007F\x0050\x007E\x0002\x007F\x0025\0\x0009\x007E\x0002\0\x0067\x007E\x0002\0" + "\x0004\x007E\x0001\0\x0004\x007E\x000C\0\x000B\x007E\x004D\0\x000A\x007E\x0001\x007F\x0003\x007E\x0001\x007F" + "\x0004\x007E\x0001\x007F\x0017\x007E\x0005\x007F\x0018\0\x0034\x007E\x000C\0\x0002\x007F\x0032\x007E\x0011\x007F" + "\x000B\0\x000A\x0080\x0006\0\x0012\x007F\x0006\x007E\x0003\0\x0001\x007E\x0004\0\x000A\x0080\x001C\x007E" + "\x0008\x007F\x0002\0\x0017\x007E\x000D\x007F\x000C\0\x001D\x008D\x0003\0\x0004\x007F\x002F\x007E\x000E\x007F" + "\x000E\0\x0001\x007E\x000A\x0080\x0026\0\x0029\x007E\x000E\x007F\x0009\0\x0003\x007E\x0001\x007F\x0008\x007E" + "\x0002\x007F\x0002\0\x000A\x0080\x0006\0\x001B\x0086\x0001\x0087\x0004\0\x0030\x0086\x0001\x0087\x0001\x0086" + "\x0003\x0087\x0002\x0086\x0002\x0087\x0005\x0086\x0002\x0087\x0001\x0086\x0001\x0087\x0001\x0086\x0018\0\x0005\x0086" + "\x000B\x007E\x0005\x007F\x0002\0\x0003\x007E\x0002\x007F\x000A\0\x0006\x007E\x0002\0\x0006\x007E\x0002\0" + "\x0006\x007E\x0009\0\x0007\x007E\x0001\0\x0007\x007E\x0091\0\x0023\x007E\x0008\x007F\x0001\0\x0002\x007F" + "\x0002\0\x000A\x0080\x0006\0\u2ba4\x008D\x000C\0\x0017\x008D\x0004\0\x0031\x008D\x0004\0\x0001\x0024" + "\x0001\x0020\x0001\x0037\x0001\x0034\x0001\x001B\x0001\x0018\x0002\0\x0001\x0014\x0001\x0011\x0002\0\x0001\x000F" + "\x0001\x000D\x000C\0\x0001\x0003\x0001\x0006\x0010\0\x0001\x006E\x0007\0\x0001\x0049\x0001\x0008\x0005\0" + "\x0001\x0001\x0001\x007A\x0003\0\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073" + "\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073" + "\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073" + "\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073" + "\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0074\x0001\x0073\x0001\x0073\x0001\x0073\x0001\x0078\x0001\x0076" + "\x000F\0\x0001\x0070\u02c1\0\x0001\x004C\x00BF\0\x0001\x006F\x0001\x004D\x0001\x000E\x0003\x0077\x0002\x0032" + "\x0001\x0077\x0001\x0032\x0002\x0077\x0001\x001E\x0011\x0077\x0002\x0046\x0007\x004F\x0001\x004E\x0007\x004F\x0007\x0042" + "\x0001\x001F\x0001\x0042\x0001\x0060\x0002\x0036\x0001\x0035\x0001\x0060\x0001\x0036\x0001\x0035\x0008\x0060\x0002\x0047" + "\x0005\x0043\x0002\x003D\x0005\x0043\x0001\x0012\x0008\x002B\x0005\x0013\x0003\x0021\x000A\x0052\x0010\x0021\x0003\x0033" + "\x001A\x0023\x0001\x0022\x0002\x0031\x0002\x0056\x0001\x0057\x0002\x0056\x0002\x0057\x0002\x0056\x0001\x0057\x0003\x0031" + "\x0001\x0030\x0002\x0031\x000A\x0048\x0001\x005E\x0001\x0028\x0001\x0025\x0001\x0048\x0006\x0028\x0001\x0025\x000B\x0028" + "\x0019\x0031\x0007\x0028\x000A\x0053\x0001\x0028\x0005\x000B\x0003\x005F\x0003\x0041\x0001\x0040\x0004\x0041\x0002\x0040" + "\x0008\x0041\x0001\x0040\x0007\x001D\x0001\x001C\x0002\x001D\x0007\x0041\x000E\x005F\x0001\x0069\x0004\x0054\x0001\x0004" + "\x0004\x0051\x0001\x0004\x0005\x0068\x0001\x0067\x0001\x0068\x0003\x0067\x0007\x0068\x0001\x0067\x0013\x0068\x0005\x004B" + "\x0003\x0068\x0006\x004B\x0002\x004B\x0006\x004A\x0005\x004A\x0003\x0064\x0002\x0041\x0007\x0063\x001E\x0041\x0004\x0063" + "\x0005\x0041\x0005\x005F\x0006\x005D\x0002\x005F\x0001\x005D\x0004\x001D\x000B\x0066\x000A\x0051\x000C\x0066\x000A\x007D" + "\x000D\x007C\x0001\x0065\x0002\x007C\x0001\x007B\x0003\x006A\x0001\x000B\x0002\x006A\x0005\x0071\x0004\x006A\x0004\x0072" + "\x0001\x0071\x0003\x0072\x0001\x0071\x0005\x0072\x0002\x0038\x0001\x003B\x0002\x0038\x0001\x003B\x0001\x0038\x0002\x003B" + "\x0001\x0038\x0001\x003B\x000A\x0038\x0001\x003B\x0004\x0005\x0001\x006C\x0001\x006B\x0001\x006D\x0001\x000A\x0003\x0075" + "\x0001\x006D\x0002\x0075\x0001\x0061\x0002\x0062\x0002\x0075\x0001\x000A\x0001\x0075\x0001\x000A\x0001\x0075\x0001\x000A" + "\x0001\x0075\x0003\x000A\x0001\x0075\x0002\x000A\x0001\x0075\x0001\x000A\x0002\x0075\x0001\x000A\x0001\x0075\x0001\x000A" + "\x0001\x0075\x0001\x000A\x0001\x0075\x0001\x000A\x0001\x0075\x0001\x000A\x0001\x003E\x0002\x003A\x0001\x003E\x0001\x003A" + "\x0002\x003E\x0004\x003A\x0001\x003E\x0007\x003A\x0001\x003E\x0004\x003A\x0001\x003E\x0004\x003A\x0001\x0075\x0001\x000A" + "\x0001\x0075\x000A\x0019\x0001\x002F\x0011\x0019\x0001\x002F\x0003\x001A\x0001\x002F\x0003\x0019\x0001\x002F\x0001\x0019" + "\x0002\x0002\x0002\x0019\x0001\x002F\x000D\x005C\x0004\x0027\x0004\x002C\x0001\x0050\x0001\x002E\x0008\x0050\x0007\x002C" + "\x0006\x0075\x0004\x0015\x0001\x0017\x001F\x0015\x0001\x0017\x0004\x0015\x0015\x0045\x0001\x0079\x0009\x0045\x0011\x0016" + "\x0005\x0045\x0001\x0007\x000A\x002D\x0005\x0045\x0006\x0044\x0004\x003E\x0001\x003F\x0001\x0016\x0005\x005B\x000A\x0059" + "\x000F\x005B\x0001\x003C\x0003\x0039\x000C\x0058\x0001\x0009\x0009\x0026\x0001\x002A\x0005\x0026\x0004\x005A\x000B\x0029" + "\x0002\x000C\x0009\x0026\x0001\x002A\x0019\x0026\x0001\x002A\x0004\x0009\x0004\x0026\x0002\x002A\x0002\x0055\x0001\x0010" + "\x0005\x0055\x002A\x0010\u1900\0\u016e\x0088\x0002\0\x006A\x0088\x0026\0\x0007\x007E\x000C\0\x0005\x007E" + "\x0005\0\x0001\x008C\x0001\x007F\x000A\x008C\x0001\0\x000D\x008C\x0001\0\x0005\x008C\x0001\0\x0001\x008C" + "\x0001\0\x0002\x008C\x0001\0\x0002\x008C\x0001\0\x000A\x008C\x0062\x007E\x0021\0\u016b\x007E\x0012\0" + "\x0040\x007E\x0002\0\x0036\x007E\x0028\0\x000C\x007E\x0004\0\x0010\x007F\x0001\x0083\x0002\0\x0001\x0082" + "\x0001\x0083\x000B\0\x0007\x007F\x000C\0\x0002\x0085\x0018\0\x0003\x0085\x0001\x0083\x0001\0\x0001\x0084" + "\x0001\0\x0001\x0083\x0001\x0082\x001A\0\x0005\x007E\x0001\0\x0087\x007E\x0002\0\x0001\x007F\x0007\0" + "\x0001\x0084\x0004\0\x0001\x0083\x0001\0\x0001\x0084\x0001\0\x000A\x0080\x0001\x0082\x0001\x0083\x0005\0" + "\x001A\x007E\x0004\0\x0001\x0085\x0001\0\x001A\x007E\x000B\0\x0038\x0081\x0002\x007F\x001F\x008D\x0003\0" + "\x0006\x008D\x0002\0\x0006\x008D\x0002\0\x0006\x008D\x0002\0\x0003\x008D\x001C\0\x0003\x007F\x0004\0";
+
+	  /// <summary>
+	  /// Translates characters to character classes
+	  /// </summary>
+	  private static readonly char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+	  /// <summary>
+	  /// Translates DFA states to action switch labels.
+	  /// </summary>
+	  private static readonly int[] ZZ_ACTION = zzUnpackAction();
+
+	  private const string ZZ_ACTION_PACKED_0 = "\x0001\0\x0016\x0001\x0001\x0002\x0001\x0003\x0001\x0004\x0001\x0001\x0001\x0005\x0001\x0006" + "\x0001\x0007\x0001\x0002\x0001\x0008\x0011\0\x0001\x0002\x0001\0\x0001\x0002\x000A\0" + "\x0001\x0003\x0011\0\x0001\x0002\x0015\0\x0001\x0002\x004D\0\x0001\x0001\x0010\0";
+
+	  private static int [] zzUnpackAction()
+	  {
+		int[] result = new int[197];
+		int offset = 0;
+		offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackAction(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
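
	  // The packed format is a run-length encoding read as (count, value) char
	  // pairs, each pair expanding to 'count' copies of 'value'. For instance the
	  // prefix "\x0001\0\x0016\x0001" of ZZ_ACTION_PACKED_0 above unpacks to one 0
	  // followed by 0x16 (22) copies of 1 (an illustrative call, not part of the port):
	  //
	  //   int[] demo = new int[23];
	  //   zzUnpackAction("\x0001\0\x0016\x0001", 0, demo); // demo == [0, 1, 1, ..., 1]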
+
+
+	  /// <summary>
+	  /// Translates a state to a row index in the transition table
+	  /// </summary>
+	  private static readonly int[] ZZ_ROWMAP = zzUnpackRowMap();
+
+	  private const string ZZ_ROWMAP_PACKED_0 = "\0\0\0\x008E\0\u011c\0\u01aa\0\u0238\0\u02c6\0\u0354\0\u03e2" + "\0\u0470\0\u04fe\0\u058c\0\u061a\0\u06a8\0\u0736\0\u07c4\0\u0852" + "\0\u08e0\0\u096e\0\u09fc\0\u0a8a\0\u0b18\0\u0ba6\0\u0c34\0\u0cc2" + "\0\u0d50\0\u0dde\0\u0e6c\0\u0efa\0\u0f88\0\u1016\0\u10a4\0\u1132" + "\0\u11c0\0\u011c\0\u01aa\0\u124e\0\u12dc\0\u0354\0\u03e2\0\u0470" + "\0\u04fe\0\u136a\0\u13f8\0\u1486\0\u1514\0\u07c4\0\u15a2\0\u1630" + "\0\u16be\0\u174c\0\u17da\0\u1868\0\u18f6\0\u02c6\0\u1984\0\u1a12" + "\0\u06a8\0\u1aa0\0\u1b2e\0\u1bbc\0\u1c4a\0\u1cd8\0\u1d66\0\u1df4" + "\0\u1e82\0\u1f10\0\u1f9e\0\u202c\0\u20ba\0\u2148\0\u21d6\0\u2264" + "\0\u22f2\0\u2380\0\u240e\0\u249c\0\u252a\0\u25b8\0\u2646\0\u0e6c" + "\0\u26d4\0\u2762\0\u27f0\0\u287e\0\u290c\0\u299a\0\u2a28\0\u2ab6" + "\0\u2b44\0\u2bd2\0\u2c60\0\u2cee\0\u2d7c\0\u2e0a\0\u2e98\0\u2f26" + "\0\u2fb4\0\u3042\0\u30d0\0\u315e\0\u31ec\0\u327a\0\u3308\0\u3396" + "\0\u3424\0\u34b2\0\u3540\0\u35ce\0\u365c\0\u36ea\0\u3778\0\u3806" + "\0\u3894\0\u3922\0\u39b0\0\u3a3e\0\u3acc\0\u3b5a\0\u3be8\0\u3c76" + "\0\u3d04\0\u3d92\0\u3e20\0\u3eae\0\u3f3c\0\u3fca\0\u4058\0\u40e6" + "\0\u4174\0\u4202\0\u4290\0\u431e\0\u43ac\0\u443a\0\u44c8\0\u4556" + "\0\u45e4\0\u4672\0\u4700\0\u478e\0\u481c\0\u48aa\0\u4938\0\u49c6" + "\0\u4a54\0\u4ae2\0\u4b70\0\u4bfe\0\u4c8c\0\u4d1a\0\u4da8\0\u4e36" + "\0\u4ec4\0\u4f52\0\u4fe0\0\u506e\0\u50fc\0\u518a\0\u5218\0\u52a6" + "\0\u5334\0\u53c2\0\u5450\0\u54de\0\u556c\0\u55fa\0\u5688\0\u5716" + "\0\u57a4\0\u5832\0\u58c0\0\u594e\0\u59dc\0\u5a6a\0\u5af8\0\u5b86" + "\0\u5c14\0\u5ca2\0\u5d30\0\u5dbe\0\u5e4c\0\u5eda\0\u5f68\0\u5ff6" + "\0\u6084\0\u6112\0\u61a0\0\u622e\0\u62bc\0\u634a\0\u63d8\0\u6466" + "\0\u64f4\0\u6582\0\u6610\0\u669e\0\u672c";
+
+	  private static int [] zzUnpackRowMap()
+	  {
+		int[] result = new int[197];
+		int offset = 0;
+		offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackRowMap(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int high = packed[i++] << 16;
+		  result[j++] = high | packed[i++];
+		}
+		return j;
+	  }
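
	  // Row offsets overflow 16 bits, so each int is packed as two chars (high
	  // word first): the prefix "\0\0\0\x008E" of ZZ_ROWMAP_PACKED_0 unpacks to
	  // offsets 0x0000 and 0x008E (0 and 142), the first two rows of the table.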
+
+	  /// <summary>
+	  /// The transition table of the DFA
+	  /// </summary>
+	  private static readonly int[] ZZ_TRANS = zzUnpackTrans();
+
+	  private const string ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0002\x0001\x0004\x0002\x0002\x0001\x0005\x0001\x0002\x0001\x0006" + "\x0004\x0002\x0001\x0007\x0001\x0002\x0001\x0008\x0001\x0002\x0001\x0009\x0002\x0002\x0001\x000A" + "\x0003\x0002\x0001\x000B\x0002\x0002\x0001\x000C\x0004\x0002\x0001\x000D\x0003\x0002\x0001\x000E" + "\x000F\x0002\x0001\x000F\x0002\x0002\x0001\x0010\x0036\x0002\x0001\x0011\x0001\x0002\x0001\x0012" + "\x0002\x0002\x0001\x0013\x0001\x0014\x0001\x0002\x0001\x0015\x0001\x0002\x0001\x0016\x0001\x0002" + "\x0001\x0017\x0003\x0002\x0001\x0018\x0001\x0002\x0001\x0019\x0001\x001A\x0003\x0002\x0001\x001B" + "\x0002\x001C\x0001\x001D\x0001\x001E\x0002\x0002\x0001\x001F\x0001\x0020\x0090\0\x0001\x0018" + "\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x000E\0\x0001\x0018\x000D\0\x0001\x0018" + "\x0010\0\x0001\x0018\x0001\0\x0001\x0018\x0021\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0008\0\x0002\x0018\x0005\0\x0002\x0018\x0008\0\x0001\x0018\x0016\0\x0002\x0018" + "\x0005\0\x0001\x0018\x0002\0\x0001\x0018\x0003\0\x0002\x0018\x0008\0\x0004\x0018" + "\x0001\0\x0003\x0018\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0002\0\x0001\x0018" + "\x0004\0\x0004\x0018\x0001\0\x0002\x0018\x0001\0\x0001\x0018\x0002\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0002\0\x0004\x0018\x0002\0\x0003\x0018\x0001\0\x0002\x0018" + "\x0001\0\x0003\x0018\x0001\0\x0004\x0018\x0001\0\x0002\x0018\x0005\0\x0004\x0018" + "\x0002\0\x0008\x0018\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0001\0\x0002\x0018" + "\x0004\0\x0001\x0018\x0003\0\x0003\x0018\x0017\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x0017\0\x0001\x0018" + "\x0033\0\x0001\x0018\x0019\0\x0001\x0018\x0003\0\x0004\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0019\x0002\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0002\x0018" + "\x0002\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0002\0\x0004\x0018" + "\x0001\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0\x0003\x0018\x0001\0\x0002\x0018" + "\x0001\0\x0004\x0018\x0001\0\x0002\x0018\x0002\0\x0008\x0018\x0001\0\x0002\x0018" + "\x0001\0\x0008\x0018\x0001\x0019\x0001\0\x0007\x0018\x0001\0\x0008\x0018\x0001\0" + "\x0006\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0003\0\x0003\x0018\x001E\0\x0001\x0018\x000F\0\x0001\x0018\x0013\0" + "\x0001\x0018\x0013\0\x0001\x0018\x0006\0\x0003\x0018\x001F\0\x0001\x0018\x0007\0" + "\x0001\x0018\x0018\0\x0001\x0018\x0001\0\x0002\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0004\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0001\0" + "\x0003\x0018\x0001\0\x0002\x0018\x0001\0\x0004\x0018\x0001\0\x0003\x0018\x0001\0" + "\x000F\x0018\x0001\0\x0002\x0018\x0001\0\x0011\x0018\x0001\0\x0002\x0018\x0001\0" + "\x0021\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0003\x0018\x001E\0\x0001\x0018\x0003\0" + "\x0002\x0018\x000A\0\x0002\x0018\x000B\0\x0001\x0018\x0006\0\x0001\x0018\x0002\0" + "\x0002\x0018\x0006\0\x0001\x0018\x0004\0\x0002\x0018\x0002\0\x0002\x0018\x0005\0" + "\x0003\x0018\x0010\0\x0001\x0018\x000E\0\x0001\x0018\x0007\0\x0001\x0018\x0018\0" + "\x0001\x0018\x0001\0\x0002\x0018\x0001\0\x0001\x0018\x0002\0\x0002\x0018\x0002\0" + "\x0001\x0018\x0003\0\x0002\x0018\x0001\0\x0003\x0018\x0001\0\x0002\x0018\x0001\0" + "\x0004\x0018\x0001\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0" + "\x0009\x0018\x0001\0\x0002\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0001\0" + "\x000C\x0018\x0001\0\x0002\x0018\x0001\0\x0008\x0018\x0001\0\x0002\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0013\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0003\x0018\x0012\0" + "\x0001\x0018\x0016\0\x0002\x0018\x0013\0\x0001\x0019\x0001\x0018\x0020\0\x0001\x0019" + "\x0041\0\x0001\x0019\x0017\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018" + "\x000D\0\x0003\x0018\x0003\0\x0001\x0018\x0007\0\x0002\x0018\x0001\0\x0004\x0019" + "\x0001\0\x0002\x0018\x000B\0\x0001\x0018\x0013\0\x0001\x0018\x0024\0\x0001\x0018" + "\x0003\0\x0002\x0018\x000A\0\x0002\x0018\x0001\0\x0003\x0018\x0007\0\x0001\x0018" + "\x0006\0\x0002\x0018\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0004\0\x0002\x0018" + "\x0002\0\x0002\x0018\x0005\0\x0003\x0018\x0002\0\x0001\x0018\x0003\0\x0002\x0019" + "\x0008\0\x0001\x0018\x000E\0\x0001\x0018\x0007\0\x0001\x0018\x0018\0\x0001\x0018" + "\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0\x0001\x0018" + "\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0\x0001\x0018" + "\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018" + "\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0006\0\x0002\x0018\x0006\0\x0001\x0018" + "\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x0022\0\x0001\x0018\x000F\0\x0002\x0018" + "\x0012\0\x0001\x0018\x0002\0\x0002\x0018\x000B\0\x0001\x0018\x0003\0\x0002\x0018" + "\x0005\0\x0003\x0018\x0010\0\x0001\x0018\x000E\0\x0001\x0018\x0007\0\x0001\x0018" + "\x001D\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018" + "\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018" + "\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0\x0001\x0018" + "\x0005\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x005F\0\x0001\x001E\x0021\0\x0001\x001A\x0022\0\x0001\x001D" + "\x0006\0\x0001\x001D\x0002\0\x0001\x001D\x0003\0\x0002\x001D\x0008\0\x0004\x001D" + "\x0001\0\x0003\x001D\x0001\0\x0001\x001D\x0002\0\x0001\x001D\x0002\0\x0001\x001D" + "\x0004\0\x0004\x001D\x0001\0\x0002\x001D\x0006\0\x0001\x001D\x0002\0\x0004\x001D" + "\x0002\0\x0003\x001D\x0001\0\x0002\x001D\x0001\0\x0003\x001D\x0001\0\x0004\x001D" + "\x0001\0\x0002\x001D\x0005\0\x0004\x001D\x0002\0\x0008\x001D\x0004\0\x0001\x001D" + "\x0001\0\x0002\x001D\x0004\0\x0001\x001D\x0003\0\x0003\x001D\x0012\0\x0001\x001D" + "\x0001\0\x0002\x001D\x0001\0\x0001\x001D\x0001\0\x0004\x001D\x0001\0\x0001\x001D" + "\x0001\0\x0001\x001D\x0001\0\x0002\x001D\x0001\0\x0003\x001D\x0001\0\x0002\x001D" + "\x0001\0\x0004\x001D\x0001\0\x0003\x001D\x0001\0\x000F\x001D\x0001\0\x0002\x001D" + "\x0001\0\x0011\x001D\x0001\0\x0002\x001D\x0001\0\x0021\x001D\x0001\0\x0001\x001D" + "\x0001\0\x0002\x001D\x0002\0\x0001\x001D\x0001\0\x0001\x001D\x0001\0\x0001\x001D" + "\x0001\0\x0003\x001D\x0012\0\x0001\x001D\x0001\0\x0002\x001D\x0001\0\x0001\x001D" + "\x0001\0\x0004\x001D\x0001\0\x0001\x001D\x0001\0\x0001\x001D\x0001\0\x0002\x001D" + "\x0002\0\x0001\x001D\x0002\0\x0002\x001D\x0001\0\x0004\x001D\x0001\0\x0003\x001D" + "\x0001\0\x000F\x001D\x0001\0\x0002\x001D\x0001\0\x0011\x001D\x0001\0\x0002\x001D" + "\x0001\0\x0021\x001D\x0001\0\x0001\x001D\x0001\0\x0002\x001D\x0002\0\x0001\x001D" + "\x0001\0\x0001\x001D\x0001\0\x0001\x001D\x0001\0\x0003\x001D\x001E\0\x0001\x001D" + "\x000F\0\x0001\x001D\x0013\0\x0001\x001D\x001A\0\x0001\x001D\x0021\0\x0001\x001D" + "\x0007\0\x0001\x001D\x0018\0\x0001\x001D\x0001\0\x0002\x001D\x0003\0\x0004\x001D" + "\x0001\0\x0001\x001D\x0001\0\x0001\x001D\x0001\0\x0002\x001D\x0001\0\x0003\x001D" + "\x0001\0\x0002\x001D\x0001\0\x0004\x001D\x0001\0\x0003\x001D\x0001\0\x0008\x001D" + "\x0001\0\x0006\x001D\x0001\0\x0002\x001D\x0001\0\x0011\x001D\x0001\0\x0002\x001D" + "\x0001\0\x0021\x001D\x0001\0\x0001\x001D\x0001\0\x0002\x001D\x0002\0\x0001\x001D" + "\x0001\0\x0001\x001D\x0001\0\x0001\x001D\x0001\0\x0003\x001D\x0075\0\x0001\x0021" + "\x0015\0\x0001\x001E\x0002\x0021\x0011\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0" + "\x0001\x0024\x0001\0\x0001\x0025\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0" + "\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x002A\x0002\0\x0001\x002B\x0004\0" + "\x0001\x002C\x0003\0\x0001\x002D\x000F\0\x0001\x002E\x0002\0\x0001\x002F\x0011\0" + "\x0001\x0030\x0002\0\x0001\x0031\x0031\0\x0002\x0018\x0001\x0032\x0001\0\x0001\x0033" + "\x0001\0\x0001\x0033\x0001\x0034\x0001\0\x0001\x0018\x0002\0\x0001\x0033\x0001\0" + "\x0001\x001F\x0001\x0018\x0001\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0\x0001\x0035" + "\x0001\0\x0001\x0036\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0\x0001\x0028" + "\x0002\0\x0001\x0029\x0003\0\x0001\x0037\x0002\0\x0001\x0038\x0004\0\x0001\x0039" + "\x0003\0\x0001\x003A\x000F\0\x0001\x002E\x0002\0\x0001\x003B\x0011\0\x0001\x003C" + "\x0002\0\x0001\x003D\x0031\0\x0001\x0018\x0002\x0019\x0002\0\x0002\x003E\x0001\x003F" + "\x0001\0\x0001\x0019\x0002\0\x0001\x003E\x0001\0\x0001\x001F\x0001\x0018\x0006\0" + "\x0001\x0040\x0011\0\x0001\x0041\x0002\0\x0001\x0042\x0008\0\x0001\x0043\x0012\0" + "\x0001\x0044\x0011\0\x0001\x0045\x0002\0\x0001\x0046\x0021\0\x0001\x0047\x0010\0" + "\x0001\x001A\x0001\0\x0001\x001A\x0003\0\x0001\x0034\x0001\0\x0001\x001A\x0007\0" + "\x0001\x0022\x0001\0\x0001\x0023\x0002\0\x0001\x0048\x0001\0\x0001\x0036\x0004\0" + "\x0001\x0026\x0001\0\x0001\x0027\x0001\0\x0001\x0028\x0002\0\x0001\x0029\x0003\0" + "\x0001\x0049\x0002\0\x0001\x004A\x0004\0\x0001\x0039\x0003\0\x0001\x004B\x000F\0" + "\x0001\x002E\x0002\0\x0001\x004C\x0011\0\x0001\x004D\x0002\0\x0001\x004E\x0021\0" + "\x0001\x004F\x000F\0\x0001\x0018\x0001\x0050\x0001\x0019\x0001\x0051\x0003\0\x0001\x0050" + "\x0001\0\x0001\x0050\x0004\0\x0001\x001F\x0001\x0018\x0086\0\x0002\x001C\x000C\0" + "\x0001\x0052\x0011\0\x0001\x0053\x0002\0\x0001\x0054\x0008\0\x0001\x0055\x0012\0" + "\x0001\x0056\x0011\0\x0001\x0057\x0002\0\x0001\x0058\x0032\0\x0001\x001D\x0007\0" + "\x0001\x001D\x000C\0\x0001\x0059\x0011\0\x0001\x005A\x0002\0\x0001\x005B\x0008\0" + "\x0001\x005C\x0012\0\x0001\x005D\x0011\0\x0001\x005E\x0002\0\x0001\x005F\x0032\0" + "\x0001\x001E\x0007\0\x0001\x001E\x0007\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0" + "\x0001\x0060\x0001\0\x0001\x0025\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0" + "\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x0061\x0002\0\x0001\x0062\x0004\0" + "\x0001\x002C\x0003\0\x0001\x0063\x000F\0\x0001\x002E\x0002\0\x0001\x0064\x0011\0" + "\x0001\x0065\x0002\0\x0001\x0066\x0031\0\x0001\x0018\x0001\x001F\x0001\x0032\x0001\0" + "\x0001\x0033\x0001\0\x0001\x0033\x0001\x0034\x0001\0\x0001\x001F\x0002\0\x0001\x0067" + "\x0001\x0068\x0001\x001F\x0001\x0018\x0001\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0" + "\x0001\x0069\x0001\0\x0001\x0025\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0" + "\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x006A\x0002\0\x0001\x006B\x0004\0" + "\x0001\x002C\x0003\0\x0001\x006C\x000F\0\x0001\x002E\x0002\0\x0001\x006D\x0011\0" + "\x0001\x006E\x0002\0\x0001\x006F\x0031\0\x0001\x0018\x0001\x0020\x0001\x0032\x0001\0" + "\x0001\x0033\x0001\0\x0001\x0033\x0001\x0034\x0001\0\x0001\x0020\x0002\0\x0001\x0033" + "\x0001\0\x0001\x001F\x0001\x0020\x0006\0\x0001\x0070\x0011\0\x0001\x0071\x0002\0" + "\x0001\x0072\x0008\0\x0001\x0073\x0012\0\x0001\x0074\x0011\0\x0001\x0075\x0002\0" + "\x0001\x0076\x002D\0\x0001\x0077\x0004\0\x0001\x0021\x0007\0\x0001\x0021\x000D\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0" + "\x0001\x0018\x000B\0\x0001\x0018\x0002\0\x0001\x0018\x0008\0\x0001\x0018\x0012\0" + "\x0004\x0018\x001D\0\x0001\x0018\x0019\0\x0001\x0018\x0003\0\x0004\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0032\x0002\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0" + "\x0002\x0018\x0002\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0002\0" + "\x0004\x0018\x0001\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0\x0003\x0018\x0001\0" + "\x0002\x0018\x0001\0\x0004\x0018\x0001\0\x0002\x0018\x0002\0\x0008\x0018\x0001\0" + "\x0002\x0018\x0001\0\x0008\x0018\x0001\x0032\x0001\0\x0007\x0018\x0001\0\x0008\x0018" + "\x0001\0\x0006\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0003\0\x0003\x0018\x0012\0\x0001\x0018\x0016\0\x0002\x0018" + "\x0013\0\x0001\x0032\x0001\x0018\x0020\0\x0001\x0032\x000B\0\x0001\x0018\x0035\0" + "\x0001\x0032\x0009\0\x0001\x0018\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0" + "\x0004\x0018\x0001\0\x0002\x0018\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0004\0\x0003\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018\x0005\0" + "\x0004\x0018\x0002\0\x0002\x0018\x000A\0\x0001\x0018\x0007\0\x0001\x0018\x0024\0" + "\x0001\x0018\x0003\0\x0002\x0018\x000A\0\x0002\x0018\x0001\0\x0003\x0018\x0007\0" + "\x0001\x0018\x0006\0\x0002\x0018\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0004\0" + "\x0002\x0018\x0002\0\x0002\x0018\x0005\0\x0003\x0018\x0002\0\x0001\x0018\x0003\0" + "\x0002\x0032\x0008\0\x0001\x0018\x000E\0\x0001\x0018\x0007\0\x0001\x0018\x0018\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0" + "\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0" + "\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x001B\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018\x0007\0" + "\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0\x0005\x0018\x0001\0" + "\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0001\x0018\x005C\0\x0002\x0018\x0015\0\x0004\x0018\x002D\0\x0001\x0018\x000D\0" + "\x0002\x0018\x0008\0\x0002\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0009\0" + "\x0001\x0018\x0009\0\x0002\x0018\x0006\0\x0001\x0018\x0002\0\x0004\x0018\x0003\0" + "\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x0003\x0018\x0001\0\x0002\x0018\x0001\0" + "\x0001\x0018\x0008\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0002\x0018\x0001\0" + "\x0004\x0018\x0013\0\x0001\x0018\x0011\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0" + "\x0001\x0078\x0001\0\x0001\x0025\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0" + "\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x0079\x0002\0\x0001\x007A\x0004\0" + "\x0001\x002C\x0003\0\x0001\x007B\x000F\0\x0001\x002E\x0002\0\x0001\x007C\x0011\0" + "\x0001\x007D\x0002\0\x0001\x007E\x0031\0\x0001\x0018\x0002\x0032\x0002\0\x0002\x007F" + "\x0001\x0034\x0001\0\x0001\x0032\x0002\0\x0001\x007F\x0001\0\x0001\x001F\x0001\x0018" + "\x0001\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0\x0001\x0080\x0001\0\x0001\x0081" + "\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0\x0001\x0028\x0002\0\x0001\x0029" + "\x0003\0\x0001\x0082\x0002\0\x0001\x0083\x0004\0\x0001\x0084\x0003\0\x0001\x0085" + "\x000F\0\x0001\x002E\x0002\0\x0001\x0086\x0011\0\x0001\x0087\x0002\0\x0001\x0088" + "\x0031\0\x0001\x0018\x0001\x0033\x0007\0\x0001\x0033\x0004\0\x0002\x0018\x0001\0" + "\x0001\x0022\x0001\0\x0001\x0023\x0002\0\x0001\x0089\x0001\0\x0001\x0025\x0004\0" + "\x0001\x0026\x0001\0\x0001\x0027\x0001\0\x0001\x0028\x0002\0\x0001\x0029\x0003\0" + "\x0001\x008A\x0002\0\x0001\x008B\x0004\0\x0001\x002C\x0003\0\x0001\x008C\x000F\0" + "\x0001\x002E\x0002\0\x0001\x008D\x0011\0\x0001\x008E\x0002\0\x0001\x008F\x0021\0" + "\x0001\x004F\x000F\0\x0001\x0018\x0001\x0034\x0001\x0032\x0001\x0051\x0003\0\x0001\x0034" + "\x0001\0\x0001\x0034\x0004\0\x0001\x001F\x0001\x0018\x0007\0\x0001\x0018\x0004\0" + "\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x000B\0" + "\x0001\x0019\x0002\0\x0001\x0019\x0008\0\x0001\x0018\x0012\0\x0004\x0019\x001D\0" + "\x0001\x0018\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0\x0001\x0019\x0001\x0018" + "\x0020\0\x0001\x0019\x000B\0\x0001\x0019\x0035\0\x0001\x0019\x0009\0\x0001\x0019" + "\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018\x0001\x0019\x0001\0" + "\x0002\x0019\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0\x0001\x0019\x0004\0" + "\x0001\x0019\x0002\x0018\x0001\0\x0004\x0019\x0001\0\x0002\x0018\x0005\0\x0004\x0019" + "\x0002\0\x0001\x0018\x0001\x0019\x000A\0\x0001\x0019\x0007\0\x0001\x0018\x0018\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0" + "\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x0019\x0001\0" + "\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x001B\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018\x0007\0" + "\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0\x0001\x0018\x0004\x0019" + "\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x005C\0\x0002\x0019\x0015\0\x0004\x0019\x002D\0\x0001\x0019" + "\x000D\0\x0002\x0019\x0008\0\x0002\x0019\x0001\0\x0001\x0019\x0001\0\x0001\x0019" + "\x0009\0\x0001\x0019\x0009\0\x0002\x0019\x0006\0\x0001\x0019\x0002\0\x0004\x0019" + "\x0003\0\x0001\x0019\x0002\0\x0002\x0019\x0001\0\x0003\x0019\x0001\0\x0002\x0019" + "\x0001\0\x0001\x0019\x0008\0\x0001\x0019\x0001\0\x0002\x0019\x0002\0\x0002\x0019" + "\x0001\0\x0004\x0019\x0013\0\x0001\x0019\x0016\0\x0001\x0090\x0001\0\x0001\x0091" + "\x000F\0\x0001\x0092\x0002\0\x0001\x0093\x0004\0\x0001\x0094\x0003\0\x0001\x0095" + "\x0012\0\x0001\x0096\x0011\0\x0001\x0097\x0002\0\x0001\x0098\x0032\0\x0001\x003E" + "\x0001\x0019\x0006\0\x0001\x003E\x0007\0\x0001\x0022\x0001\0\x0001\x0023\x0002\0" + "\x0001\x0099\x0001\0\x0001\x0036\x0004\0\x0001\x0026\x0001\0\x0001\x0027\x0001\0" + "\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x009A\x0002\0\x0001\x009B\x0004\0" + "\x0001\x0039\x0003\0\x0001\x009C\x000F\0\x0001\x002E\x0002\0\x0001\x009D\x0011\0" + "\x0001\x009E\x0002\0\x0001\x009F\x0021\0\x0001\x004F\x000F\0\x0001\x0018\x0001\x003F" + "\x0001\x0019\x0001\x0051\x0003\0\x0001\x003F\x0001\0\x0001\x003F\x0004\0\x0001\x001F" + "\x0001\x0018\x0039\0\x0001\x001A\x0002\0\x0001\x001A\x001B\0\x0004\x001A\x008E\0" + "\x0001\x001A\x003F\0\x0001\x001A\x0024\0\x0001\x001A\x0001\0\x0002\x001A\x0011\0" + "\x0001\x001A\x0004\0\x0001\x001A\x000F\0\x0004\x001A\x0003\0\x0001\x001A\x000A\0" + "\x0001\x001A\x0083\0\x0001\x001A\x0092\0\x0004\x001A\x006A\0\x0002\x001A\x0015\0" + "\x0004\x001A\x002D\0\x0001\x001A\x000D\0\x0002\x001A\x0008\0\x0002\x001A\x0001\0" + "\x0001\x001A\x0001\0\x0001\x001A\x0009\0\x0001\x001A\x0009\0\x0002\x001A\x0006\0" + "\x0001\x001A\x0002\0\x0004\x001A\x0003\0\x0001\x001A\x0002\0\x0002\x001A\x0001\0" + "\x0003\x001A\x0001\0\x0002\x001A\x0001\0\x0001\x001A\x0008\0\x0001\x001A\x0001\0" + "\x0002\x001A\x0002\0\x0002\x001A\x0001\0\x0004\x001A\x0013\0\x0001\x001A\x007F\0" + "\x0001
 \x001A\x0025\0\x0001\x0018\x0004\0\x0001\x0018\x0009\0\x0001\x0018\x0012\0" + "\x0001\x0018\x0003\0\x0001\x0018\x000B\0\x0001\x0050\x0002\0\x0001\x0050\x0008\0" + "\x0001\x0018\x0012\0\x0004\x0050\x001D\0\x0001\x0018\x0016\0\x0001\x0018\x0016\0" + "\x0002\x0018\x0013\0\x0001\x0019\x0001\x0018\x0020\0\x0001\x0019\x000B\0\x0001\x0050" + "\x0035\0\x0001\x0019\x0009\0\x0001\x0050\x000D\0\x0004\x0018\x0002\0\x0002\x0018" + "\x000C\0\x0003\x0018\x0001\x0050\x0001\0\x0002\x0050\x0009\0\x0003\x0018\x0003\0" + "\x0001\x0018\x0001\0\x0001\x0050\x0004\0\x0001\x0050\x0002\x0018\x0001\0\x0004\x0019" + "\x0001\0\x0002\x0018\x0005\0\x0004\x0050\x0002\0\x0001\x0018\x0001\x0050\x000A\0" + "\x0001\x0050\x0007\0\x0001\x0018\x0018\0\x0001\x0018\x0004\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0003\0\x0001\x0018\x0006\0\x0001\x0018\x0005\0\x0001\x0018\x0002\0" + "\x0002\x0018\x0001\0\x000F\x0018\x0002\0\x0001\x0018\x000B\0\x0007\x0018\x0002\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0
 \x0001\x0018\x0001\0" + "\x0003\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0004\0\x0001\x0050\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0007\0" + "\x0001\x0018\x0001\0\x0001\x0018\x001B\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0" + "\x0001\x0018\x0003\0\x0001\x0018\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0" + "\x0003\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0" + "\x0002\x0018\x0004\0\x0001\x0018\x0004\x0050\x0001\0\x0001\x0018\x0002\0\x0001\x0018" + "\x0004\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x005C\0\x0002\x0050" + "\x0015\0\x0004\x0050\x002D\0\x0001\x0050\x000D\0\x0002\x0050\x0008\0\x0002\x0050" + "\x0001\0\x0001\x0050\x0001\0\x0001\x0050\x0009\0\x0001\x0050\x0009\0\x0002\x0050" + "\x0006\0\x0001\x0050\x0002\0\x0004\x0050\x0003\0\x0001\x0050\x0002\0\x0002\x0050" + "\x0001\0\x0003\x0050\x0001\0\x0002\x0050\x0001\0\x0001\x0050\x0008\0\x0001\x0050" + "\x0001\0\x0002\x0050\x0002\0\x000
 2\x0050\x0001\0\x0004\x0050\x0013\0\x0001\x0050" + "\x007F\0\x0001\x0051\x0024\0\x0001\x00A0\x0011\0\x0001\x00A1\x0002\0\x0001\x00A2" + "\x0008\0\x0001\x00A3\x0012\0\x0001\x00A4\x0011\0\x0001\x00A5\x0002\0\x0001\x00A6" + "\x0021\0\x0001\x004F\x0010\0\x0001\x0051\x0001\0\x0001\x0051\x0003\0\x0001\x0034" + "\x0001\0\x0001\x0051\x003F\0\x0001\x001D\x0002\0\x0001\x001D\x001B\0\x0004\x001D" + "\x008E\0\x0001\x001D\x003F\0\x0001\x001D\x0024\0\x0001\x001D\x0001\0\x0002\x001D" + "\x0011\0\x0001\x001D\x0004\0\x0001\x001D\x000F\0\x0004\x001D\x0003\0\x0001\x001D" + "\x000A\0\x0001\x001D\x0083\0\x0001\x001D\x0092\0\x0004\x001D\x006A\0\x0002\x001D" + "\x0015\0\x0004\x001D\x002D\0\x0001\x001D\x000D\0\x0002\x001D\x0008\0\x0002\x001D" + "\x0001\0\x0001\x001D\x0001\0\x0001\x001D\x0009\0\x0001\x001D\x0009\0\x0002\x001D" + "\x0006\0\x0001\x001D\x0002\0\x0004\x001D\x0003\0\x0001\x001D\x0002\0\x0002\x001D" + "\x0001\0\x0003\x001D\x0001\0\x0002\x001D\x0001\0\x0001\x001D\x0008\0\x0001\x001D" + "\x0001\0\x
 0002\x001D\x0002\0\x0002\x001D\x0001\0\x0004\x001D\x0013\0\x0001\x001D" + "\x0049\0\x0001\x001E\x0002\0\x0001\x001E\x001B\0\x0004\x001E\x008E\0\x0001\x001E" + "\x003F\0\x0001\x001E\x0024\0\x0001\x001E\x0001\0\x0002\x001E\x0011\0\x0001\x001E" + "\x0004\0\x0001\x001E\x000F\0\x0004\x001E\x0003\0\x0001\x001E\x000A\0\x0001\x001E" + "\x0083\0\x0001\x001E\x0092\0\x0004\x001E\x006A\0\x0002\x001E\x0015\0\x0004\x001E" + "\x002D\0\x0001\x001E\x000D\0\x0002\x001E\x0008\0\x0002\x001E\x0001\0\x0001\x001E" + "\x0001\0\x0001\x001E\x0009\0\x0001\x001E\x0009\0\x0002\x001E\x0006\0\x0001\x001E" + "\x0002\0\x0004\x001E\x0003\0\x0001\x001E\x0002\0\x0002\x001E\x0001\0\x0003\x001E" + "\x0001\0\x0002\x001E\x0001\0\x0001\x001E\x0008\0\x0001\x001E\x0001\0\x0002\x001E" + "\x0002\0\x0002\x001E\x0001\0\x0004\x001E\x0013\0\x0001\x001E\x0017\0\x0001\x0018" + "\x0004\0\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018" + "\x000B\0\x0001\x001F\x0002\0\x0001\x001F\x0008\0\x0001\x0018\x0012\0\x00
 04\x001F" + "\x001D\0\x0001\x0018\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0\x0001\x0032" + "\x0001\x0018\x0020\0\x0001\x0032\x000B\0\x0001\x001F\x0035\0\x0001\x0032\x0009\0" + "\x0001\x001F\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018\x0001\x001F" + "\x0001\0\x0002\x001F\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0\x0001\x001F" + "\x0004\0\x0001\x001F\x0002\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018\x0005\0" + "\x0004\x001F\x0002\0\x0001\x0018\x0001\x001F\x000A\0\x0001\x001F\x0007\0\x0001\x0018" + "\x0018\0\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018" + "\x0006\0\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018" + "\x0002\0\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x001F" + "\x0001\0\x0002\x0018\x0006\0\x0001\x0018\
 x0007\0\x0001\x0018\x0001\0\x0001\x0018" + "\x001B\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018" + "\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018" + "\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0\x0001\x0018" + "\x0004\x001F\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0018\x005C\0\x0002\x001F\x0015\0\x0004\x001F\x002D\0" + "\x0001\x001F\x000D\0\x0002\x001F\x0008\0\x0002\x001F\x0001\0\x0001\x001F\x0001\0" + "\x0001\x001F\x0009\0\x0001\x001F\x0009\0\x0002\x001F\x0006\0\x0001\x001F\x0002\0" + "\x0004\x001F\x0003\0\x0001\x001F\x0002\0\x0002\x001F\x0001\0\x0003\x001F\x0001\0" + "\x0002\x001F\x0001\0\x0001\x001F\x0008\0\x0001\x001F\x0001\0\x0002\x001F\x0002\0" + "\x0002\x001F\x0001\0\x0004\x001F\x0013\0\x0001\x001F\x0011\0\x0001\x0022\x0001\0" + "\x0001\x0023\x0002\0\x0001\x00A7\x0001\0\x0001\x0025\x0004\0\x0001\x0026\x0001\0" + "\x0001\x0027\x0001
 \0\x0001\x0028\x0002\0\x0001\x0029\x0003\0\x0001\x00A8\x0002\0" + "\x0001\x00A9\x0004\0\x0001\x002C\x0003\0\x0001\x00AA\x000F\0\x0001\x002E\x0002\0" + "\x0001\x00AB\x0011\0\x0001\x00AC\x0002\0\x0001\x00AD\x0031\0\x0001\x0018\x0001\x0067" + "\x0001\x0032\x0004\0\x0001\x0034\x0001\0\x0001\x0067\x0004\0\x0001\x001F\x0001\x0018" + "\x0006\0\x0001\x00AE\x0011\0\x0001\x00AF\x0002\0\x0001\x00B0\x0008\0\x0001\x00B1" + "\x0012\0\x0001\x00B2\x0011\0\x0001\x00B3\x0002\0\x0001\x00B4\x0032\0\x0001\x0068" + "\x0007\0\x0001\x0068\x0004\0\x0001\x0067\x0008\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x000B\0\x0001\x0020" + "\x0002\0\x0001\x0020\x0008\0\x0001\x0018\x0012\0\x0004\x0020\x001D\0\x0001\x0018" + "\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0\x0001\x0032\x0001\x0018\x0020\0" + "\x0001\x0032\x000B\0\x0001\x0020\x0035\0\x0001\x0032\x0009\0\x0001\x0020\x000D\0" + "\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018\x0001\x0020\x0001\0
 \x0002\x0020" + "\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0\x0001\x0020\x0004\0\x0001\x0020" + "\x0002\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018\x0005\0\x0004\x0020\x0002\0" + "\x0001\x0018\x0001\x0020\x000A\0\x0001\x0020\x0007\0\x0001\x0018\x0018\0\x0001\x0018" + "\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0\x0001\x0018" + "\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0\x0001\x0018" + "\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018" + "\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x0020\x0001\0\x0002\x0018" + "\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x001B\0\x0001\x0018" + "\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018\x0007\0\x0001\x0018" + "\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018" + "\x0003\0\x0002\x0018\x0002\0\x0002\x0018\
 x0004\0\x0001\x0018\x0004\x0020\x0001\0" + "\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0001\x0018\x005C\0\x0002\x0020\x0015\0\x0004\x0020\x002D\0\x0001\x0020\x000D\0" + "\x0002\x0020\x0008\0\x0002\x0020\x0001\0\x0001\x0020\x0001\0\x0001\x0020\x0009\0" + "\x0001\x0020\x0009\0\x0002\x0020\x0006\0\x0001\x0020\x0002\0\x0004\x0020\x0003\0" + "\x0001\x0020\x0002\0\x0002\x0020\x0001\0\x0003\x0020\x0001\0\x0002\x0020\x0001\0" + "\x0001\x0020\x0008\0\x0001\x0020\x0001\0\x0002\x0020\x0002\0\x0002\x0020\x0001\0" + "\x0004\x0020\x0013\0\x0001\x0020\x0049\0\x0001\x0021\x0002\0\x0001\x0021\x001B\0" + "\x0004\x0021\x008E\0\x0001\x0021\x003F\0\x0001\x0021\x0024\0\x0001\x0021\x0001\0" + "\x0002\x0021\x0011\0\x0001\x0021\x0004\0\x0001\x0021\x000F\0\x0004\x0021\x0003\0" + "\x0001\x0021\x000A\0\x0001\x0021\x0083\0\x0001\x0021\x0092\0\x0004\x0021\x006A\0" + "\x0002\x0021\x0015\0\x0004\x0021\x002D\0\x0001\x0021\x000D\0\x0002\x0021\x0008\0" + "\x0002\x0021\x0001
 \0\x0001\x0021\x0001\0\x0001\x0021\x0009\0\x0001\x0021\x0009\0" + "\x0002\x0021\x0006\0\x0001\x0021\x0002\0\x0004\x0021\x0003\0\x0001\x0021\x0002\0" + "\x0002\x0021\x0001\0\x0003\x0021\x0001\0\x0002\x0021\x0001\0\x0001\x0021\x0008\0" + "\x0001\x0021\x0001\0\x0002\x0021\x0002\0\x0002\x0021\x0001\0\x0004\x0021\x0013\0" + "\x0001\x0021\x0075\0\x0001\x00B5\x0016\0\x0002\x00B5\x0017\0\x0001\x0018\x0004\0" + "\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x000B\0" + "\x0001\x0032\x0002\0\x0001\x0032\x0008\0\x0001\x0018\x0012\0\x0004\x0032\x001D\0" + "\x0001\x0018\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0\x0001\x0032\x0001\x0018" + "\x0020\0\x0001\x0032\x000B\0\x0001\x0032\x0035\0\x0001\x0032\x0009\0\x0001\x0032" + "\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018\x0001\x0032\x0001\0" + "\x0002\x0032\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0\x0001\x0032\x0004\0" + "\x0001\x0032\x0002\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018\x0005\0\x00
 04\x0032" + "\x0002\0\x0001\x0018\x0001\x0032\x000A\0\x0001\x0032\x0007\0\x0001\x0018\x0018\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0" + "\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x0032\x0001\0" + "\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x001B\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0\x0001\x0018\x0007\0" + "\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0\x0001\x0018\x0006\0" + "\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0\x0001\x0018\x0004\x0032" + "\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x005C\0\x0002\x0032\x001
 5\0\x0004\x0032\x002D\0\x0001\x0032" + "\x000D\0\x0002\x0032\x0008\0\x0002\x0032\x0001\0\x0001\x0032\x0001\0\x0001\x0032" + "\x0009\0\x0001\x0032\x0009\0\x0002\x0032\x0006\0\x0001\x0032\x0002\0\x0004\x0032" + "\x0003\0\x0001\x0032\x0002\0\x0002\x0032\x0001\0\x0003\x0032\x0001\0\x0002\x0032" + "\x0001\0\x0001\x0032\x0008\0\x0001\x0032\x0001\0\x0002\x0032\x0002\0\x0002\x0032" + "\x0001\0\x0004\x0032\x0013\0\x0001\x0032\x0016\0\x0001\x00B6\x0001\0\x0001\x00B7" + "\x000F\0\x0001\x00B8\x0002\0\x0001\x00B9\x0004\0\x0001\x00BA\x0003\0\x0001\x00BB" + "\x0012\0\x0001\x00BC\x0011\0\x0001\x00BD\x0002\0\x0001\x00BE\x0032\0\x0001\x007F" + "\x0001\x0032\x0006\0\x0001\x007F\x000D\0\x0001\x0018\x0004\0\x0001\x0018\x0009\0" + "\x0001\x0018\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x000B\0\x0001\x0033\x0002\0" + "\x0001\x0033\x0008\0\x0001\x0018\x0012\0\x0004\x0033\x001D\0\x0001\x0018\x0019\0" + "\x0001\x0018\x0003\0\x0004\x0018\x0001\0\x0001\x0018\x0004\0\x0001\x0018\x0001\0" + "\x0002\x0018\x0002\0\x
 0002\x0018\x0002\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0002\0\x0004\x0018\x0001\0\x0003\x0018\x0001\0\x0001\x0018\x0001\0" + "\x0003\x0018\x0001\0\x0002\x0018\x0001\0\x0004\x0018\x0001\0\x0002\x0018\x0002\0" + "\x0008\x0018\x0001\0\x0002\x0018\x0001\0\x0008\x0018\x0002\0\x0007\x0018\x0001\0" + "\x0008\x0018\x0001\0\x0006\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0003\0\x0003\x0018\x0012\0\x0001\x0018\x0016\0" + "\x0002\x0018\x0014\0\x0001\x0018\x002C\0\x0001\x0033\x003F\0\x0001\x0033\x000D\0" + "\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018\x0001\x0033\x0001\0\x0002\x0033" + "\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0\x0001\x0033\x0004\0\x0001\x0033" + "\x0002\x0018\x0006\0\x0002\x0018\x0005\0\x0004\x0033\x0002\0\x0001\x0018\x0001\x0033" + "\x000A\0\x0001\x0033\x0007\0\x0001\x0018\x0024\0\x0001\x0018\x0003\0\x0002\x0018" + "\x000A\0\x0002\x0018\x0001\0\x0003\x0018\x0007\0\x0001\x0018\x0006\0\x0002\x
 0018" + "\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0004\0\x0002\x0018\x0002\0\x0002\x0018" + "\x0005\0\x0003\x0018\x0002\0\x0001\x0018\x000D\0\x0001\x0018\x000E\0\x0001\x0018" + "\x0007\0\x0001\x0018\x0018\0\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018" + "\x0003\0\x0001\x0018\x0006\0\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018" + "\x0001\0\x000F\x0018\x0002\0\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018" + "\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0004\0\x0001\x0033\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018" + "\x0001\0\x0001\x0018\x001B\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018" + "\x0003\0\x0001\x0018\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018" + "\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018" + "\x0004\0\x0001\x0018\x0004\x0033\x0001\0\x0001\x0018\
 x0002\0\x0001\x0018\x0004\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x005C\0\x0002\x0033\x0015\0" + "\x0004\x0033\x002D\0\x0001\x0033\x000D\0\x0002\x0033\x0008\0\x0002\x0033\x0001\0" + "\x0001\x0033\x0001\0\x0001\x0033\x0009\0\x0001\x0033\x0009\0\x0002\x0033\x0006\0" + "\x0001\x0033\x0002\0\x0004\x0033\x0003\0\x0001\x0033\x0002\0\x0002\x0033\x0001\0" + "\x0003\x0033\x0001\0\x0002\x0033\x0001\0\x0001\x0033\x0008\0\x0001\x0033\x0001\0" + "\x0002\x0033\x0002\0\x0002\x0033\x0001\0\x0004\x0033\x0013\0\x0001\x0033\x0017\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0" + "\x0001\x0018\x000B\0\x0001\x0034\x0002\0\x0001\x0034\x0008\0\x0001\x0018\x0012\0" + "\x0004\x0034\x001D\0\x0001\x0018\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0" + "\x0001\x0032\x0001\x0018\x0020\0\x0001\x0032\x000B\0\x0001\x0034\x0035\0\x0001\x0032" + "\x0009\0\x0001\x0034\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018" + "\x0001\x0034\x0001\0\x0002
 \x0034\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0" + "\x0001\x0034\x0004\0\x0001\x0034\x0002\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018" + "\x0005\0\x0004\x0034\x0002\0\x0001\x0018\x0001\x0034\x000A\0\x0001\x0034\x0007\0" + "\x0001\x0018\x0018\0\x0001\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0" + "\x000F\x0018\x0002\0\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0" + "\x0001\x0034\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0" + "\x0001\x0018\x001B\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0" + "\x0001\x0018\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0" + 
 "\x0001\x0018\x0004\x0034\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x005C\0\x0002\x0034\x0015\0\x0004\x0034" + "\x002D\0\x0001\x0034\x000D\0\x0002\x0034\x0008\0\x0002\x0034\x0001\0\x0001\x0034" + "\x0001\0\x0001\x0034\x0009\0\x0001\x0034\x0009\0\x0002\x0034\x0006\0\x0001\x0034" + "\x0002\0\x0004\x0034\x0003\0\x0001\x0034\x0002\0\x0002\x0034\x0001\0\x0003\x0034" + "\x0001\0\x0002\x0034\x0001\0\x0001\x0034\x0008\0\x0001\x0034\x0001\0\x0002\x0034" + "\x0002\0\x0002\x0034\x0001\0\x0004\x0034\x0013\0\x0001\x0034\x0049\0\x0001\x003E" + "\x0002\0\x0001\x003E\x001B\0\x0004\x003E\x0042\0\x0001\x0019\x0044\0\x0001\x0019" + "\x0066\0\x0001\x0019\x0021\0\x0001\x0019\x000B\0\x0001\x003E\x0035\0\x0001\x0019" + "\x0009\0\x0001\x003E\x0024\0\x0001\x003E\x0001\0\x0002\x003E\x0011\0\x0001\x003E" + "\x0004\0\x0001\x003E\x0003\0\x0004\x0019\x0008\0\x0004\x003E\x0003\0\x0001\x003E" + "\x000A\0\x0001\x003E\x0074\0\x0002\x0019\x009B\0\x0001\x0
 03E\x0092\0\x0004\x003E" + "\x006A\0\x0002\x003E\x0015\0\x0004\x003E\x002D\0\x0001\x003E\x000D\0\x0002\x003E" + "\x0008\0\x0002\x003E\x0001\0\x0001\x003E\x0001\0\x0001\x003E\x0009\0\x0001\x003E" + "\x0009\0\x0002\x003E\x0006\0\x0001\x003E\x0002\0\x0004\x003E\x0003\0\x0001\x003E" + "\x0002\0\x0002\x003E\x0001\0\x0003\x003E\x0001\0\x0002\x003E\x0001\0\x0001\x003E" + "\x0008\0\x0001\x003E\x0001\0\x0002\x003E\x0002\0\x0002\x003E\x0001\0\x0004\x003E" + "\x0013\0\x0001\x003E\x0017\0\x0001\x0018\x0004\0\x0001\x0018\x0009\0\x0001\x0018" + "\x0012\0\x0001\x0018\x0003\0\x0001\x0018\x000B\0\x0001\x003F\x0002\0\x0001\x003F" + "\x0008\0\x0001\x0018\x0012\0\x0004\x003F\x001D\0\x0001\x0018\x0016\0\x0001\x0018" + "\x0016\0\x0002\x0018\x0013\0\x0001\x0019\x0001\x0018\x0020\0\x0001\x0019\x000B\0" + "\x0001\x003F\x0035\0\x0001\x0019\x0009\0\x0001\x003F\x000D\0\x0004\x0018\x0002\0" + "\x0002\x0018\x000C\0\x0003\x0018\x0001\x003F\x0001\0\x0002\x003F\x0009\0\x0003\x0018" + "\x0003\0\x0001\x0018\x0001\0\x
 0001\x003F\x0004\0\x0001\x003F\x0002\x0018\x0001\0" + "\x0004\x0019\x0001\0\x0002\x0018\x0005\0\x0004\x003F\x0002\0\x0001\x0018\x0001\x003F" + "\x000A\0\x0001\x003F\x0007\0\x0001\x0018\x0018\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0006\0\x0001\x0018\x0005\0\x0001\x0018" + "\x0002\0\x0002\x0018\x0001\0\x000F\x0018\x0002\0\x0001\x0018\x000B\0\x0007\x0018" + "\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018" + "\x0001\0\x0003\x0018\x0002\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0004\0\x0001\x003F\x0001\0\x0002\x0018\x0006\0\x0001\x0018" + "\x0007\0\x0001\x0018\x0001\0\x0001\x0018\x001B\0\x0001\x0018\x0006\0\x0001\x0018" + "\x0003\0\x0001\x0018\x0003\0\x0001\x0018\x0007\0\x0001\x0018\x0019\0\x0010\x0018" + "\x0005\0\x0003\x0018\x0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0002\x0018" + "\x0002\0\x0002\x0018\x0004\0\x0001\x0018\x0004\x003F\x0001\0\x0001\x0018\x0002\0" + "\x0
 001\x0018\x0004\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x005C\0" + "\x0002\x003F\x0015\0\x0004\x003F\x002D\0\x0001\x003F\x000D\0\x0002\x003F\x0008\0" + "\x0002\x003F\x0001\0\x0001\x003F\x0001\0\x0001\x003F\x0009\0\x0001\x003F\x0009\0" + "\x0002\x003F\x0006\0\x0001\x003F\x0002\0\x0004\x003F\x0003\0\x0001\x003F\x0002\0" + "\x0002\x003F\x0001\0\x0003\x003F\x0001\0\x0002\x003F\x0001\0\x0001\x003F\x0008\0" + "\x0001\x003F\x0001\0\x0002\x003F\x0002\0\x0002\x003F\x0001\0\x0004\x003F\x0013\0" + "\x0001\x003F\x0049\0\x0001\x0051\x0002\0\x0001\x0051\x001B\0\x0004\x0051\x008E\0" + "\x0001\x0051\x003F\0\x0001\x0051\x0024\0\x0001\x0051\x0001\0\x0002\x0051\x0011\0" + "\x0001\x0051\x0004\0\x0001\x0051\x000F\0\x0004\x0051\x0003\0\x0001\x0051\x000A\0" + "\x0001\x0051\x0083\0\x0001\x0051\x0092\0\x0004\x0051\x006A\0\x0002\x0051\x0015\0" + "\x0004\x0051\x002D\0\x0001\x0051\x000D\0\x0002\x0051\x0008\0\x0002\x0051\x0001\0" + "\x0001\x0051\x0001\0\x0001\x0051\x0009\0\x0001\x0051\x0009\0\x000
 2\x0051\x0006\0" + "\x0001\x0051\x0002\0\x0004\x0051\x0003\0\x0001\x0051\x0002\0\x0002\x0051\x0001\0" + "\x0003\x0051\x0001\0\x0002\x0051\x0001\0\x0001\x0051\x0008\0\x0001\x0051\x0001\0" + "\x0002\x0051\x0002\0\x0002\x0051\x0001\0\x0004\x0051\x0013\0\x0001\x0051\x0017\0" + "\x0001\x0018\x0004\0\x0001\x0018\x0009\0\x0001\x0018\x0012\0\x0001\x0018\x0003\0" + "\x0001\x0018\x000B\0\x0001\x0067\x0002\0\x0001\x0067\x0008\0\x0001\x0018\x0012\0" + "\x0004\x0067\x001D\0\x0001\x0018\x0016\0\x0001\x0018\x0016\0\x0002\x0018\x0013\0" + "\x0001\x0032\x0001\x0018\x0020\0\x0001\x0032\x000B\0\x0001\x0067\x0035\0\x0001\x0032" + "\x0009\0\x0001\x0067\x000D\0\x0004\x0018\x0002\0\x0002\x0018\x000C\0\x0003\x0018" + "\x0001\x0067\x0001\0\x0002\x0067\x0009\0\x0003\x0018\x0003\0\x0001\x0018\x0001\0" + "\x0001\x0067\x0004\0\x0001\x0067\x0002\x0018\x0001\0\x0004\x0032\x0001\0\x0002\x0018" + "\x0005\0\x0004\x0067\x0002\0\x0001\x0018\x0001\x0067\x000A\0\x0001\x0067\x0007\0" + "\x0001\x0018\x0018\0\x0001\x0018\x
 0004\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0005\0\x0001\x0018\x0002\0\x0002\x0018\x0001\0" + "\x000F\x0018\x0002\0\x0001\x0018\x000B\0\x0007\x0018\x0002\0\x0001\x0018\x0001\0" + "\x0001\x0018\x0001\0\x0002\x0018\x0002\0\x0001\x0018\x0001\0\x0003\x0018\x0002\0" + "\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x0004\0" + "\x0001\x0067\x0001\0\x0002\x0018\x0006\0\x0001\x0018\x0007\0\x0001\x0018\x0001\0" + "\x0001\x0018\x001B\0\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0001\x0018\x0003\0" + "\x0001\x0018\x0007\0\x0001\x0018\x0019\0\x0010\x0018\x0005\0\x0003\x0018\x0004\0" + "\x0001\x0018\x0006\0\x0001\x0018\x0003\0\x0002\x0018\x0002\0\x0002\x0018\x0004\0" + "\x0001\x0018\x0004\x0067\x0001\0\x0001\x0018\x0002\0\x0001\x0018\x0004\0\x0001\x0018" + "\x0001\0\x0001\x0018\x0001\0\x0001\x0018\x005C\0\x0002\x0067\x0015\0\x0004\x0067" + "\x002D\0\x0001\x0067\x000D\0\x0002\x0067\x0008\0\x0002\x0067\x0001\0\x0001\x0067" + "\x0001\
 0\x0001\x0067\x0009\0\x0001\x0067\x0009\0\x0002\x0067\x0006\0\x0001\x0067" + "\x0002\0\x0004\x0067\x0003\0\x0001\x0067\x0002\0\x0002\x0067\x0001\0\x0003\x0067" + "\x0001\0\x0002\x0067\x0001\0\x0001\x0067\x0008\0\x0001\x0067\x0001\0\x0002\x0067" + "\x0002\0\x0002\x0067\x0001\0\x0004\x0067\x0013\0\x0001\x0067\x0049\0\x0001\x0068" + "\x0002\0\x0001\x0068\x001B\0\x0004\x0068\x008E\0\x0001\x0068\x003F\0\x0001\x0068" + "\x0024\0\x0001\x0068\x0001\0\x0002\x0068\x0011\0\x0001\x0068\x0004\0\x0001\x0068" + "\x000F\0\x0004\x0068\x0003\0\x0001\x0068\x000A\0\x0001\x0068\x0083\0\x0001\x0068" + "\x0092\0\x0004\x0068\x006A\0\x0002\x0068\x0015\0\x0004\x0068\x002D\0\x0001\x0068" + "\x000D\0\x0002\x0068\x0008\0\x0002\x0068\x0001\0\x0001\x0068\x0001\0\x0001\x0068" + "\x0009\0\x0001\x0068\x0009\0\x0002\x0068\x0006\0\x0001\x0068\x0002\0\x0004\x0068" + "\x0003\0\x0001\x0068\x0002\0\x0002\x0068\x0001\0\x0003\x0068\x0001\0\x0002\x0068" + "\x0001\0\x0001\x0068\x0008\0\x0001\x0068\x0001\0\x0002\x0068\x0002\0\
 x0002\x0068" + "\x0001\0\x0004\x0068\x0013\0\x0001\x0068\x0016\0\x0001\x00BF\x0011\0\x0001\x00C0" + "\x0002\0\x0001\x00C1\x0008\0\x0001\x00C2\x0012\0\x0001\x00C3\x0011\0\x0001\x00C4" + "\x0002\0\x0001\x00C5\x002D\0\x0001\x0077\x0004\0\x0001\x00B5\x0007\0\x0001\x00B5" + "\x003F\0\x0001\x007F\x0002\0\x0001\x007F\x001B\0\x0004\x007F\x0042\0\x0001\x0032" + "\x0044\0\x0001\x0032\x0066\0\x0001\x0032\x0021\0\x0001\x0032\x000B\0\x0001\x007F" + "\x0035\0\x0001\x0032\x0009\0\x0001\x007F\x0024\0\x0001\x007F\x0001\0\x0002\x007F" + "\x0011\0\x0001\x007F\x0004\0\x0001\x007F\x0003\0\x0004\x0032\x0008\0\x0004\x007F" + "\x0003\0\x0001\x007F\x000A\0\x0001\x007F\x0074\0\x0002\x0032\x009B\0\x0001\x007F" + "\x0092\0\x0004\x007F\x006A\0\x0002\x007F\x0015\0\x0004\x007F\x002D\0\x0001\x007F" + "\x000D\0\x0002\x007F\x0008\0\x0002\x007F\x0001\0\x0001\x007F\x0001\0\x0001\x007F" + "\x0009\0\x0001\x007F\x0009\0\x0002\x007F\x0006\0\x0001\x007F\x0002\0\x0004\x007F" + "\x0003\0\x0001\x007F\x0002\0\x0002\x007F\x0001
 \0\x0003\x007F\x0001\0\x0002\x007F" + "\x0001\0\x0001\x007F\x0008\0\x0001\x007F\x0001\0\x0002\x007F\x0002\0\x0002\x007F" + "\x0001\0\x0004\x007F\x0013\0\x0001\x007F\x0049\0\x0001\x00B5\x0002\0\x0001\x00B5" + "\x001B\0\x0004\x00B5\x008E\0\x0001\x00B5\x003F\0\x0001\x00B5\x0024\0\x0001\x00B5" + "\x0001\0\x0002\x00B5\x0011\0\x0001\x00B5\x0004\0\x0001\x00B5\x000F\0\x0004\x00B5" + "\x0003\0\x0001\x00B5\x000A\0\x0001\x00B5\x0083\0\x0001\x00B5\x0092\0\x0004\x00B5" + "\x006A\0\x0002\x00B5\x0015\0\x0004\x00B5\x002D\0\x0001\x00B5\x000D\0\x0002\x00B5" + "\x0008\0\x0002\x00B5\x0001\0\x0001\x00B5\x0001\0\x0001\x00B5\x0009\0\x0001\x00B5" + "\x0009\0\x0002\x00B5\x0006\0\x0001\x00B5\x0002\0\x0004\x00B5\x0003\0\x0001\x00B5" + "\x0002\0\x0002\x00B5\x0001\0\x0003\x00B5\x0001\0\x0002\x00B5\x0001\0\x0001\x00B5" + "\x0008\0\x0001\x00B5\x0001\0\x0002\x00B5\x0002\0\x0002\x00B5\x0001\0\x0004\x00B5" + "\x0013\0\x0001\x00B5\x0010\0";
+
+	  private static int [] zzUnpackTrans()
+	  {
+		int[] result = new int[26554];
+		int offset = 0;
+		offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackTrans(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  value--;
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
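+
+	  // Sketch (not part of the generated scanner): the packed tables are a
+	  // run-length encoding of (count, value) char pairs.  zzUnpackTrans also
+	  // stores every value off by one so that a packed 0 can stand for the
+	  // "no transition" marker -1 after the decrement.  For example:
+	  //
+	  //   int[] demo = new int[5];
+	  //   zzUnpackTrans("\x0003\x0007\x0002\x0001", 0, demo);
+	  //   // demo == { 6, 6, 6, 0, 0 }: three 7s and two 1s, each minus one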
+
+
+	  /* error codes */
+	  private const int ZZ_UNKNOWN_ERROR = 0;
+	  private const int ZZ_NO_MATCH = 1;
+	  private const int ZZ_PUSHBACK_2BIG = 2;
+
+	  /* error messages for the codes above */
+	  private static readonly string[] ZZ_ERROR_MSG = { "Unknown internal scanner error", "Error: could not match input", "Error: pushback value was too large" };
+
+	  /// <summary>
+	  /// ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+	  /// </summary>
+	  private static readonly int[] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+	  private const string ZZ_ATTRIBUTE_PACKED_0 = "\x0001\0\x0001\x0009\x001E\x0001\x0011\0\x0001\x0001\x0001\0\x0001\x0001\x000A\0" + "\x0001\x0001\x0011\0\x0001\x0001\x0015\0\x0001\x0001\x004D\0\x0001\x0001\x0010\0";
+
+	  private static int [] zzUnpackAttribute()
+	  {
+		int[] result = new int[197];
+		int offset = 0;
+		offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+		return result;
+	  }
+
+	  private static int zzUnpackAttribute(string packed, int offset, int[] result)
+	  {
+		int i = 0; // index in packed string
+		int j = offset; // index in unpacked array
+		int l = packed.Length;
+		while (i < l)
+		{
+		  int count = packed[i++];
+		  int value = packed[i++];
+		  do
+		  {
+			  result[j++] = value;
+		  } while (--count > 0);
+		}
+		return j;
+	  }
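+
+	  // Sketch: unlike zzUnpackTrans above, attribute values are stored as-is
+	  // (no decrement).  Each unpacked entry is a bit set per DFA state; the
+	  // scanning loop below tests bit 1 ((zzAttributes & 1) == 1) to record an
+	  // accepting state and bit 8 ((zzAttributes & 8) == 8) to stop matching
+	  // immediately.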
+
+	  /// <summary>
+	  /// the input device </summary>
+	  private System.IO.TextReader zzReader;
+
+	  /// <summary>
+	  /// the current state of the DFA </summary>
+	  private int zzState;
+
+	  /// <summary>
+	  /// the current lexical state </summary>
+	  private int zzLexicalState = YYINITIAL;
+
+	  /// <summary>
+	  /// this buffer contains the current text to be matched and is
+	  ///    the source of the yytext() string 
+	  /// </summary>
+	  private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
+
+	  /// <summary>
+	  /// the text position at the last accepting state </summary>
+	  private int zzMarkedPos;
+
+	  /// <summary>
+	  /// the current text position in the buffer </summary>
+	  private int zzCurrentPos;
+
+	  /// <summary>
+	  /// startRead marks the beginning of the yytext() string in the buffer </summary>
+	  private int zzStartRead;
+
+	  /// <summary>
+	  /// endRead marks the last character in the buffer that has been read
+	  ///    from input 
+	  /// </summary>
+	  private int zzEndRead;
+
+	  /// <summary>
+	  /// number of newlines encountered up to the start of the matched text </summary>
+	  private int yyline;
+
+	  /// <summary>
+	  /// the number of characters up to the start of the matched text </summary>
+	  private int yychar_Renamed;
+
+	  /// <summary>
+	  /// the number of characters from the last newline up to the start of the 
+	  /// matched text
+	  /// </summary>
+	  private int yycolumn;
+
+	  /// <summary>
+	  /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+	  /// </summary>
+	  private bool zzAtBOL = true;
+
+	  /// <summary>
+	  /// zzAtEOF == true <=> the scanner is at the EOF </summary>
+	  private bool zzAtEOF;
+
+	  /// <summary>
+	  /// denotes if the user-EOF-code has already been executed </summary>
+	  private bool zzEOFDone;
+
+	  /* user code: */
+	  /// <summary>
+	  /// Alphanumeric sequences </summary>
+	  public const int WORD_TYPE = StandardTokenizer.ALPHANUM;
+
+	  /// <summary>
+	  /// Numbers </summary>
+	  public const int NUMERIC_TYPE = StandardTokenizer.NUM;
+
+	  /// <summary>
+	  /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+	  /// scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept 
+	  /// together as a single token rather than broken up, because the logic
+	  /// required to break them at word boundaries is too complex for UAX#29.
+	  /// <para>
+	  /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+	  /// </para>
+	  /// </summary>
+	  public const int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
+
+	  public const int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
+
+	  public const int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
+
+	  public const int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
+
+	  public const int HANGUL_TYPE = StandardTokenizer.HANGUL;
+
+	  public int yychar()
+	  {
+		return yychar_Renamed;
+	  }
+
+	  /// <summary>
+	  /// Fills CharTermAttribute with the current token text.
+	  /// </summary>
+	  public void getText(CharTermAttribute t)
+	  {
+		t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new scanner
+	  /// </summary>
+	  /// <param name="in">  the System.IO.TextReader to read input from. </param>
+	  public StandardTokenizerImpl(System.IO.TextReader @in)
+	  {
+		this.zzReader = @in;
+	  }
+
+
+	  /// <summary>
+	  /// Unpacks the compressed character translation table.
+	  /// </summary>
+	  /// <param name="packed">   the packed character translation table </param>
+	  /// <returns>         the unpacked character translation table </returns>
+	  private static char [] zzUnpackCMap(string packed)
+	  {
+		char[] map = new char[0x10000];
+		int i = 0; // index in packed string
+		int j = 0; // index in unpacked array
+		while (i < 2860)
+		{
+		  int count = packed[i++];
+		  char value = packed[i++];
+		  do
+		  {
+			  map[j++] = value;
+		  } while (--count > 0);
+		}
+		return map;
+	  }
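+
+	  // Sketch: ZZ_CMAP collapses all 0x10000 BMP code units into a small set
+	  // of character classes; the unpacked value for a char is the column used
+	  // to index a transition-table row.  Assuming the stock JFlex name
+	  // ZZ_CMAP_PACKED for the packed constant:
+	  //
+	  //   char[] cmap = zzUnpackCMap(ZZ_CMAP_PACKED); // reads 2860 packed chars
+	  //   int charClass = cmap['a'];
+	  //   // next state = ZZ_TRANS[ZZ_ROWMAP[state] + charClass]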
+
+
+	  /// <summary>
+	  /// Refills the input buffer.
+	  /// </summary>
+	  /// <returns>      <code>false</code> iff there was new input; <code>true</code> at end of stream.
+	  /// </returns>
+	  /// <exception cref="System.IO.IOException">  if any I/O-Error occurs </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private boolean zzRefill() throws java.io.IOException
+	  private bool zzRefill()
+	  {
+
+		/* first: make room (if you can) */
+		if (zzStartRead > 0)
+		{
+		  Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+
+		  /* translate stored positions */
+		  zzEndRead -= zzStartRead;
+		  zzCurrentPos -= zzStartRead;
+		  zzMarkedPos -= zzStartRead;
+		  zzStartRead = 0;
+		}
+
+		/* is the buffer big enough? */
+		if (zzCurrentPos >= zzBuffer.Length)
+		{
+		  /* if not: blow it up */
+		  char[] newBuffer = new char[zzCurrentPos * 2];
+		  Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
+		  zzBuffer = newBuffer;
+		}
+
+		/* finally: fill the buffer with new input */
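+		// Note: System.IO.TextReader.Read(char[], int, int) signals end of
+		// stream by returning 0 (java.io.Reader returns -1), so the
+		// numRead == 0 branch below doubles as the EOF check; the single-char
+		// Read() still returns -1 at end of stream.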
+		int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+
+		if (numRead > 0)
+		{
+		  zzEndRead += numRead;
+		  return false;
+		}
+		// a read of 0 characters: with System.IO.TextReader this normally means end of stream, so confirm with a single-char read
+		if (numRead == 0)
+		{
+		  int c = zzReader.Read();
+		  if (c == -1)
+		  {
+			return true;
+		  }
+		  else
+		  {
+			zzBuffer[zzEndRead++] = (char) c;
+			return false;
+		  }
+		}
+
+		// numRead < 0
+		return true;
+	  }
+
+
+	  /// <summary>
+	  /// Closes the input stream.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public final void yyclose() throws java.io.IOException
+	  public void yyclose()
+	  {
+		zzAtEOF = true; // indicate end of file
+		zzEndRead = zzStartRead; // invalidate buffer
+
+		if (zzReader != null)
+		{
+		  zzReader.Close();
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Resets the scanner to read from a new input stream.
+	  /// Does not close the old reader.
+	  /// 
+	  /// All internal variables are reset, the old input stream 
+	  /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+	  /// Lexical state is set to <tt>YYINITIAL</tt>.
+	  /// 
+	  /// Internal scan buffer is resized down to its initial length, if it has grown.
+	  /// </summary>
+	  /// <param name="reader">   the new input stream  </param>
+	  public void yyreset(System.IO.TextReader reader)
+	  {
+		zzReader = reader;
+		zzAtBOL = true;
+		zzAtEOF = false;
+		zzEOFDone = false;
+		zzEndRead = zzStartRead = 0;
+		zzCurrentPos = zzMarkedPos = 0;
+		yyline = yychar_Renamed = yycolumn = 0;
+		zzLexicalState = YYINITIAL;
+		if (zzBuffer.Length > ZZ_BUFFERSIZE)
+		{
+		  zzBuffer = new char[ZZ_BUFFERSIZE];
+		}
+	  }
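+
+	  // Usage sketch (hypothetical caller): the owning Tokenizer would re-point
+	  // this scanner at each new document instead of allocating a new one, e.g.
+	  // from its Reset() override:
+	  //
+	  //   scanner.yyreset(input); // input is the Tokenizer's TextReader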
+
+
+	  /// <summary>
+	  /// Returns the current lexical state.
+	  /// </summary>
+	  public int yystate()
+	  {
+		return zzLexicalState;
+	  }
+
+
+	  /// <summary>
+	  /// Enters a new lexical state
+	  /// </summary>
+	  /// <param name="newState"> the new lexical state </param>
+	  public void yybegin(int newState)
+	  {
+		zzLexicalState = newState;
+	  }
+
+
+	  /// <summary>
+	  /// Returns the text matched by the current regular expression.
+	  /// </summary>
+	  public string yytext()
+	  {
+		return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+	  }
+
+
+	  /// <summary>
+	  /// Returns the character at position <tt>pos</tt> from the 
+	  /// matched text. 
+	  /// 
+	  /// It is equivalent to yytext()[pos], but faster
+	  /// </summary>
+	  /// <param name="pos"> the position of the character to fetch. 
+	  ///            A value from 0 to yylength()-1.
+	  /// </param>
+	  /// <returns> the character at position pos </returns>
+	  public char yycharat(int pos)
+	  {
+		return zzBuffer[zzStartRead + pos];
+	  }
+
+
+	  /// <summary>
+	  /// Returns the length of the matched text region.
+	  /// </summary>
+	  public int yylength()
+	  {
+		return zzMarkedPos - zzStartRead;
+	  }
+
+
+	  /// <summary>
+	  /// Reports an error that occurred while scanning.
+	  /// 
+	  /// In a well-formed scanner (no or only correct usage of 
+	  /// yypushback(int) and a match-all fallback rule) this method 
+	  /// will only be called with things that "Can't Possibly Happen".
+	  /// If this method is called, something is seriously wrong
+	  /// (e.g. a JFlex bug producing a faulty scanner etc.).
+	  /// 
+	  /// Usual syntax/scanner level error handling should be done
+	  /// in error fallback rules.
+	  /// </summary>
+	  /// <param name="errorCode">  the code of the error message to display </param>
+	  private void zzScanError(int errorCode)
+	  {
+		string message;
+		try
+		{
+		  message = ZZ_ERROR_MSG[errorCode];
+		}
+		catch (System.IndexOutOfRangeException)
+		{
+		  message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+		}
+
+		throw new Exception(message);
+	  }
+
+
+	  /// <summary>
+	  /// Pushes the specified amount of characters back into the input stream.
+	  /// 
+	  /// They will be read again by the next call of the scanning method
+	  /// </summary>
+	  /// <param name="number">  the number of characters to be read again.
+	  ///                This number must not be greater than yylength()! </param>
+	  public void yypushback(int number)
+	  {
+		if (number > yylength())
+		{
+		  zzScanError(ZZ_PUSHBACK_2BIG);
+		}
+
+		zzMarkedPos -= number;
+	  }
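+
+	  // Usage sketch: yypushback returns look-ahead characters to the stream,
+	  // e.g. giving back a trailing '.' so the next scan starts on it:
+	  //
+	  //   if (yylength() > 1 && yycharat(yylength() - 1) == '.')
+	  //   {
+	  //     yypushback(1); // the '.' will be re-scanned as part of the next token
+	  //   }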
+
+
+	  /// <summary>
+	  /// Resumes scanning until the next regular expression is matched,
+	  /// the end of input is encountered or an I/O-Error occurs.
+	  /// </summary>
+	  /// <returns>      the next token </returns>
+	  /// <exception cref="System.IO.IOException">  if any I/O-Error occurs </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public int getNextToken() throws java.io.IOException
+	  public int NextToken
+	  {
+		  get
+		  {
+			int zzInput;
+			int zzAction;
+    
+			// cached fields:
+			int zzCurrentPosL;
+			int zzMarkedPosL;
+			int zzEndReadL = zzEndRead;
+			char[] zzBufferL = zzBuffer;
+			char[] zzCMapL = ZZ_CMAP;
+    
+			int[] zzTransL = ZZ_TRANS;
+			int[] zzRowMapL = ZZ_ROWMAP;
+			int[] zzAttrL = ZZ_ATTRIBUTE;
+    
+			while (true)
+			{
+			  zzMarkedPosL = zzMarkedPos;
+    
+			  yychar_Renamed += zzMarkedPosL - zzStartRead;
+    
+			  zzAction = -1;
+    
+			  zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+    
+			  zzState = ZZ_LEXSTATE[zzLexicalState];
+    
+			  // set up zzAction for empty match case:
+			  int zzAttributes = zzAttrL[zzState];
+			  if ((zzAttributes & 1) == 1)
+			  {
+				zzAction = zzState;
+			  }
+    
+    
+			  {
+				while (true)
+				{
+    
+				  if (zzCurrentPosL < zzEndReadL)
+				  {
+					zzInput = zzBufferL[zzCurrentPosL++];
+				  }
+				  else if (zzAtEOF)
+				  {
+					zzInput = StandardTokenizerInterface_Fields.YYEOF;
+					goto zzForActionBreak;
+				  }
+				  else
+				  {
+					// store back cached positions
+					zzCurrentPos = zzCurrentPosL;
+					zzMarkedPos = zzMarkedPosL;
+					bool eof = zzRefill();
+					// get translated positions and possibly new buffer
+					zzCurrentPosL = zzCurrentPos;
+					zzMarkedPosL = zzMarkedPos;
+					zzBufferL = zzBuffer;
+					zzEndReadL = zzEndRead;
+					if (eof)
+					{
+					  zzInput = StandardTokenizerInterface_Fields.YYEOF;
+					  goto zzForActionBreak;
+					}
+					else
+					{
+					  zzInput = zzBufferL[zzCurrentPosL++];
+					}
+				  }
+				  int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
+				  if (zzNext == -1)
+				  {
+					  goto zzForActionBreak;
+				  }
+				  zzState = zzNext;
+    
+				  zzAttributes = zzAttrL[zzState];
+				  if ((zzAttributes & 1) == 1)
+				  {
+					zzAction = zzState;
+					zzMarkedPosL = zzCurrentPosL;
+					if ((zzAttributes & 8) == 8)
+					{
+						goto zzForActionBreak;
+					}
+				  }
+    
+				}
+			  }
+			  zzForActionBreak:
+    
+			  // store back cached position
+			  zzMarkedPos = zzMarkedPosL;
+    
+			  switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
+			  {
+				case 1:
+				{
+					break; // Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it.
+				}
+				case 9:
+					break;
+				case 2:
+				{
+					  return WORD_TYPE;
+				}
+				case 10:
+					break;
+				case 3:
+				{
+					  return NUMERIC_TYPE;
+				}
+				case 11:
+					break;
+				case 4:
+				{
+					  return KATAKANA_TYPE;
+				}
+				case 12:
+					break;
+				case 5:
+				{
+					  return SOUTH_EAST_ASIAN_TYPE;
+				}
+				case 13:
+					break;
+				case 6:
+				{
+					  return IDEOGRAPHIC_TYPE;
+				}
+				case 14:
+					break;
+				case 7:
+				{
+					  return HIRAGANA_TYPE;
+				}
+				case 15:
+					break;
+				case 8:
+				{
+					  return HANGUL_TYPE;
+				}
+				case 16:
+					break;
+				default:
+				  if (zzInput == StandardTokenizerInterface_Fields.YYEOF && zzStartRead == zzCurrentPos)
+				  {
+					zzAtEOF = true;
+					{
+						return StandardTokenizerInterface_Fields.YYEOF;
+					}
+				  }
+				  else
+				  {
+					zzScanError(ZZ_NO_MATCH);
+				  }
+			  break;
+			  }
+			}
+		  }
+	  }
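+
+	  // Sketch of the loop above: each step resolves the next DFA state as
+	  //
+	  //   zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]
+	  //
+	  // i.e. ZZ_ROWMAP holds the row offset of the current state and the
+	  // character class of zzInput selects the column; -1 means "no
+	  // transition", which ends the match at the last accepting state
+	  // recorded in zzAction.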
+
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
new file mode 100644
index 0000000..ee84322
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
@@ -0,0 +1,77 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+
+	/// <summary>
+	/// Internal interface for supporting versioned grammars.
+	/// @lucene.internal 
+	/// </summary>
+	public interface StandardTokenizerInterface
+	{
+
+	  /// <summary>
+	  /// Copies the matched text into the CharTermAttribute
+	  /// </summary>
+	  void getText(CharTermAttribute t);
+
+	  /// <summary>
+	  /// Returns the current position.
+	  /// </summary>
+	  int yychar();
+
+	  /// <summary>
+	  /// Resets the scanner to read from a new input stream.
+	  /// Does not close the old reader.
+	  /// 
+	  /// All internal variables are reset, the old input stream 
+	  /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+	  /// Lexical state is set to <tt>YYINITIAL</tt>.
+	  /// </summary>
+	  /// <param name="reader">   the new input stream  </param>
+	  void yyreset(System.IO.TextReader reader);
+
+	  /// <summary>
+	  /// Returns the length of the matched text region.
+	  /// </summary>
+	  int yylength();
+
+	  /// <summary>
+	  /// Resumes scanning until the next regular expression is matched,
+	  /// the end of input is encountered or an I/O-Error occurs.
+	  /// </summary>
+	  /// <returns>      the next token, <seealso cref="#YYEOF"/> on end of stream </returns>
+	  /// <exception cref="System.IO.IOException">  if any I/O-Error occurs </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public int getNextToken() throws java.io.IOException;
+	  int NextToken {get;}
+
+	}
+
+	public static class StandardTokenizerInterface_Fields
+	{
+	  /// <summary>
+	  /// This character denotes the end of file </summary>
+	  public const int YYEOF = -1;
+	}
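+
+	// Usage sketch (hypothetical consumer): YYEOF is the sentinel returned by
+	// NextToken once input is exhausted:
+	//
+	//   int tokenType;
+	//   while ((tokenType = scanner.NextToken) != StandardTokenizerInterface_Fields.YYEOF)
+	//   {
+	//     // handle tokenType (WORD_TYPE, NUMERIC_TYPE, ...)
+	//   }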
+
+}
\ No newline at end of file


[05/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
new file mode 100644
index 0000000..d3bb929
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -0,0 +1,789 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using ByteArrayDataInput = org.apache.lucene.store.ByteArrayDataInput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+	using FST = org.apache.lucene.util.fst.FST;
+
+	/// <summary>
+	/// Matches single or multi word synonyms in a token stream.
+	/// This token stream cannot properly handle position
+	/// increments != 1, ie, you should place this filter before
+	/// increments != 1, i.e., you should place this filter before
+	/// 
+	/// <para>Note that with the current implementation, parsing is
+	/// greedy, so whenever multiple parses would apply, the rule
+	/// starting the earliest and parsing the most tokens wins.
+	/// For example if you have these rules:
+	///      
+	/// <pre>
+	///   a -> x
+	///   a b -> y
+	///   b c d -> z
+	/// </pre>
+	/// 
+	/// Then input <code>a b c d e</code> parses to <code>y b c
+	/// d</code>, i.e. the 2nd rule "wins" because it started
+	/// earliest and matched the most input tokens of other rules
+	/// starting at that point.</para>
+	/// 
+	/// <para>A future improvement to this filter could allow
+	/// non-greedy parsing, such that the 3rd rule would win, and
+	/// also separately allow multiple parses, such that all 3
+	/// rules would match, perhaps even on a rule by rule
+	/// basis.</para>
+	/// 
+	/// <para><b>NOTE</b>: when a match occurs, the output tokens
+	/// associated with the matching rule are "stacked" on top of
+	/// the input stream (if the rule had
+	/// <code>keepOrig=true</code>) and also on top of another
+	/// matched rule's output tokens.  This is not a correct
+	/// solution, as really the output should be an arbitrary
+	/// graph/lattice.  For example, with the above match, you
+	/// would expect an exact <code>PhraseQuery</code> <code>"y b
+	/// c"</code> to match the parsed tokens, but it will fail to
+	/// do so.  This limitation is necessary because Lucene's
+	/// TokenStream (and index) cannot yet represent an arbitrary
+	/// graph.</para>
+	/// 
+	/// <para><b>NOTE</b>: If multiple incoming tokens arrive on the
+	/// same position, only the first token at that position is
+	/// used for parsing.  Subsequent tokens simply pass through
+	/// and are not parsed.  A future improvement would be to
+	/// allow these tokens to also be matched.</para>
+	/// </summary>
+
+	// TODO: maybe we should resolve token -> wordID then run
+	// FST on wordIDs, for better perf?
+
+	// TODO: a more efficient approach would be Aho/Corasick's
+	// algorithm
+	// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm
+	// It improves over the current approach here
+	// because it does not fully re-start matching at every
+	// token.  For example if one pattern is "a b c x"
+	// and another is "b c d" and the input is "a b c d", on
+	// trying to parse "a b c x" but failing when you got to x,
+	// rather than starting over again you really should
+	// immediately recognize that "b c d" matches at the next
+	// input.  I suspect this won't matter that much in
+	// practice, but it's possible on some set of synonyms it
+	// will.  We'd have to modify Aho/Corasick to enforce our
+	// conflict resolving (e.g. greedy matching) because that algo
+	// finds all matches.  This really amounts to adding a .*
+	// closure to the FST and then determinizing it.
+
+	public sealed class SynonymFilter : TokenFilter
+	{
+
+	  public const string TYPE_SYNONYM = "SYNONYM";
+
+	  private readonly SynonymMap synonyms;
+
+	  private readonly bool ignoreCase;
+	  private readonly int rollBufferSize;
+
+	  private int captureCount;
+
+	  // TODO: we should set PositionLengthAttr too...
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  // How many future input tokens have already been matched
+	  // to a synonym; because the matching is "greedy" we don't
+	  // try to do any more matching for such tokens:
+	  private int inputSkipCount;
+
+	  // Hold all buffered (read ahead) stacked input tokens for
+	  // a future position.  When multiple tokens are at the
+	  // same position, we only store (and match against) the
+	  // term for the first token at the position, but capture
+	  // state for (and enumerate) all other tokens at this
+	  // position:
+	  private class PendingInput
+	  {
+		internal readonly CharsRef term = new CharsRef();
+		internal AttributeSource.State state;
+		internal bool keepOrig;
+		internal bool matched;
+		internal bool consumed = true;
+		internal int startOffset;
+		internal int endOffset;
+
+		public virtual void reset()
+		{
+		  state = null;
+		  consumed = true;
+		  keepOrig = false;
+		  matched = false;
+		}
+	  }
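+
+	  // Sketch: each buffered slot pairs the first term seen at a position
+	  // (term) with a full AttributeSource.State snapshot (state), so the
+	  // filter can match against the term now and replay the original token
+	  // attributes later; keepOrig/matched/consumed track what still has to be
+	  // emitted for that position.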
+
+	  // Rolling buffer, holding pending input tokens we had to
+	  // clone because we needed to look ahead, indexed by
+	  // position:
+	  private readonly PendingInput[] futureInputs;
+
+	  // Holds pending output synonyms for one future position:
+	  private class PendingOutputs
+	  {
+		internal CharsRef[] outputs;
+		internal int[] endOffsets;
+		internal int[] posLengths;
+		internal int upto;
+		internal int count;
+		internal int posIncr = 1;
+		internal int lastEndOffset;
+		internal int lastPosLength;
+
+		public PendingOutputs()
+		{
+		  outputs = new CharsRef[1];
+		  endOffsets = new int[1];
+		  posLengths = new int[1];
+		}
+
+		public virtual void reset()
+		{
+		  upto = count = 0;
+		  posIncr = 1;
+		}
+
+		public virtual CharsRef pullNext()
+		{
+		  Debug.Assert(upto < count);
+		  lastEndOffset = endOffsets[upto];
+		  lastPosLength = posLengths[upto];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.CharsRef result = outputs[upto++];
+		  CharsRef result = outputs[upto++];
+		  posIncr = 0;
+		  if (upto == count)
+		  {
+			reset();
+		  }
+		  return result;
+		}
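+
+		// Sketch: after the first output is pulled for a position, posIncr
+		// drops to 0, so any remaining stacked synonyms are emitted at the
+		// same token position as that first output.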
+
+		public virtual int LastEndOffset
+		{
+			get
+			{
+			  return lastEndOffset;
+			}
+		}
+
+		public virtual int LastPosLength
+		{
+			get
+			{
+			  return lastPosLength;
+			}
+		}
+
+		public virtual void add(char[] output, int offset, int len, int endOffset, int posLength)
+		{
+		  if (count == outputs.Length)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.CharsRef[] next = new org.apache.lucene.util.CharsRef[org.apache.lucene.util.ArrayUtil.oversize(1+count, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+			CharsRef[] next = new CharsRef[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+			Array.Copy(outputs, 0, next, 0, count);
+			outputs = next;
+		  }
+		  if (count == endOffsets.Length)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int[] next = new int[org.apache.lucene.util.ArrayUtil.oversize(1+count, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_INT)];
+			int[] next = new int[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_INT)];
+			Array.Copy(endOffsets, 0, next, 0, count);
+			endOffsets = next;
+		  }
+		  if (count == posLengths.Length)
+		  {
+			int[] next = new int[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_INT)];
+			Array.Copy(posLengths, 0, next, 0, count);
+			posLengths = next;
+		  }
+		  if (outputs[count] == null)
+		  {
+			outputs[count] = new CharsRef();
+		  }
+		  outputs[count].copyChars(output, offset, len);
+		  // endOffset can be -1, in which case we should simply
+		  // use the endOffset of the input token, or X >= 0, in
+		  // which case we use X as the endOffset for this output
+		  endOffsets[count] = endOffset;
+		  posLengths[count] = posLength;
+		  count++;
+		}
+	  }
+
+	  private readonly ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+
+	  // Rolling buffer, holding stack of pending synonym
+	  // outputs, indexed by position:
+	  private readonly PendingOutputs[] futureOutputs;
+
+	  // Where (in rolling buffers) to write next input saved state:
+	  private int nextWrite;
+
+	  // Where (in rolling buffers) to read next input saved state:
+	  private int nextRead;
+
+	  // True once we've read last token
+	  private bool finished;
+
+	  private readonly FST.Arc<BytesRef> scratchArc;
+
+	  private readonly FST<BytesRef> fst;
+
+	  private readonly FST.BytesReader fstReader;
+
+
+	  private readonly BytesRef scratchBytes = new BytesRef();
+	  private readonly CharsRef scratchChars = new CharsRef();
+
+	  /// <param name="input"> input tokenstream </param>
+	  /// <param name="synonyms"> synonym map </param>
+	  /// <param name="ignoreCase"> case-folds input for matching with <seealso cref="Character#toLowerCase(int)"/>.
+	  ///                   Note, if you set this to true, it's your responsibility to lowercase
+	  ///                   the input entries when you create the <seealso cref="SynonymMap"/> </param>
+	  public SynonymFilter(TokenStream input, SynonymMap synonyms, bool ignoreCase) : base(input)
+	  {
+		this.synonyms = synonyms;
+		this.ignoreCase = ignoreCase;
+		this.fst = synonyms.fst;
+		if (fst == null)
+		{
+		  throw new System.ArgumentException("fst must be non-null");
+		}
+		this.fstReader = fst.BytesReader;
+
+		// Must be 1+ so that when roll buffer is at full
+		// lookahead we can distinguish this full buffer from
+		// the empty buffer:
+		rollBufferSize = 1 + synonyms.maxHorizontalContext;
+
+		futureInputs = new PendingInput[rollBufferSize];
+		futureOutputs = new PendingOutputs[rollBufferSize];
+		for (int pos = 0;pos < rollBufferSize;pos++)
+		{
+		  futureInputs[pos] = new PendingInput();
+		  futureOutputs[pos] = new PendingOutputs();
+		}
+
+		//System.out.println("FSTFilt maxH=" + synonyms.maxHorizontalContext);
+
+		scratchArc = new FST.Arc<BytesRef>(); // C# has no diamond operator; the arc carries the FST's output type
+	  }
+
+	  private void capture()
+	  {
+		captureCount++;
+		//System.out.println("  capture slot=" + nextWrite);
+		PendingInput input = futureInputs[nextWrite];
+
+		input.state = captureState();
+		input.consumed = false;
+		input.term.copyChars(termAtt.buffer(), 0, termAtt.length());
+
+		nextWrite = rollIncr(nextWrite);
+
+		// Buffer head should never catch up to tail:
+		Debug.Assert(nextWrite != nextRead);
+	  }
+
+	  /*
+	   This is the core of this TokenFilter: it locates the
+	   synonym matches and buffers up the results into
+	   futureInputs/Outputs.
+	
+	   NOTE: this calls input.incrementToken and does not
+	   capture the state if no further tokens were checked.  So
+	   caller must then forward state to our caller, or capture:
+	  */
+	  private int lastStartOffset;
+	  private int lastEndOffset;
+
+	  private void parse()
+	  {
+		//System.out.println("\nS: parse");
+
+		Debug.Assert(inputSkipCount == 0);
+
+		int curNextRead = nextRead;
+
+		// Holds the longest match we've seen so far:
+		BytesRef matchOutput = null;
+		int matchInputLength = 0;
+		int matchEndOffset = -1;
+
+		BytesRef pendingOutput = fst.outputs.NoOutput;
+		fst.getFirstArc(scratchArc);
+
+		Debug.Assert(scratchArc.output == fst.outputs.NoOutput);
+
+		int tokenCount = 0;
+
+		while (true)
+		{
+
+		  // Pull next token's chars:
+		  char[] buffer;
+		  int bufferLen;
+		  //System.out.println("  cycle nextRead=" + curNextRead + " nextWrite=" + nextWrite);
+
+		  int inputEndOffset = 0;
+
+		  if (curNextRead == nextWrite)
+		  {
+
+			// We used up our lookahead buffer of input tokens
+			// -- pull next real input token:
+
+			if (finished)
+			{
+			  break;
+			}
+			else
+			{
+			  //System.out.println("  input.incrToken");
+			  Debug.Assert(futureInputs[nextWrite].consumed);
+			  // Not correct: a syn match whose output is longer
+			  // than its input can set future inputs keepOrig
+			  // to true:
+			  //assert !futureInputs[nextWrite].keepOrig;
+			  if (input.incrementToken())
+			  {
+				buffer = termAtt.buffer();
+				bufferLen = termAtt.length();
+				PendingInput input = futureInputs[nextWrite];
+				lastStartOffset = input.startOffset = offsetAtt.startOffset();
+				lastEndOffset = input.endOffset = offsetAtt.endOffset();
+				inputEndOffset = input.endOffset;
+				//System.out.println("  new token=" + new String(buffer, 0, bufferLen));
+				if (nextRead != nextWrite)
+				{
+				  capture();
+				}
+				else
+				{
+				  input.consumed = false;
+				}
+
+			  }
+			  else
+			  {
+				// No more input tokens
+				//System.out.println("      set end");
+				finished = true;
+				break;
+			  }
+			}
+		  }
+		  else
+		  {
+			// Still in our lookahead
+			buffer = futureInputs[curNextRead].term.chars;
+			bufferLen = futureInputs[curNextRead].term.length;
+			inputEndOffset = futureInputs[curNextRead].endOffset;
+			//System.out.println("  old token=" + new String(buffer, 0, bufferLen));
+		  }
+
+		  tokenCount++;
+
+		  // Run each char in this token through the FST:
+		  int bufUpto = 0;
+		  while (bufUpto < bufferLen)
+		  {
+			// Character here is assumed to be a Java-compat helper (codePointAt/charCount/toLowerCase
+			// over int code points); .NET's char type has no such static methods:
+			int codePoint = Character.CodePointAt(buffer, bufUpto, bufferLen);
+			if (fst.findTargetArc(ignoreCase ? Character.ToLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null)
+			{
+			  //System.out.println("    stop");
+			  goto byTokenBreak;
+			}
+
+			// Accum the output
+			pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+			//System.out.println("    char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output);
+			bufUpto += Character.CharCount(codePoint); // Character: assumed Java-compat helper, as above
+		  }
+
+		  // OK, entire token matched; now see if this is a final
+		  // state:
+		  if (scratchArc.Final)
+		  {
+			matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
+			matchInputLength = tokenCount;
+			matchEndOffset = inputEndOffset;
+			//System.out.println("  found matchLength=" + matchInputLength + " output=" + matchOutput);
+		  }
+
+		  // See if the FST wants to continue matching (ie, needs to
+		  // see the next input token):
+		  if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null)
+		  {
+			// No further rules can match here; we're done
+			// searching for matching rules starting at the
+			// current input position.
+			break;
+		  }
+		  else
+		  {
+			// More matching is possible -- accum the output (if
+			// any) of the WORD_SEP arc:
+			pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+			if (nextRead == nextWrite)
+			{
+			  capture();
+			}
+		  }
+
+		  curNextRead = rollIncr(curNextRead);
+		}
+		byTokenBreak:
+
+		if (nextRead == nextWrite && !finished)
+		{
+		  //System.out.println("  skip write slot=" + nextWrite);
+		  nextWrite = rollIncr(nextWrite);
+		}
+
+		if (matchOutput != null)
+		{
+		  //System.out.println("  add matchLength=" + matchInputLength + " output=" + matchOutput);
+		  inputSkipCount = matchInputLength;
+		  addOutput(matchOutput, matchInputLength, matchEndOffset);
+		}
+		else if (nextRead != nextWrite)
+		{
+		  // Even though we had no match here, we set to 1
+		  // because we need to skip current input token before
+		  // trying to match again:
+		  inputSkipCount = 1;
+		}
+		else
+		{
+		  Debug.Assert(finished);
+		}
+
+		//System.out.println("  parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
+	  }
+
+	  // Interleaves all output tokens onto the futureOutputs:
+	  private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset)
+	  {
+		bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
+
+		int code = bytesReader.readVInt();
+		bool keepOrig = (code & 0x1) == 0;
+		int count = (int)((uint)code >> 1);
+		//System.out.println("  addOutput count=" + count + " keepOrig=" + keepOrig);
+		for (int outputIDX = 0;outputIDX < count;outputIDX++)
+		{
+		  synonyms.words.get(bytesReader.readVInt(), scratchBytes);
+		  //System.out.println("    outIDX=" + outputIDX + " bytes=" + scratchBytes.length);
+		  UnicodeUtil.UTF8toUTF16(scratchBytes, scratchChars);
+		  int lastStart = scratchChars.offset;
+		  int chEnd = lastStart + scratchChars.length;
+		  int outputUpto = nextRead;
+		  for (int chIDX = lastStart;chIDX <= chEnd;chIDX++)
+		  {
+			if (chIDX == chEnd || scratchChars.chars[chIDX] == SynonymMap.WORD_SEPARATOR)
+			{
+			  int outputLen = chIDX - lastStart;
+			  // Caller is not allowed to have empty string in
+			  // the output:
+			  Debug.Assert(outputLen > 0, "output contains empty string: " + scratchChars);
+			  int endOffset;
+			  int posLen;
+			  if (chIDX == chEnd && lastStart == scratchChars.offset)
+			  {
+				// This rule had a single output token, so, we set
+				// this output's endOffset to the current
+				// endOffset (ie, endOffset of the last input
+				// token it matched):
+				endOffset = matchEndOffset;
+				posLen = keepOrig ? matchInputLength : 1;
+			  }
+			  else
+			  {
+				// This rule has more than one output token; we
+				// can't pick any particular endOffset for this
+				// case, so, we inherit the endOffset for the
+				// input token which this output overlaps:
+				endOffset = -1;
+				posLen = 1;
+			  }
+			  futureOutputs[outputUpto].add(scratchChars.chars, lastStart, outputLen, endOffset, posLen);
+			  //System.out.println("      " + new String(scratchChars.chars, lastStart, outputLen) + " outputUpto=" + outputUpto);
+			  lastStart = 1 + chIDX;
+			  //System.out.println("  slot=" + outputUpto + " keepOrig=" + keepOrig);
+			  outputUpto = rollIncr(outputUpto);
+			  Debug.Assert(futureOutputs[outputUpto].posIncr == 1, "outputUpto=" + outputUpto + " vs nextWrite=" + nextWrite);
+			}
+		  }
+		}
+
+		int upto = nextRead;
+		for (int idx = 0;idx < matchInputLength;idx++)
+		{
+		  futureInputs[upto].keepOrig |= keepOrig;
+		  futureInputs[upto].matched = true;
+		  upto = rollIncr(upto);
+		}
+	  }
+
+	  // ++ mod rollBufferSize
+	  private int rollIncr(int count)
+	  {
+		count++;
+		if (count == rollBufferSize)
+		{
+		  return 0;
+		}
+		else
+		{
+		  return count;
+		}
+	  }
+
+	  // for testing
+	  internal int CaptureCount
+	  {
+		  get
+		  {
+			return captureCount;
+		  }
+	  }
+
+	  public override bool incrementToken()
+	  {
+
+		//System.out.println("\nS: incrToken inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
+
+		while (true)
+		{
+
+		  // First play back any buffered future inputs/outputs
+		  // w/o running parsing again:
+		  while (inputSkipCount != 0)
+		  {
+
+			// At each position, we first output the original
+			// token
+
+			// TODO: maybe just a PendingState class, holding
+			// both input & outputs?
+			PendingInput input = futureInputs[nextRead];
+			PendingOutputs outputs = futureOutputs[nextRead];
+
+			//System.out.println("  cycle nextRead=" + nextRead + " nextWrite=" + nextWrite + " inputSkipCount="+ inputSkipCount + " input.keepOrig=" + input.keepOrig + " input.consumed=" + input.consumed + " input.state=" + input.state);
+
+			if (!input.consumed && (input.keepOrig || !input.matched))
+			{
+			  if (input.state != null)
+			  {
+				// Return a previously saved token (because we
+				// had to lookahead):
+				restoreState(input.state);
+			  }
+			  else
+			  {
+				// Pass-through case: return token we just pulled
+				// but didn't capture:
+				Debug.Assert(inputSkipCount == 1, "inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead);
+			  }
+			  input.reset();
+			  if (outputs.count > 0)
+			  {
+				outputs.posIncr = 0;
+			  }
+			  else
+			  {
+				nextRead = rollIncr(nextRead);
+				inputSkipCount--;
+			  }
+			  //System.out.println("  return token=" + termAtt.toString());
+			  return true;
+			}
+			else if (outputs.upto < outputs.count)
+			{
+			  // Still have pending outputs to replay at this
+			  // position
+			  input.reset();
+			  int posIncr = outputs.posIncr;
+			  CharsRef output = outputs.pullNext();
+			  clearAttributes();
+			  termAtt.copyBuffer(output.chars, output.offset, output.length);
+			  typeAtt.Type = TYPE_SYNONYM;
+			  int endOffset = outputs.LastEndOffset;
+			  if (endOffset == -1)
+			  {
+				endOffset = input.endOffset;
+			  }
+			  offsetAtt.setOffset(input.startOffset, endOffset);
+			  posIncrAtt.PositionIncrement = posIncr;
+			  posLenAtt.PositionLength = outputs.LastPosLength;
+			  if (outputs.count == 0)
+			  {
+				// Done with the buffered input and all outputs at
+				// this position
+				nextRead = rollIncr(nextRead);
+				inputSkipCount--;
+			  }
+			  //System.out.println("  return token=" + termAtt.toString());
+			  return true;
+			}
+			else
+			{
+			  // Done with the buffered input and all outputs at
+			  // this position
+			  input.reset();
+			  nextRead = rollIncr(nextRead);
+			  inputSkipCount--;
+			}
+		  }
+
+		  if (finished && nextRead == nextWrite)
+		  {
+			// End case: if any output syns went beyond end of
+			// input stream, enumerate them now:
+			PendingOutputs outputs = futureOutputs[nextRead];
+			if (outputs.upto < outputs.count)
+			{
+			  int posIncr = outputs.posIncr;
+			  CharsRef output = outputs.pullNext();
+			  futureInputs[nextRead].reset();
+			  if (outputs.count == 0)
+			  {
+				nextWrite = nextRead = rollIncr(nextRead);
+			  }
+			  clearAttributes();
+			  // Keep offset from last input token:
+			  offsetAtt.setOffset(lastStartOffset, lastEndOffset);
+			  termAtt.copyBuffer(output.chars, output.offset, output.length);
+			  typeAtt.Type = TYPE_SYNONYM;
+			  //System.out.println("  set posIncr=" + outputs.posIncr + " outputs=" + outputs);
+			  posIncrAtt.PositionIncrement = posIncr;
+			  //System.out.println("  return token=" + termAtt.toString());
+			  return true;
+			}
+			else
+			{
+			  return false;
+			}
+		  }
+
+		  // Find new synonym matches:
+		  parse();
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		captureCount = 0;
+		finished = false;
+		inputSkipCount = 0;
+		nextRead = nextWrite = 0;
+
+		// In normal usage these resets would not be needed,
+		// since they reset-as-they-are-consumed, but the app
+		// may not consume all input tokens (or we might hit an
+		// exception), in which case we have leftover state
+		// here:
+		foreach (PendingInput input in futureInputs)
+		{
+		  input.reset();
+		}
+		foreach (PendingOutputs output in futureOutputs)
+		{
+		  output.reset();
+		}
+	  }
+	}
+
+}
\ No newline at end of file
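
A minimal usage sketch for the filter above, assuming the Builder API from
SynonymMap.cs later in this commit (the rule contents are illustrative):

    var builder = new SynonymMap.Builder(true); // dedup identical rules
    // Map the single word "dns" to the three-word phrase "domain name service",
    // keeping the original token (includeOrig = true):
    builder.add(new CharsRef("dns"),
                SynonymMap.Builder.join(new[] { "domain", "name", "service" }, new CharsRef()),
                true);
    SynonymMap map = builder.build();
    // Then, inside an Analyzer's createComponents:
    //   TokenStream result = new SynonymFilter(source, map, true /* ignoreCase */);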

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
new file mode 100644
index 0000000..b6967d8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
@@ -0,0 +1,115 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Synonym;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using Version = org.apache.lucene.util.Version;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+
+	/// <summary>
+	/// Factory for <seealso cref="SynonymFilter"/>.
+	/// <pre class="prettyprint" >
+	/// &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" 
+	///             format="solr" ignoreCase="false" expand="true" 
+	///             tokenizerFactory="solr.WhitespaceTokenizerFactory"
+	///             [optional tokenizer factory parameters]/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// <para>
+	/// An optional param name prefix of "tokenizerFactory." may be used for any 
+	/// init params that the SynonymFilterFactory needs to pass to the specified 
+	/// TokenizerFactory.  If the TokenizerFactory expects an init param with 
+	/// the same name as an init param used by the SynonymFilterFactory, the prefix 
+	/// is mandatory.
+	/// </para>
+	/// <para>
+	/// The optional {@code format} parameter controls how the synonyms will be parsed:
+	/// It supports the short names of {@code solr} for <seealso cref="SolrSynonymParser"/> 
+	/// and {@code wordnet} for <seealso cref="WordnetSynonymParser"/>, or your own 
+	/// {@code SynonymMap.Parser} class name. The default is {@code solr}.
+	/// A custom <seealso cref="SynonymMap.Parser"/> is expected to have a constructor taking:
+	/// <ul>
+	///   <li><code>boolean dedup</code> - true if duplicates should be ignored, false otherwise</li>
+	///   <li><code>boolean expand</code> - true if conflation groups should be expanded, false if they are one-directional</li>
+	///   <li><code><seealso cref="Analyzer"/> analyzer</code> - an analyzer used for each raw synonym</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public class SynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly TokenFilterFactory delegator;
+
+	  public SynonymFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (luceneMatchVersion.onOrAfter(Version.LUCENE_34))
+		{
+		  delegator = new FSTSynonymFilterFactory(new Dictionary<string, string>(OriginalArgs));
+		}
+		else
+		{
+		  // Check whether the new optional arg "format" was used; it makes no sense for the
+		  // old factory, as it's wired to Solr's synonym format only.
+		  if (args.ContainsKey("format") && !args["format"].Equals("solr"))
+		  {
+			throw new System.ArgumentException("You must specify luceneMatchVersion >= 3.4 to use alternate synonyms formats");
+		  }
+		  delegator = new SlowSynonymFilterFactory(new Dictionary<string, string>(OriginalArgs));
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return delegator.create(input);
+	  }
+
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		((ResourceLoaderAware) delegator).inform(loader);
+	  }
+
+	  /// <summary>
+	  /// Access to the delegator TokenFilterFactory for test verification
+	  /// </summary>
+	  /// @deprecated Method exists only for testing 4x, will be removed in 5.0
+	  /// @lucene.internal 
+	  [Obsolete("Method exists only for testing 4x, will be removed in 5.0")]
+	  internal virtual TokenFilterFactory Delegator
+	  {
+		  get
+		  {
+			return delegator;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
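
For reference, a hedged sketch of constructing the factory directly; the argument
names mirror the Solr attributes in the class comment above, and the FST-based
delegate is chosen because luceneMatchVersion is 3.4 or later:

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_47" },
        { "synonyms", "synonyms.txt" },
        { "format", "solr" },
        { "ignoreCase", "false" },
        { "expand", "true" }
    };
    var factory = new SynonymFilterFactory(args);
    // factory.create(tokenStream) now delegates to FSTSynonymFilterFactory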

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
new file mode 100644
index 0000000..004572d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
@@ -0,0 +1,430 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.IO;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using ByteArrayDataOutput = org.apache.lucene.store.ByteArrayDataOutput;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using BytesRefHash = org.apache.lucene.util.BytesRefHash;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+	using ByteSequenceOutputs = org.apache.lucene.util.fst.ByteSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using Util = org.apache.lucene.util.fst.Util;
+
+	/// <summary>
+	/// A map of synonyms; keys and values are phrases.
+	/// @lucene.experimental
+	/// </summary>
+	public class SynonymMap
+	{
+	  /// <summary>
+	  /// for multiword support, you must separate words with this separator </summary>
+	  public const char WORD_SEPARATOR = (char)0;
+	  /// <summary>
+	  /// map&lt;input word, list&lt;ord&gt;&gt; </summary>
+	  public readonly FST<BytesRef> fst;
+	  /// <summary>
+	  /// map&lt;ord, outputword&gt; </summary>
+	  public readonly BytesRefHash words;
+	  /// <summary>
+	  /// maxHorizontalContext: maximum context we need on the tokenstream </summary>
+	  public readonly int maxHorizontalContext;
+
+	  public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext)
+	  {
+		this.fst = fst;
+		this.words = words;
+		this.maxHorizontalContext = maxHorizontalContext;
+	  }
+
+	  /// <summary>
+	  /// Builds an FSTSynonymMap.
+	  /// <para>
+	  /// Call add() until you have added all the mappings, then call build() to get an FSTSynonymMap
+	  /// @lucene.experimental
+	  /// </para>
+	  /// </summary>
+	  public class Builder
+	  {
+		internal readonly Dictionary<CharsRef, MapEntry> workingSet = new Dictionary<CharsRef, MapEntry>();
+		internal readonly BytesRefHash words = new BytesRefHash();
+		internal readonly BytesRef utf8Scratch = new BytesRef(8);
+		internal int maxHorizontalContext;
+		internal readonly bool dedup;
+
+		/// <summary>
+		/// If dedup is true then identical rules (same input,
+		///  same output) will be added only once. 
+		/// </summary>
+		public Builder(bool dedup)
+		{
+		  this.dedup = dedup;
+		}
+
+		private class MapEntry
+		{
+		  internal bool includeOrig;
+		  // we could sort for better sharing ultimately, but it could confuse people
+		  internal List<int?> ords = new List<int?>();
+		}
+
+		/// <summary>
+		/// Sugar: just joins the provided terms with {@link
+		///  SynonymMap#WORD_SEPARATOR}.  reuse and its chars
+		///  must not be null. 
+		/// </summary>
+		public static CharsRef join(string[] words, CharsRef reuse)
+		{
+		  int upto = 0;
+		  char[] buffer = reuse.chars;
+		  foreach (string word in words)
+		  {
+			int wordLen = word.Length;
+			int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
+			if (needed > buffer.Length)
+			{
+			  reuse.grow(needed);
+			  buffer = reuse.chars;
+			}
+			if (upto > 0)
+			{
+			  buffer[upto++] = SynonymMap.WORD_SEPARATOR;
+			}
+
+			word.CopyTo(0, buffer, upto, wordLen);
+			upto += wordLen;
+		  }
+		  reuse.length = upto;
+		  return reuse;
+		}
+
+
+
+		/// <summary>
+		/// only used for asserting! </summary>
+		internal virtual bool hasHoles(CharsRef chars)
+		{
+		  int end = chars.offset + chars.length;
+		  for (int idx = chars.offset + 1;idx < end;idx++)
+		  {
+			if (chars.chars[idx] == SynonymMap.WORD_SEPARATOR && chars.chars[idx - 1] == SynonymMap.WORD_SEPARATOR)
+			{
+			  return true;
+			}
+		  }
+		  if (chars.chars[chars.offset] == '\u0000')
+		  {
+			return true;
+		  }
+		  if (chars.chars[chars.offset + chars.length - 1] == '\u0000')
+		  {
+			return true;
+		  }
+
+		  return false;
+		}
+
+		// NOTE: while it's tempting to make this public, since
+		// caller's parser likely knows the
+		// numInput/numOutputWords, sneaky exceptions, much later
+		// on, will result if these values are wrong; so we always
+		// recompute ourselves to be safe:
+		internal virtual void add(CharsRef input, int numInputWords, CharsRef output, int numOutputWords, bool includeOrig)
+		{
+		  // first convert to UTF-8
+		  if (numInputWords <= 0)
+		  {
+			throw new System.ArgumentException("numInputWords must be > 0 (got " + numInputWords + ")");
+		  }
+		  if (input.length <= 0)
+		  {
+			throw new System.ArgumentException("input.length must be > 0 (got " + input.length + ")");
+		  }
+		  if (numOutputWords <= 0)
+		  {
+			throw new System.ArgumentException("numOutputWords must be > 0 (got " + numOutputWords + ")");
+		  }
+		  if (output.length <= 0)
+		  {
+			throw new System.ArgumentException("output.length must be > 0 (got " + output.length + ")");
+		  }
+
+		  Debug.Assert(!hasHoles(input), "input has holes: " + input);
+		  Debug.Assert(!hasHoles(output), "output has holes: " + output);
+
+		  //System.out.println("fmap.add input=" + input + " numInputWords=" + numInputWords + " output=" + output + " numOutputWords=" + numOutputWords);
+		  UnicodeUtil.UTF16toUTF8(output.chars, output.offset, output.length, utf8Scratch);
+		  // lookup in hash
+		  int ord = words.add(utf8Scratch);
+		  if (ord < 0)
+		  {
+			// already exists in our hash
+			ord = (-ord) - 1;
+			//System.out.println("  output=" + output + " old ord=" + ord);
+		  }
+		  else
+		  {
+			//System.out.println("  output=" + output + " new ord=" + ord);
+		  }
+
+		  MapEntry e;
+		  if (!workingSet.TryGetValue(input, out e)) // the indexer would throw KeyNotFoundException for a new input
+		  {
+			e = new MapEntry();
+			workingSet[CharsRef.deepCopyOf(input)] = e; // make a copy, since we will keep around in our map
+		  }
+
+		  e.ords.Add(ord);
+		  e.includeOrig |= includeOrig;
+		  maxHorizontalContext = Math.Max(maxHorizontalContext, numInputWords);
+		  maxHorizontalContext = Math.Max(maxHorizontalContext, numOutputWords);
+		}
+
+		internal virtual int countWords(CharsRef chars)
+		{
+		  int wordCount = 1;
+		  int upto = chars.offset;
+		  int limit = chars.offset + chars.length;
+		  while (upto < limit)
+		  {
+			if (chars.chars[upto++] == SynonymMap.WORD_SEPARATOR)
+			{
+			  wordCount++;
+			}
+		  }
+		  return wordCount;
+		}
+
+		/// <summary>
+		/// Add a phrase->phrase synonym mapping.
+		/// Phrases are character sequences where words are
+		/// separated with character zero (U+0000).  Empty words
+		/// (two U+0000s in a row) are not allowed in the input nor
+		/// the output!
+		/// </summary>
+		/// <param name="input"> input phrase </param>
+		/// <param name="output"> output phrase </param>
+		/// <param name="includeOrig"> true if the original should be included </param>
+		public virtual void add(CharsRef input, CharsRef output, bool includeOrig)
+		{
+		  add(input, countWords(input), output, countWords(output), includeOrig);
+		}
+
+		/// <summary>
+		/// Builds a <seealso cref="SynonymMap"/> and returns it.
+		/// </summary>
+		public virtual SynonymMap build()
+		{
+		  ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
+		  // TODO: are we using the best sharing options?
+		  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
+
+		  BytesRef scratch = new BytesRef(64);
+		  ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
+
+		  HashSet<int?> dedupSet;
+
+		  if (dedup)
+		  {
+			dedupSet = new HashSet<int?>();
+		  }
+		  else
+		  {
+			dedupSet = null;
+		  }
+
+		  sbyte[] spare = new sbyte[5];
+
+		  CharsRef[] sortedKeys = new CharsRef[workingSet.Count];
+		  workingSet.Keys.CopyTo(sortedKeys, 0);
+		  Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparator);
+
+		  IntsRef scratchIntsRef = new IntsRef();
+
+		  //System.out.println("fmap.build");
+		  for (int keyIdx = 0; keyIdx < sortedKeys.Length; keyIdx++)
+		  {
+			CharsRef input = sortedKeys[keyIdx];
+			MapEntry output = workingSet[input];
+
+			int numEntries = output.ords.Count;
+			// output size, assume the worst case
+			int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry
+
+			scratch.grow(estimatedSize);
+			scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.Length);
+			Debug.Assert(scratch.offset == 0);
+
+			// now write our output data:
+			int count = 0;
+			for (int i = 0; i < numEntries; i++)
+			{
+			  if (dedupSet != null)
+			  {
+				// box once
+				int? ent = output.ords[i];
+				if (dedupSet.Contains(ent))
+				{
+				  continue;
+				}
+				dedupSet.Add(ent);
+			  }
+			  scratchOutput.writeVInt(output.ords[i].Value); // ords holds int?; unwrap for the vInt write
+			  count++;
+			}
+
+			int pos = scratchOutput.Position;
+			scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
+			int pos2 = scratchOutput.Position;
+			int vIntLen = pos2 - pos;
+
+			// Move the count + includeOrig to the front of the byte[]:
+			Array.Copy(scratch.bytes, pos, spare, 0, vIntLen);
+			Array.Copy(scratch.bytes, 0, scratch.bytes, vIntLen, pos);
+			Array.Copy(spare, 0, scratch.bytes, 0, vIntLen);
+
+			if (dedupSet != null)
+			{
+			  dedupSet.Clear();
+			}
+
+			scratch.length = scratchOutput.Position - scratch.offset;
+			//System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
+			builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
+		  }
+
+		  FST<BytesRef> fst = builder.finish();
+		  return new SynonymMap(fst, words, maxHorizontalContext);
+		}
+	  }
+
+	  /// <summary>
+	  /// Abstraction for parsing synonym files.
+	  /// 
+	  /// @lucene.experimental
+	  /// </summary>
+	  public abstract class Parser : Builder
+	  {
+
+		internal readonly Analyzer analyzer;
+
+		public Parser(bool dedup, Analyzer analyzer) : base(dedup)
+		{
+		  this.analyzer = analyzer;
+		}
+
+		/// <summary>
+		/// Parse the given input, adding synonyms to the inherited <seealso cref="Builder"/>. </summary>
+		/// <param name="in"> The input to parse </param>
+		public abstract void parse(Reader @in);
+
+		/// <summary>
+		/// Sugar: analyzes the text with the analyzer and
+		///  separates by <seealso cref="SynonymMap#WORD_SEPARATOR"/>.
+		///  reuse and its chars must not be null. 
+		/// </summary>
+		public virtual CharsRef analyze(string text, CharsRef reuse)
+		{
+		  IOException priorException = null;
+		  TokenStream ts = analyzer.tokenStream("", text);
+		  try
+		  {
+			CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
+			PositionIncrementAttribute posIncAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
+			ts.reset();
+			reuse.length = 0;
+			while (ts.incrementToken())
+			{
+			  int length = termAtt.length();
+			  if (length == 0)
+			  {
+				throw new System.ArgumentException("term: " + text + " analyzed to a zero-length token");
+			  }
+			  if (posIncAtt.PositionIncrement != 1)
+			  {
+				throw new System.ArgumentException("term: " + text + " analyzed to a token with posinc != 1");
+			  }
+			  reuse.grow(reuse.length + length + 1); // current + word + separator
+			  int end = reuse.offset + reuse.length;
+			  if (reuse.length > 0)
+			  {
+				reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
+				reuse.length++;
+			  }
+			  Array.Copy(termAtt.buffer(), 0, reuse.chars, end, length);
+			  reuse.length += length;
+			}
+			ts.end();
+		  }
+		  catch (IOException e)
+		  {
+			priorException = e;
+		  }
+		  finally
+		  {
+			IOUtils.closeWhileHandlingException(priorException, ts);
+		  }
+		  if (reuse.length == 0)
+		  {
+			throw new System.ArgumentException("term: " + text + " was completely eliminated by analyzer");
+		  }
+		  return reuse;
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file
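
The vInt header written by build() above is what SynonymFilter.addOutput decodes:
the low bit is the inverted includeOrig flag (0 means keep the original token) and
the remaining bits carry the number of output ords. The round trip, extracted for
clarity:

    int header = count << 1 | (includeOrig ? 0 : 1);  // encode, as in build()
    bool keepOrig = (header & 0x1) == 0;              // decode, as in addOutput()
    int decodedCount = (int)((uint)header >> 1);      // unsigned shift recovers the count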

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
new file mode 100644
index 0000000..0bf9890
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
@@ -0,0 +1,135 @@
+using System;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// Parser for wordnet prolog format
+	/// <para>
+	/// See http://wordnet.princeton.edu/man/prologdb.5WN.html for a description of the format.
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	// TODO: allow you to specify syntactic categories (e.g. just nouns, etc)
+	public class WordnetSynonymParser : SynonymMap.Parser
+	{
+	  private readonly bool expand;
+
+	  public WordnetSynonymParser(bool dedup, bool expand, Analyzer analyzer) : base(dedup, analyzer)
+	  {
+		this.expand = expand;
+	  }
+
+	  public override void parse(Reader @in)
+	  {
+		LineNumberReader br = new LineNumberReader(@in);
+		try
+		{
+		  string line = null;
+		  string lastSynSetID = "";
+		  CharsRef[] synset = new CharsRef[8];
+		  int synsetSize = 0;
+
+		  while ((line = br.readLine()) != null)
+		  {
+			string synSetID = line.Substring(2, 9);
+
+			if (!synSetID.Equals(lastSynSetID))
+			{
+			  addInternal(synset, synsetSize);
+			  synsetSize = 0;
+			}
+
+			if (synset.Length <= synsetSize+1)
+			{
+			  CharsRef[] larger = new CharsRef[synset.Length * 2];
+			  Array.Copy(synset, 0, larger, 0, synsetSize);
+			  synset = larger;
+			}
+
+			synset[synsetSize] = parseSynonym(line, synset[synsetSize]);
+			synsetSize++;
+			lastSynSetID = synSetID;
+		  }
+
+		  // final synset in the file
+		  addInternal(synset, synsetSize);
+		}
+		catch (System.ArgumentException e)
+		{
+		  ParseException ex = new ParseException("Invalid synonym rule at line " + br.LineNumber, 0);
+		  ex.initCause(e);
+		  throw ex;
+		}
+		finally
+		{
+		  br.close();
+		}
+	  }
+
+	  private CharsRef parseSynonym(string line, CharsRef reuse)
+	  {
+		if (reuse == null)
+		{
+		  reuse = new CharsRef(8);
+		}
+
+		int start = line.IndexOf('\'') + 1;
+		int end = line.LastIndexOf('\'');
+
+		string text = line.Substring(start, end - start).Replace("''", "'");
+		return analyze(text, reuse);
+	  }
+
+	  private void addInternal(CharsRef[] synset, int size)
+	  {
+		if (size <= 1)
+		{
+		  return; // nothing to do
+		}
+
+		if (expand)
+		{
+		  for (int i = 0; i < size; i++)
+		  {
+			for (int j = 0; j < size; j++)
+			{
+			  add(synset[i], synset[j], false);
+			}
+		  }
+		}
+		else
+		{
+		  for (int i = 0; i < size; i++)
+		  {
+			add(synset[i], synset[0], false);
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file
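
For context, parse() above consumes WordNet's prolog s(...) entries, e.g. (sample
lines for illustration, not from a real database dump):

    s(100001740,1,'entity',n,1,11).
    s(100002137,1,'abstraction',n,6,0).
    s(100002137,2,'abstract entity',n,1,0).

line.Substring(2, 9) pulls the nine-digit synset id, so consecutive lines sharing
an id accumulate into one synset; parseSynonym takes the text between the first
and last single quote and unescapes doubled quotes ('') to a literal quote.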

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
new file mode 100644
index 0000000..86c0811
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
@@ -0,0 +1,143 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.th
+{
+
+	/// <summary>
+	/// Copyright 2006 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopAnalyzer = org.apache.lucene.analysis.core.StopAnalyzer;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for the Thai language. It uses <seealso cref="java.text.BreakIterator"/> to break words.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ThaiAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, a set of Thai stopwords is used by default
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class ThaiAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// File containing default Thai stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+	  /// <summary>
+	  /// The comment character in the stopwords file.  
+	  /// All lines prefixed with this will be ignored.
+	  /// </summary>
+	  private const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(ThaiAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  public ThaiAnalyzer(Version matchVersion) : this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STOP_SET : StopAnalyzer.ENGLISH_STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public ThaiAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="ThaiWordFilter"/>, and
+	  ///         <seealso cref="StopFilter"/> </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_48))
+		{
+		  Tokenizer source = new ThaiTokenizer(reader);
+		  TokenStream result = new LowerCaseFilter(matchVersion, source);
+		  result = new StopFilter(matchVersion, result, stopwords);
+		  return new TokenStreamComponents(source, result);
+		}
+		else
+		{
+		  Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		  TokenStream result = new StandardFilter(matchVersion, source);
+		  if (matchVersion.onOrAfter(Version.LUCENE_31))
+		  {
+			result = new LowerCaseFilter(matchVersion, result);
+		  }
+		  result = new ThaiWordFilter(matchVersion, result);
+		  return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+		}
+	  }
+	}
+
+}
\ No newline at end of file
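
A hedged usage sketch for the analyzer above (the field name and text are
illustrative; with LUCENE_48 the ThaiTokenizer path in createComponents is taken):

    var analyzer = new ThaiAnalyzer(Version.LUCENE_48);
    TokenStream ts = analyzer.tokenStream("body", "ภาษาไทย");
    // tokens flow through ThaiTokenizer -> LowerCaseFilter -> StopFilter;
    // older matchVersions fall back to StandardTokenizer + ThaiWordFilter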

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
new file mode 100644
index 0000000..3b472ae
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -0,0 +1,116 @@
+namespace org.apache.lucene.analysis.th
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharArrayIterator = org.apache.lucene.analysis.util.CharArrayIterator;
+	using SegmentingTokenizerBase = org.apache.lucene.analysis.util.SegmentingTokenizerBase;
+
+	/// <summary>
+	/// Tokenizer that uses <seealso cref="BreakIterator"/> to tokenize Thai text.
+	/// <para>WARNING: this tokenizer may not be supported by all JREs.
+	///    It is known to work with Sun/Oracle and Harmony JREs.
+	///    If your application needs to be fully portable, consider using ICUTokenizer instead,
+	///    which uses an ICU Thai BreakIterator that will always be available.
+	/// </para>
+	/// </summary>
+	public class ThaiTokenizer : SegmentingTokenizerBase
+	{
+	  /// <summary>
+	  /// True if the JRE supports a working dictionary-based breakiterator for Thai.
+	  /// If this is false, this tokenizer will not work at all!
+	  /// </summary>
+	  public static readonly bool DBBI_AVAILABLE;
+	  private static readonly BreakIterator proto = BreakIterator.getWordInstance(new Locale("th"));
+	  static ThaiTokenizer()
+	  {
+		// check that we have a working dictionary-based break iterator for thai
+		proto.Text = "ภาษาไทย";
+		DBBI_AVAILABLE = proto.isBoundary(4);
+	  }
+
+	  /// <summary>
+	  /// used for breaking the text into sentences </summary>
+	  private static readonly BreakIterator sentenceProto = BreakIterator.getSentenceInstance(Locale.ROOT);
+
+	  private readonly BreakIterator wordBreaker;
+	  private readonly CharArrayIterator wrapper = CharArrayIterator.newWordInstance();
+
+	  internal int sentenceStart;
+	  internal int sentenceEnd;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+	  /// <summary>
+	  /// Creates a new ThaiTokenizer </summary>
+	  public ThaiTokenizer(Reader reader) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
+	  public ThaiTokenizer(AttributeFactory factory, Reader reader) : base(factory, reader, (BreakIterator)sentenceProto.clone())
+	  {
+		if (!DBBI_AVAILABLE)
+		{
+		  throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
+		}
+		wordBreaker = (BreakIterator)proto.clone();
+	  }
+
+	  protected internal override void setNextSentence(int sentenceStart, int sentenceEnd)
+	  {
+		this.sentenceStart = sentenceStart;
+		this.sentenceEnd = sentenceEnd;
+		wrapper.setText(buffer, sentenceStart, sentenceEnd - sentenceStart);
+		wordBreaker.Text = wrapper;
+	  }
+
+	  protected internal override bool incrementWord()
+	  {
+		int start = wordBreaker.current();
+		if (start == BreakIterator.DONE)
+		{
+		  return false; // BreakIterator exhausted
+		}
+
+		// find the next set of boundaries, skipping over non-tokens
+		int end_Renamed = wordBreaker.next();
+		// Character below is assumed to be a Java-compat helper (isLetterOrDigit/codePointAt over
+		// int code points); .NET's char.IsLetterOrDigit takes a char, not an int code point:
+		while (end_Renamed != BreakIterator.DONE && !Character.IsLetterOrDigit(Character.CodePointAt(buffer, sentenceStart + start, sentenceEnd)))
+		{
+		  start = end_Renamed;
+		  end_Renamed = wordBreaker.next();
+		}
+
+		if (end_Renamed == BreakIterator.DONE)
+		{
+		  return false; // BreakIterator exhausted
+		}
+
+		clearAttributes();
+		termAtt.copyBuffer(buffer, sentenceStart + start, end_Renamed - start);
+		offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end_Renamed));
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
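
The tokenizer above segments in two passes: SegmentingTokenizerBase first finds
sentence boundaries with the ROOT-locale sentence iterator, then incrementWord()
walks the Thai word BreakIterator within each sentence. For the probe string used
in the static initializer, a dictionary-based iterator reports a word boundary at
index 4:

    // "ภาษาไทย" -> boundaries 0, 4, 7 -> tokens "ภาษา" (0..4) and "ไทย" (4..7),
    // which is exactly what proto.isBoundary(4) checks when setting DBBI_AVAILABLE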

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
new file mode 100644
index 0000000..97ba897
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.th
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Factory for <seealso cref="ThaiTokenizer"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.ThaiTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ThaiTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ThaiTokenizerFactory </summary>
+	  public ThaiTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override Tokenizer create(AttributeSource.AttributeFactory factory, Reader reader)
+	  {
+		return new ThaiTokenizer(factory, reader);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
new file mode 100644
index 0000000..ae7fa96
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -0,0 +1,172 @@
+using System;
+
+namespace org.apache.lucene.analysis.th
+{
+
+	/// <summary>
+	/// Copyright 2006 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+	using UnicodeBlock = Character.UnicodeBlock;
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharArrayIterator = org.apache.lucene.analysis.util.CharArrayIterator;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="TokenFilter"/> that use <seealso cref="java.text.BreakIterator"/> to break each 
+	/// Token that is Thai into separate Token(s) for each Thai word.
+	/// <para>Please note: Since matchVersion 3.1 on, this filter no longer lowercases non-thai text.
+	/// <seealso cref="ThaiAnalyzer"/> will insert a <seealso cref="LowerCaseFilter"/> before this filter
+	/// so the behaviour of the Analyzer does not change. With version 3.1, the filter handles
+	/// position increments correctly.
+	/// </para>
+	/// <para>WARNING: this filter may not be supported by all JREs.
+	///    It is known to work with Sun/Oracle and Harmony JREs.
+	///    If your application needs to be fully portable, consider using ICUTokenizer instead,
+	///    which uses an ICU Thai BreakIterator that will always be available.
+	/// </para>
+	/// </summary>
+	/// @deprecated Use <seealso cref="ThaiTokenizer"/> instead. 
+	[Obsolete("Use <seealso cref="ThaiTokenizer"/> instead.")]
+	public sealed class ThaiWordFilter : TokenFilter
+	{
+	  /// <summary>
+	  /// True if the JRE supports a working dictionary-based breakiterator for Thai.
+	  /// If this is false, this filter will not work at all!
+	  /// </summary>
+	  public static readonly bool DBBI_AVAILABLE = ThaiTokenizer.DBBI_AVAILABLE;
+	  private static readonly BreakIterator proto = BreakIterator.getWordInstance(new Locale("th"));
+	  private readonly BreakIterator breaker = (BreakIterator) proto.clone();
+	  private readonly CharArrayIterator charIterator = CharArrayIterator.newWordInstance();
+
+	  private readonly bool handlePosIncr;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  private AttributeSource clonedToken = null;
+	  private CharTermAttribute clonedTermAtt = null;
+	  private OffsetAttribute clonedOffsetAtt = null;
+	  private bool hasMoreTokensInClone = false;
+	  private bool hasIllegalOffsets = false; // only if the length changed before this filter
+
+	  /// <summary>
+	  /// Creates a new ThaiWordFilter with the specified match version. </summary>
+	  public ThaiWordFilter(Version matchVersion, TokenStream input) : base(matchVersion.onOrAfter(Version.LUCENE_31) ? input : new LowerCaseFilter(matchVersion, input))
+	  {
+		if (!DBBI_AVAILABLE)
+		{
+		  throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
+		}
+		handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (hasMoreTokensInClone)
+		{
+		  int start = breaker.current();
+		  int end = breaker.next();
+		  if (end != BreakIterator.DONE)
+		  {
+			clonedToken.copyTo(this);
+			termAtt.copyBuffer(clonedTermAtt.buffer(), start, end - start);
+			if (hasIllegalOffsets)
+			{
+			  offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
+			}
+			else
+			{
+			  offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
+			}
+			if (handlePosIncr)
+			{
+				posAtt.PositionIncrement = 1;
+			}
+			return true;
+		  }
+		  hasMoreTokensInClone = false;
+		}
+
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		if (termAtt.length() == 0 || UnicodeBlock.of(termAtt.charAt(0)) != UnicodeBlock.THAI)
+		{
+		  return true;
+		}
+
+		hasMoreTokensInClone = true;
+
+		// if length by start + end offsets doesn't match the term text then assume
+		// this is a synonym and don't adjust the offsets.
+		hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();
+
+		// we lazy init the cloned token, as in ctor not all attributes may be added
+		if (clonedToken == null)
+		{
+		  clonedToken = cloneAttributes();
+		  clonedTermAtt = clonedToken.getAttribute(typeof(CharTermAttribute));
+		  clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));
+		}
+		else
+		{
+		  this.copyTo(clonedToken);
+		}
+
+		// reinit CharacterIterator
+		charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
+		breaker.Text = charIterator;
+		int end = breaker.next();
+		if (end != BreakIterator.DONE)
+		{
+		  termAtt.Length = end;
+		  if (hasIllegalOffsets)
+		  {
+			offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
+		  }
+		  else
+		  {
+			offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + end);
+		  }
+		  // position increment keeps as it is for first token
+		  return true;
+		}
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		hasMoreTokensInClone = false;
+		clonedToken = null;
+		clonedTermAtt = null;
+		clonedOffsetAtt = null;
+	  }
+	}
+
+}
\ No newline at end of file
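
Since the deprecated filter only re-segments tokens that are already Thai, a hedged
composition sketch (the WhitespaceTokenizer/LowerCaseFilter ports and their
(Version, ...) constructors are assumed here, mirroring the Java originals):

    using System.IO;
    using org.apache.lucene.analysis.core;
    using org.apache.lucene.analysis.th;
    using org.apache.lucene.util;

    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_46, new StringReader("ประโยคภาษาไทย"));
    ts = new LowerCaseFilter(Version.LUCENE_46, ts); // ThaiAnalyzer inserts this since 3.1
    ts = new ThaiWordFilter(Version.LUCENE_46, ts);  // splits each Thai token into words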

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
new file mode 100644
index 0000000..0fa779c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
@@ -0,0 +1,59 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.th
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ThaiWordFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ThaiWordFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> </summary>
+	/// @deprecated Use <seealso cref="ThaiTokenizerFactory"/> instead 
+	[Obsolete("Use <seealso cref="ThaiTokenizerFactory"/> instead")]
+	public class ThaiWordFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ThaiWordFilterFactory </summary>
+	  public ThaiWordFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ThaiWordFilter create(TokenStream input)
+	  {
+		return new ThaiWordFilter(luceneMatchVersion, input);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
new file mode 100644
index 0000000..3c2d66d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Strips all characters after an apostrophe (including the apostrophe itself).
+	/// <para>
+	/// In Turkish, the apostrophe is used to separate suffixes from proper names
+	/// (continent, sea, river, lake, mountain, upland, and proper names related to
+	/// religion and mythology). This filter is intended to be used before stem filters.
+	/// For more information, see <a href="http://www.ipcsit.com/vol57/015-ICNI2012-M021.pdf">
+	/// Role of Apostrophes in Turkish Information Retrieval</a>
+	/// </para>
+	/// </summary>
+	public sealed class ApostropheFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  public ApostropheFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
+		char[] buffer = termAtt.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int length = termAtt.length();
+		int length = termAtt.length();
+
+		for (int i = 0; i < length; i++)
+		{
+		  if (buffer[i] == '\'' || buffer[i] == '\u2019')
+		  {
+			termAtt.Length = i;
+			return true;
+		  }
+		}
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file
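
A quick sketch of the behaviour described in the summary, with an illustrative Turkish
input (the StandardTokenizer port and its (Version, Reader) constructor are assumed):

    using System.IO;
    using org.apache.lucene.analysis.standard;
    using org.apache.lucene.analysis.tr;
    using org.apache.lucene.util;

    // "Türkiye'de" -> "Türkiye", "2003'te" -> "2003"; U+2019 apostrophes are handled too
    TokenStream ts = new StandardTokenizer(Version.LUCENE_48, new StringReader("Türkiye'de 2003'te"));
    ts = new ApostropheFilter(ts);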

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
new file mode 100644
index 0000000..b3e0fea
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
@@ -0,0 +1,52 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ApostropheFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_tr_lower_apostrophes" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ApostropheFilterFactory"/&gt;
+	///     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ApostropheFilterFactory : TokenFilterFactory
+	{
+
+	  public ApostropheFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameter(s): " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new ApostropheFilter(input);
+	  }
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
new file mode 100644
index 0000000..836782a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
@@ -0,0 +1,145 @@
+using System;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using TurkishStemmer = org.tartarus.snowball.ext.TurkishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Turkish.
+	/// </summary>
+	public sealed class TurkishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Turkish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+	  /// <summary>
+	  /// The comment character in the stopwords file.  
+	  /// All lines prefixed with this will be ignored.
+	  /// </summary>
+	  private const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion when the outer class
+	  /// accesses the static set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(TurkishAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public TurkishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public TurkishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="TurkishLowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem
+	  ///         exclusion set is provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		if (matchVersion.onOrAfter(Version.LUCENE_48))
+		{
+		  result = new ApostropheFilter(result);
+		}
+		result = new TurkishLowerCaseFilter(result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new TurkishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
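
A hedged end-to-end sketch; tokenStream follows the Java Analyzer API and may be renamed
by the port, and the sample field name and text are illustrative:

    using System.IO;
    using org.apache.lucene.analysis.tr;
    using org.apache.lucene.util;

    var analyzer = new TurkishAnalyzer(Version.LUCENE_48);
    // with LUCENE_48 the chain includes ApostropheFilter, so "Türkiye'deki" stems from "Türkiye"
    TokenStream ts = analyzer.tokenStream("body", new StringReader("Türkiye'deki gelişmeler"));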


[06/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
new file mode 100644
index 0000000..2067ff6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -0,0 +1,150 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = LowerCaseFilter;
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Filters <seealso cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
+	/// with <seealso cref="org.apache.lucene.analysis.standard.StandardFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/> and
+	/// <seealso cref="StopFilter"/>, using a list of
+	/// English stop words.
+	/// 
+	/// <a name="version"/>
+	/// <para>
+	///   You must specify the required <seealso cref="org.apache.lucene.util.Version"/>
+	///   compatibility when creating UAX29URLEmailAnalyzer
+	/// </para>
+	/// </summary>
+	public sealed class UAX29URLEmailAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Default maximum allowed token length </summary>
+	  public const int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// An unmodifiable set containing some common English words that are usually not
+	  /// useful for searching. 
+	  /// </summary>
+	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. </summary>
+	  /// <param name="matchVersion"> Lucene version to match See {@link
+	  /// <a href="#version">above</a>} </param>
+	  /// <param name="stopWords"> stop words  </param>
+	  public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words
+	  /// (<seealso cref="#STOP_WORDS_SET"/>). </summary>
+	  /// <param name="matchVersion"> Lucene version to match; see <a href="#version">above</a> </param>
+	  public UAX29URLEmailAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the stop words from the given reader. </summary>
+	  /// <seealso cref= org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) </seealso>
+	  /// <param name="matchVersion"> Lucene version to match See {@link
+	  /// <a href="#version">above</a>} </param>
+	  /// <param name="stopwords"> Reader to read stop words from  </param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public UAX29URLEmailAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
+	  public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Set maximum allowed token length.  If a token is seen
+	  /// that exceeds this length then it is discarded.  This
+	  /// setting only takes effect the next time tokenStream
+	  /// is called.
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
+		UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
+		src.MaxTokenLength = maxTokenLength;
+		TokenStream tok = new StandardFilter(matchVersion, src);
+		tok = new LowerCaseFilter(matchVersion, tok);
+		tok = new StopFilter(matchVersion, tok, stopwords);
+		return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+	  }
+
+	  private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+	  {
+		  private readonly UAX29URLEmailAnalyzer outerInstance;
+
+		  private Reader reader;
+		  private org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src;
+
+		  public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.reader = reader;
+			  this.src = src;
+		  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+		  protected internal override Reader Reader
+		  {
+			  set
+			  {
+				src.MaxTokenLength = outerInstance.maxTokenLength;
+				base.Reader = value;
+			  }
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
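
The MaxTokenLength property above is the one knob this analyzer exposes; a short sketch
(the version choice is illustrative):

    using org.apache.lucene.analysis.standard;
    using org.apache.lucene.util;

    var analyzer = new UAX29URLEmailAnalyzer(Version.LUCENE_47);
    analyzer.MaxTokenLength = 64; // tokens longer than this, e.g. very long URLs, are discarded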

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
new file mode 100644
index 0000000..83ac99c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -0,0 +1,221 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using UAX29URLEmailTokenizerImpl31 = org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
+	using UAX29URLEmailTokenizerImpl34 = org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
+	using UAX29URLEmailTokenizerImpl36 = org.apache.lucene.analysis.standard.std36.UAX29URLEmailTokenizerImpl36;
+	using UAX29URLEmailTokenizerImpl40 = org.apache.lucene.analysis.standard.std40.UAX29URLEmailTokenizerImpl40;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// This class implements Word Break rules from the Unicode Text Segmentation 
+	/// algorithm, as specified in
+	/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a> 
+	/// URLs and email addresses are also tokenized according to the relevant RFCs.
+	/// <p/>
+	/// Tokens produced are of the following types:
+	/// <ul>
+	///   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
+	///   <li>&lt;NUM&gt;: A number</li>
+	///   <li>&lt;URL&gt;: A URL</li>
+	///   <li>&lt;EMAIL&gt;: An email address</li>
+	///   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
+	///       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+	///   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
+	///   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
+	/// </ul>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating UAX29URLEmailTokenizer:
+	/// <ul>
+	///   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+	///   from their combining characters. If you use a previous version number,
+	///   you get the exact broken behavior for backwards compatibility.
+	/// </ul>
+	/// </para>
+	/// </summary>
+
+	public sealed class UAX29URLEmailTokenizer : Tokenizer
+	{
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner </summary>
+	  private readonly StandardTokenizerInterface scanner;
+
+	  public const int ALPHANUM = 0;
+	  public const int NUM = 1;
+	  public const int SOUTHEAST_ASIAN = 2;
+	  public const int IDEOGRAPHIC = 3;
+	  public const int HIRAGANA = 4;
+	  public const int KATAKANA = 5;
+	  public const int HANGUL = 6;
+	  public const int URL = 7;
+	  public const int EMAIL = 8;
+
+	  /// <summary>
+	  /// String token types that correspond to token type int constants </summary>
+	  public static readonly string[] TOKEN_TYPES = new string[]
+	  {
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA],
+		StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
+		"<URL>",
+		"<EMAIL>"
+	  };
+
+	  private int skippedPositions;
+
+	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// Set the max allowed token length.  Any token longer
+	  ///  than this is skipped. 
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		  set
+		  {
+			if (value < 1)
+			{
+			  throw new System.ArgumentException("maxTokenLength must be greater than zero");
+			}
+			this.maxTokenLength = value;
+		  }
+		  get
+		  {
+			return maxTokenLength;
+		  }
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new instance of the UAX29URLEmailTokenizer.  Attaches
+	  /// the <code>input</code> to the newly created JFlex scanner.
+	  /// </summary>
+	  /// <param name="input"> The input reader </param>
+	  public UAX29URLEmailTokenizer(Version matchVersion, Reader input) : base(input)
+	  {
+		this.scanner = getScannerFor(matchVersion);
+	  }
+
+	  /// <summary>
+	  /// Creates a new UAX29URLEmailTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+	  /// </summary>
+	  public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
+	  {
+		this.scanner = getScannerFor(matchVersion);
+	  }
+
+	  private StandardTokenizerInterface getScannerFor(Version matchVersion)
+	  {
+		// best-effort NPE if you don't call reset
+		if (matchVersion.onOrAfter(Version.LUCENE_47))
+		{
+		  return new UAX29URLEmailTokenizerImpl(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_40))
+		{
+		  return new UAX29URLEmailTokenizerImpl40(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  return new UAX29URLEmailTokenizerImpl36(input);
+		}
+		else if (matchVersion.onOrAfter(Version.LUCENE_34))
+		{
+		  return new UAX29URLEmailTokenizerImpl34(input);
+		}
+		else
+		{
+		  return new UAX29URLEmailTokenizerImpl31(input);
+		}
+	  }
+
+	  // this tokenizer generates three attributes:
+	  // term offset, positionIncrement and type
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		skippedPositions = 0;
+
+		while (true)
+		{
+		  int tokenType = scanner.NextToken;
+
+		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+		  {
+			return false;
+		  }
+
+		  if (scanner.yylength() <= maxTokenLength)
+		  {
+			posIncrAtt.PositionIncrement = skippedPositions + 1;
+			scanner.getText(termAtt);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = scanner.yychar();
+			int start = scanner.yychar();
+			offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+			typeAtt.Type = TOKEN_TYPES[tokenType];
+			return true;
+		  }
+		  else
+			// When we skip a too-long term, we still increment the
+			// position increment
+		  {
+			skippedPositions++;
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		offsetAtt.setOffset(finalOffset, finalOffset);
+		// adjust any skipped tokens
+		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		skippedPositions = 0;
+	  }
+	}
+
+}
\ No newline at end of file
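
A sketch that surfaces the <URL> and <EMAIL> token types; the casts and the
reset/incrementToken/end casing are assumptions carried over from the Java API:

    using System;
    using System.IO;
    using org.apache.lucene.analysis.standard;
    using org.apache.lucene.analysis.tokenattributes;
    using org.apache.lucene.util;

    var t = new UAX29URLEmailTokenizer(Version.LUCENE_47,
        new StringReader("see http://lucene.apache.org or mail dev@lucene.apache.org"));
    var termAtt = (CharTermAttribute)t.addAttribute(typeof(CharTermAttribute));
    var typeAtt = (TypeAttribute)t.addAttribute(typeof(TypeAttribute));
    t.reset();
    while (t.incrementToken())
    {
        Console.WriteLine("{0}\t{1}", termAtt, typeAtt.Type); // types include <ALPHANUM>, <URL>, <EMAIL>
    }
    t.end();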

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
new file mode 100644
index 0000000..350fdbb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="UAX29URLEmailTokenizer"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_urlemail" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class UAX29URLEmailTokenizerFactory : TokenizerFactory
+	{
+	  private readonly int maxTokenLength;
+
+	  /// <summary>
+	  /// Creates a new UAX29URLEmailTokenizerFactory </summary>
+	  public UAX29URLEmailTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		maxTokenLength = getInt(args, "maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input);
+		tokenizer.MaxTokenLength = maxTokenLength;
+		return tokenizer;
+	  }
+	}
+
+}
\ No newline at end of file
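
A factory configuration sketch; the "luceneMatchVersion" key is how the Java
TokenizerFactory base (assureMatchVersion) receives the version, assumed unchanged here:

    using System.Collections.Generic;
    using org.apache.lucene.analysis.standard;

    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "4.7" },
        { "maxTokenLength", "255" }
    };
    // any argument the factory does not consume triggers the ArgumentException above
    var factory = new UAX29URLEmailTokenizerFactory(args);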

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
new file mode 100644
index 0000000..f3c9cce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using SwedishStemmer = org.tartarus.snowball.ext.SwedishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Swedish.
+	/// </summary>
+	public sealed class SwedishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Swedish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "swedish_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion when the outer class
+	  /// accesses the static set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public SwedishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public SwedishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new SwedishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
new file mode 100644
index 0000000..256a618
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SwedishLightStemmer"/> to stem Swedish
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class SwedishLightStemFilter : TokenFilter
+	{
+	  private readonly SwedishLightStemmer stemmer = new SwedishLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public SwedishLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
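
A sketch of the KeywordAttribute escape hatch mentioned in the summary; the CharArraySet
constructor and SetKeywordMarkerFilter usage mirror the Java 4.x API and are assumptions:

    using System.IO;
    using org.apache.lucene.analysis.core;
    using org.apache.lucene.analysis.miscellaneous;
    using org.apache.lucene.analysis.standard;
    using org.apache.lucene.analysis.sv;
    using org.apache.lucene.analysis.util;
    using org.apache.lucene.util;

    var keep = new CharArraySet(Version.LUCENE_46, 1, true);
    keep.add("flickorna"); // keep this surface form

    TokenStream ts = new StandardTokenizer(Version.LUCENE_46, new StringReader("flickorna flickor"));
    ts = new LowerCaseFilter(Version.LUCENE_46, ts);
    ts = new SetKeywordMarkerFilter(ts, keep); // marked terms skip stemming
    ts = new SwedishLightStemFilter(ts);       // "flickor" -> "flick", "flickorna" unchanged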

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
new file mode 100644
index 0000000..ebfde41
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="SwedishLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.SwedishLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class SwedishLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new SwedishLightStemFilterFactory </summary>
+	  public SwedishLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new SwedishLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
new file mode 100644
index 0000000..523b489
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
@@ -0,0 +1,114 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Swedish.
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2003 Monolingual Tracks</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class SwedishLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		if (len > 7 && (StemmerUtil.EndsWith(s, len, "elser") || StemmerUtil.EndsWith(s, len, "heten")))
+		{
+		  return len - 5;
+		}
+
+		if (len > 6 && (StemmerUtil.EndsWith(s, len, "arne") || StemmerUtil.EndsWith(s, len, "erna") || StemmerUtil.EndsWith(s, len, "ande") || StemmerUtil.EndsWith(s, len, "else") || StemmerUtil.EndsWith(s, len, "aste") || StemmerUtil.EndsWith(s, len, "orna") || StemmerUtil.EndsWith(s, len, "aren")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "are") || StemmerUtil.EndsWith(s, len, "ast") || StemmerUtil.EndsWith(s, len, "het")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "ar") || StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "or") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "at") || StemmerUtil.EndsWith(s, len, "te") || StemmerUtil.EndsWith(s, len, "et")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 't':
+			case 'a':
+			case 'e':
+			case 'n':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

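For readers skimming the port, a minimal usage sketch of the in-place stemming contract above (the calling code is hypothetical; stem() trims suffixes by returning the new valid length rather than allocating a new string):

    var stemmer = new SwedishLightStemmer();
    char[] term = "jackorna".ToCharArray();        // "the jackets"
    int newLen = stemmer.stem(term, term.Length);  // the len > 6 rule strips "orna"
    string stemmed = new string(term, 0, newLen);  // "jack"
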
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
new file mode 100644
index 0000000..c38f1dd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.core;
+using org.apache.lucene.analysis.synonym;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. this is only a backwards compatibility
+	///                   mechanism that will be removed in Lucene 5.0 
+	// NOTE: rename this to "SynonymFilterFactory" and nuke that delegator in Lucene 5.0!
+	[Obsolete("(3.4) Use SynonymFilterFactory instead. This is only a backwards compatibility mechanism that will be removed in Lucene 5.0.")]
+	internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly bool ignoreCase;
+	  private readonly string tokenizerFactory;
+	  private readonly string synonyms;
+	  private readonly string format;
+	  private readonly bool expand;
+	  private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
+
+	  private SynonymMap map;
+
+	  public FSTSynonymFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		synonyms = require(args, "synonyms");
+		format = get(args, "format");
+		expand = getBoolean(args, "expand", true);
+
+		tokenizerFactory = get(args, "tokenizerFactory");
+		if (tokenizerFactory != null)
+		{
+		  assureMatchVersion();
+		  tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
+		  // snapshot the keys first: we cannot remove from args while enumerating it
+		  foreach (string key in new List<string>(args.Keys))
+		  {
+			tokArgs[Regex.Replace(key, "^tokenizerFactory\\.", "")] = args[key];
+			args.Remove(key);
+		  }
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		// if the fst is null, it means there's actually no synonyms... just return the original stream
+		// as there is nothing to do here.
+		return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
+	  public void inform(ResourceLoader loader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
+		TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);
+
+		try
+		{
+		  string formatClass = format;
+		  if (format == null || format.Equals("solr"))
+		  {
+			formatClass = typeof(SolrSynonymParser).Name;
+		  }
+		  else if (format.Equals("wordnet"))
+		  {
+			formatClass = typeof(WordnetSynonymParser).Name;
+		  }
+		  // TODO: expose dedup as a parameter?
+		  map = loadSynonyms(loader, formatClass, true, analyzer);
+		}
+		catch (ParseException e)
+		{
+		  throw new IOException("Error parsing synonyms file:", e);
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly FSTSynonymFilterFactory outerInstance;
+
+		  private TokenizerFactory factory;
+
+		  public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.factory = factory;
+		  }
+
+		  protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
+			TokenStream stream = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
+			return new Analyzer.TokenStreamComponents(tokenizer, stream);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Load synonyms with the given <seealso cref="SynonymMap.Parser"/> class.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.synonym.SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException, java.text.ParseException
+	  private SynonymMap loadSynonyms(ResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
+	  {
+		Encoding decoder = new UTF8Encoding(false, true); // strict UTF-8: throws on malformed input, mirroring CodingErrorAction.REPORT
+
+		SynonymMap.Parser parser;
+		Type clazz = loader.findClass(cname, typeof(SynonymMap.Parser));
+		try
+		{
+		  parser = (SynonymMap.Parser) Activator.CreateInstance(clazz, dedup, expand, analyzer);
+		}
+		catch (Exception e)
+		{
+		  throw new Exception(e.Message, e); // Exception has no (Exception) constructor in .NET
+		}
+
+		if (File.Exists(synonyms))
+		{
+		  parser.parse(new StreamReader(loader.openResource(synonyms), decoder));
+		}
+		else
+		{
+		  IList<string> files = splitFileNames(synonyms);
+		  foreach (string file in files)
+		  {
+			parser.parse(new StreamReader(loader.openResource(file), decoder));
+		  }
+		}
+		return parser.build();
+	  }
+
+	  // (there are no tests for this functionality)
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws java.io.IOException
+	  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, string cname)
+	  {
+		Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
+		try
+		{
+		  TokenizerFactory tokFactory = (TokenizerFactory) Activator.CreateInstance(clazz, tokArgs);
+		  if (tokFactory is ResourceLoaderAware)
+		  {
+			((ResourceLoaderAware) tokFactory).inform(loader);
+		  }
+		  return tokFactory;
+		}
+		catch (Exception e)
+		{
+		  throw new Exception(e.Message, e); // Exception has no (Exception) constructor in .NET
+		}
+	  }
+	}
+
+}
\ No newline at end of file

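A sketch of the argument map this factory consumes (values are illustrative, and the class is internal, so this is conceptual rather than directly callable; "synonyms" is required, and keys prefixed with "tokenizerFactory." are forwarded to the named tokenizer factory with the prefix stripped):

    var args = new Dictionary<string, string>
    {
        { "synonyms", "synonyms.txt" },  // required
        { "format", "solr" },            // or "wordnet"; solr is assumed when omitted
        { "ignoreCase", "true" },
        { "expand", "true" }
    };
    var factory = new FSTSynonymFilterFactory(args);  // the constructor consumes and removes the entries
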
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
new file mode 100644
index 0000000..15abb7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
@@ -0,0 +1,317 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+
+	/// <summary>
+	/// SynonymFilter handles multi-token synonyms with variable position increment offsets.
+	/// <para>
+	/// The matched tokens from the input stream may be optionally passed through (includeOrig=true)
+	/// or discarded.  If the original tokens are included, the position increments may be modified
+	/// to retain absolute positions after merging with the synonym tokenstream.
+	/// </para>
+	/// <para>
+	/// Generated synonyms will start at the same position as the first matched source token.
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
+	[Obsolete("(3.4) Use SynonymFilterFactory instead. Only for precise index backwards compatibility; this will be removed in Lucene 5.0.")]
+	internal sealed class SlowSynonymFilter : TokenFilter
+	{
+
+	  private readonly SlowSynonymMap map; // Map<String, SynonymMap>
+	  private IEnumerator<AttributeSource> replacement; // iterator over generated tokens
+
+	  public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map) : base(@in)
+	  {
+		if (map == null)
+		{
+		  throw new System.ArgumentException("map is required");
+		}
+
+		this.map = map;
+		// just ensuring these attributes exist...
+		addAttribute(typeof(CharTermAttribute));
+		addAttribute(typeof(PositionIncrementAttribute));
+		addAttribute(typeof(OffsetAttribute));
+		addAttribute(typeof(TypeAttribute));
+	  }
+
+
+	  /*
+	   * Need to worry about multiple scenarios:
+	   *  - need to go for the longest match
+	   *    a b => foo      #shouldn't match if "a b" is followed by "c d"
+	   *    a b c d => bar
+	   *  - need to backtrack - retry matches for tokens already read
+	   *     a b c d => foo
+	   *       b c => bar
+	   *     If the input stream is "a b c x", one will consume "a b c d"
+	   *     trying to match the first rule... all but "a" should be
+	   *     pushed back so a match may be made on "b c".
+	   *  - don't try and match generated tokens (thus need separate queue)
+	   *    matching is not recursive.
+	   *  - handle optional generation of original tokens in all these cases,
+	   *    merging token streams to preserve token positions.
+	   *  - preserve original positionIncrement of first matched token
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  // if there are any generated tokens, return them... don't try any
+		  // matches against them, as we specifically don't want recursion.
+		  if (replacement != null && replacement.MoveNext())
+		  {
+			copy(this, replacement.Current);
+			return true;
+		  }
+
+		  // common case fast-path of first token not matching anything
+		  AttributeSource firstTok = nextTok();
+		  if (firstTok == null)
+		  {
+			  return false;
+		  }
+		  CharTermAttribute termAtt = firstTok.addAttribute(typeof(CharTermAttribute));
+		  SlowSynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
+		  if (result == null)
+		  {
+			copy(this, firstTok);
+			return true;
+		  }
+
+		  // fast-path failed, clone ourselves if needed
+		  if (firstTok == this)
+		  {
+			firstTok = cloneAttributes();
+		  }
+		  // OK, we matched a token, so find the longest match.
+
+		  matched = new LinkedList<AttributeSource>();
+
+		  result = match(result);
+
+		  if (result == null)
+		  {
+			// no match, simply return the first token read.
+			copy(this, firstTok);
+			return true;
+		  }
+
+		  // reuse, or create new one each time?
+		  List<AttributeSource> generated = new List<AttributeSource>(result.synonyms.Length + matched.Count + 1);
+
+		  //
+		  // there was a match... let's generate the new tokens, merging
+		  // in the matched tokens (position increments need adjusting)
+		  //
+		  AttributeSource lastTok = matched.Count == 0 ? firstTok : matched.Last.Value;
+		  bool includeOrig = result.includeOrig();
+
+		  AttributeSource origTok = includeOrig ? firstTok : null;
+		  PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(typeof(PositionIncrementAttribute));
+		  int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
+		  int repPos = 0; // curr position in replacement token stream
+		  int pos = 0; // current position in merged token stream
+
+		  for (int i = 0; i < result.synonyms.Length; i++)
+		  {
+			Token repTok = result.synonyms[i];
+			AttributeSource newTok = firstTok.cloneAttributes();
+			CharTermAttribute newTermAtt = newTok.addAttribute(typeof(CharTermAttribute));
+			OffsetAttribute newOffsetAtt = newTok.addAttribute(typeof(OffsetAttribute));
+			PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(typeof(PositionIncrementAttribute));
+
+			OffsetAttribute lastOffsetAtt = lastTok.addAttribute(typeof(OffsetAttribute));
+
+			newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
+			newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
+			repPos += repTok.PositionIncrement;
+			if (i == 0) // make position of first token equal to original
+			{
+				repPos = origPos;
+			}
+
+			// if necessary, insert original tokens and adjust position increment
+			while (origTok != null && origPos <= repPos)
+			{
+			  PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			  origPosInc.PositionIncrement = origPos - pos;
+			  generated.Add(origTok);
+			  pos += origPosInc.PositionIncrement;
+			  if (matched.Count == 0)
+			  {
+				origTok = null;
+			  }
+			  else
+			  {
+				// .NET's LinkedList<T>.RemoveFirst() returns void, so read the head before removing it
+				origTok = matched.First.Value;
+				matched.RemoveFirst();
+			  }
+			  if (origTok != null)
+			  {
+				origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+				origPos += origPosInc.PositionIncrement;
+			  }
+			}
+
+			newPosIncAtt.PositionIncrement = repPos - pos;
+			generated.Add(newTok);
+			pos += newPosIncAtt.PositionIncrement;
+		  }
+
+		  // finish up any leftover original tokens
+		  while (origTok != null)
+		  {
+			PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			origPosInc.PositionIncrement = origPos - pos;
+			generated.Add(origTok);
+			pos += origPosInc.PositionIncrement;
+			if (matched.Count == 0)
+			{
+			  origTok = null;
+			}
+			else
+			{
+			  // .NET's LinkedList<T>.RemoveFirst() returns void, so read the head before removing it
+			  origTok = matched.First.Value;
+			  matched.RemoveFirst();
+			}
+			if (origTok != null)
+			{
+			  origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
+			  origPos += origPosInc.PositionIncrement;
+			}
+		  }
+
+		  // what if we replaced a longer sequence with a shorter one?
+		  // a/0 b/5 =>  foo/0
+		  // should I re-create the gap on the next buffered token?
+
+		  replacement = generated.GetEnumerator();
+		  // Now return to the top of the loop to read and return the first
+		  // generated token.. The reason this is done is that we may have generated
+		  // nothing at all, and may need to continue with more matching logic.
+		}
+	  }
+
+
+	  //
+	  // Defer creation of the buffer until the first time it is used to
+	  // optimize short fields with no matches.
+	  //
+	  private LinkedList<AttributeSource> buffer;
+	  private LinkedList<AttributeSource> matched;
+
+	  private bool exhausted;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.AttributeSource nextTok() throws java.io.IOException
+	  private AttributeSource nextTok()
+	  {
+		if (buffer != null && buffer.Count > 0)
+		{
+		  // .NET's LinkedList<T>.RemoveFirst() returns void, so read the head before removing it
+		  AttributeSource head = buffer.First.Value;
+		  buffer.RemoveFirst();
+		  return head;
+		}
+		else
+		{
+		  if (!exhausted && input.incrementToken())
+		  {
+			return this;
+		  }
+		  else
+		  {
+			exhausted = true;
+			return null;
+		  }
+		}
+	  }
+
+	  private void pushTok(AttributeSource t)
+	  {
+		if (buffer == null)
+		{
+			buffer = new LinkedList<AttributeSource>();
+		}
+		buffer.AddFirst(t);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private SlowSynonymMap match(SlowSynonymMap map) throws java.io.IOException
+	  private SlowSynonymMap match(SlowSynonymMap map)
+	  {
+		SlowSynonymMap result = null;
+
+		if (map.submap != null)
+		{
+		  AttributeSource tok = nextTok();
+		  if (tok != null)
+		  {
+			// clone ourselves.
+			if (tok == this)
+			{
+			  tok = cloneAttributes();
+			}
+			// check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
+			CharTermAttribute termAtt = tok.getAttribute(typeof(CharTermAttribute));
+			SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
+
+			if (subMap != null)
+			{
+			  // recurse
+			  result = match(subMap);
+			}
+
+			if (result != null)
+			{
+			  matched.AddFirst(tok);
+			}
+			else
+			{
+			  // push back unmatched token
+			  pushTok(tok);
+			}
+		  }
+		}
+
+		// no longer sequence matched; if this node has synonyms, this node is the match.
+		if (result == null && map.synonyms != null)
+		{
+		  result = map;
+		}
+
+		return result;
+	  }
+
+	  private void copy(AttributeSource target, AttributeSource source)
+	  {
+		if (target != source)
+		{
+		  source.copyTo(target);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		input.reset();
+		replacement = null;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file

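To make the backtracking notes in the comment above concrete, a short trace (rules and input are illustrative, includeOrig = false):

    rules:  a b c d => foo
            b c     => bar
    input:  a b c x

    match() consumes "a b c" and then fails on "x" before completing "a b c d";
    everything but "a" is pushed back and "a" is emitted unchanged. Matching
    restarts at "b", where "b c" matches, so "bar" is emitted, followed by "x".

    output: a bar x
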
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
new file mode 100644
index 0000000..5e76e47
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
@@ -0,0 +1,391 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using org.apache.lucene.analysis.util;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="SlowSynonymFilter"/> (only used with luceneMatchVersion < 3.4)
+	/// <pre class="prettyprint" >
+	/// &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
+	///             expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
+	[Obsolete("(3.4) Use SynonymFilterFactory instead. Only for precise index backwards compatibility; this will be removed in Lucene 5.0.")]
+	internal sealed class SlowSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly string synonyms;
+	  private readonly bool ignoreCase;
+	  private readonly bool expand;
+	  private readonly string tf;
+	  private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
+
+	  public SlowSynonymFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		synonyms = require(args, "synonyms");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		expand = getBoolean(args, "expand", true);
+
+		tf = get(args, "tokenizerFactory");
+		if (tf != null)
+		{
+		  assureMatchVersion();
+		  tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
+		  // snapshot the keys first: we cannot remove from args while enumerating it
+		  foreach (string key in new List<string>(args.Keys))
+		  {
+			tokArgs[Regex.Replace(key, "^tokenizerFactory\\.", "")] = args[key];
+			args.Remove(key);
+		  }
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void inform(ResourceLoader loader) throws java.io.IOException
+	  public void inform(ResourceLoader loader)
+	  {
+		TokenizerFactory tokFactory = null;
+		if (tf != null)
+		{
+		  tokFactory = loadTokenizerFactory(loader, tf);
+		}
+
+		IEnumerable<string> wlist = loadRules(synonyms, loader);
+
+		synMap = new SlowSynonymMap(ignoreCase);
+		parseRules(wlist, synMap, "=>", ",", expand,tokFactory);
+	  }
+
+	  /// <returns> a list of all rules </returns>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: protected Iterable<String> loadRules(String synonyms, ResourceLoader loader) throws java.io.IOException
+	  protected internal IEnumerable<string> loadRules(string synonyms, ResourceLoader loader)
+	  {
+		IList<string> wlist = null;
+		if (File.Exists(synonyms))
+		{
+		  wlist = getLines(loader, synonyms);
+		}
+		else
+		{
+		  IList<string> files = splitFileNames(synonyms);
+		  wlist = new List<string>();
+		  foreach (string file in files)
+		  {
+			IList<string> lines = getLines(loader, file.Trim());
+			foreach (string line in lines)
+			{
+			  wlist.Add(line); // IList<string> has no AddRange
+			}
+		  }
+		}
+		return wlist;
+	  }
+
+	  private SlowSynonymMap synMap;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static void parseRules(Iterable<String> rules, SlowSynonymMap map, String mappingSep, String synSep, boolean expansion, TokenizerFactory tokFactory) throws java.io.IOException
+	  internal static void parseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
+	  {
+		int count = 0;
+		foreach (string rule in rules)
+		{
+		  // To use regexes, we need an expression that specifies an odd number of chars.
+		  // This can't really be done with string.split(), and since we need to
+		  // do unescaping at some point anyway, we wouldn't be saving any effort
+		  // by using regexes.
+
+		  IList<string> mapping = splitSmart(rule, mappingSep, false);
+
+		  IList<IList<string>> source;
+		  IList<IList<string>> target;
+
+		  if (mapping.Count > 2)
+		  {
+			throw new System.ArgumentException("Invalid Synonym Rule:" + rule);
+		  }
+		  else if (mapping.Count == 2)
+		  {
+			source = getSynList(mapping[0], synSep, tokFactory);
+			target = getSynList(mapping[1], synSep, tokFactory);
+		  }
+		  else
+		  {
+			source = getSynList(mapping[0], synSep, tokFactory);
+			if (expansion)
+			{
+			  // expand to all arguments
+			  target = source;
+			}
+			else
+			{
+			  // reduce to first argument
+			  target = new List<IList<string>>(1);
+			  target.Add(source[0]);
+			}
+		  }
+
+		  bool includeOrig = false;
+		  foreach (IList<string> fromToks in source)
+		  {
+			count++;
+			foreach (IList<string> toToks in target)
+			{
+			  map.add(fromToks, SlowSynonymMap.makeTokens(toToks), includeOrig, true);
+			}
+		  }
+		}
+	  }
+
+	  // a , b c , d e f => [[a],[b,c],[d,e,f]]
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static java.util.List<java.util.List<String>> getSynList(String str, String separator, TokenizerFactory tokFactory) throws java.io.IOException
+	  private static IList<IList<string>> getSynList(string str, string separator, TokenizerFactory tokFactory)
+	  {
+		IList<string> strList = splitSmart(str, separator, false);
+		// now split on whitespace to get a list of token strings
+		IList<IList<string>> synList = new List<IList<string>>();
+		foreach (string toks in strList)
+		{
+		  IList<string> tokList = tokFactory == null ? splitWS(toks, true) : splitByTokenizer(toks, tokFactory);
+		  synList.Add(tokList);
+		}
+		return synList;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static java.util.List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws java.io.IOException
+	  private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
+	  {
+		StringReader reader = new StringReader(source);
+		TokenStream ts = loadTokenizer(tokFactory, reader);
+		IList<string> tokList = new List<string>();
+		try
+		{
+		  CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
+		  ts.reset();
+		  while (ts.incrementToken())
+		  {
+			if (termAtt.length() > 0)
+			{
+			  tokList.Add(termAtt.ToString());
+			}
+		  }
+		}
+		finally
+		{
+		  reader.Close();
+		}
+		return tokList;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws java.io.IOException
+	  private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, string cname)
+	  {
+		Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
+		try
+		{
+		  TokenizerFactory tokFactory = (TokenizerFactory) Activator.CreateInstance(clazz, tokArgs);
+		  if (tokFactory is ResourceLoaderAware)
+		  {
+			((ResourceLoaderAware) tokFactory).inform(loader);
+		  }
+		  return tokFactory;
+		}
+		catch (Exception e)
+		{
+		  throw new Exception(e.Message, e); // Exception has no (Exception) constructor in .NET
+		}
+	  }
+
+	  private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader)
+	  {
+		return tokFactory.create(reader);
+	  }
+
+	  public SlowSynonymMap SynonymMap
+	  {
+		  get
+		  {
+			return synMap;
+		  }
+	  }
+
+	  public override SlowSynonymFilter create(TokenStream input)
+	  {
+		return new SlowSynonymFilter(input,synMap);
+	  }
+
+	  public static IList<string> splitWS(string s, bool decode)
+	  {
+		List<string> lst = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  char ch = s[pos++];
+		  if (char.IsWhiteSpace(ch))
+		  {
+			if (sb.Length > 0)
+			{
+			  lst.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			continue;
+		  }
+
+		  if (ch == '\\')
+		  {
+			if (!decode)
+			{
+				sb.Append(ch);
+			}
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+			if (decode)
+			{
+			  switch (ch)
+			  {
+				case 'n' :
+					ch = '\n';
+					break;
+				case 't' :
+					ch = '\t';
+					break;
+				case 'r' :
+					ch = '\r';
+					break;
+				case 'b' :
+					ch = '\b';
+					break;
+				case 'f' :
+					ch = '\f';
+					break;
+			  }
+			}
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  lst.Add(sb.ToString());
+		}
+
+		return lst;
+	  }
+
+	  /// <summary>
+	  /// Splits a backslash escaped string on the separator.
+	  /// <para>
+	  /// Current backslash escaping supported:
+	  /// <br> \n \t \r \b \f are escaped the same as a Java String
+	  /// <br> Other characters following a backslash are produced verbatim (\c => c)
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="s">  the string to split </param>
+	  /// <param name="separator"> the separator to split on </param>
+	  /// <param name="decode"> decode backslash escaping </param>
+	  public static IList<string> splitSmart(string s, string separator, bool decode)
+	  {
+		List<string> lst = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  if (string.CompareOrdinal(s, pos, separator, 0, separator.Length) == 0) // string.StartsWith(value, startIndex) does not exist in .NET
+		  {
+			if (sb.Length > 0)
+			{
+			  lst.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			pos += separator.Length;
+			continue;
+		  }
+
+		  char ch = s[pos++];
+		  if (ch == '\\')
+		  {
+			if (!decode)
+			{
+				sb.Append(ch);
+			}
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+			if (decode)
+			{
+			  switch (ch)
+			  {
+				case 'n' :
+					ch = '\n';
+					break;
+				case 't' :
+					ch = '\t';
+					break;
+				case 'r' :
+					ch = '\r';
+					break;
+				case 'b' :
+					ch = '\b';
+					break;
+				case 'f' :
+					ch = '\f';
+					break;
+			  }
+			}
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  lst.Add(sb.ToString());
+		}
+
+		return lst;
+	  }
+	}
+
+}
\ No newline at end of file

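A quick illustration of splitSmart's escaping rules as documented above (inputs are illustrative; verbatim strings keep the backslashes literal, and the class is internal, so this is conceptual):

    splitSmart(@"a\,b,c", ",", true);  // -> ["a,b", "c"]  escaped separator does not split
    splitSmart(@"a\tb,c", ",", true);  // -> ["a<tab>b", "c"]  \t decodes to a tab
    splitSmart(@"a\cb,c", ",", true);  // -> ["acb", "c"]  unknown escapes yield the character itself
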
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
new file mode 100644
index 0000000..cfc7d71
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
@@ -0,0 +1,210 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Mapping rules for use with <seealso cref="SlowSynonymFilter"/> </summary>
+	/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0 
+	[Obsolete("(3.4) Use SynonymFilterFactory instead. Only for precise index backwards compatibility; this will be removed in Lucene 5.0.")]
+	internal class SlowSynonymMap
+	{
+	  /// <summary>
+	  /// @lucene.internal </summary>
+	  public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
+	  /// <summary>
+	  /// @lucene.internal </summary>
+	  public Token[] synonyms;
+	  internal int flags;
+
+	  internal const int INCLUDE_ORIG = 0x01;
+	  internal const int IGNORE_CASE = 0x02;
+
+	  public SlowSynonymMap()
+	  {
+	  }
+	  public SlowSynonymMap(bool ignoreCase)
+	  {
+		if (ignoreCase)
+		{
+			flags |= IGNORE_CASE;
+		}
+	  }
+
+	  public virtual bool includeOrig()
+	  {
+		  return (flags & INCLUDE_ORIG) != 0;
+	  }
+	  public virtual bool ignoreCase()
+	  {
+		  return (flags & IGNORE_CASE) != 0;
+	  }
+
+	  /// <param name="singleMatch">  List<String>, the sequence of strings to match </param>
+	  /// <param name="replacement">  List<Token> the list of tokens to use on a match </param>
+	  /// <param name="includeOrig">  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
+	  /// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
+	  public virtual void add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
+	  {
+		SlowSynonymMap currMap = this;
+		foreach (string str in singleMatch)
+		{
+		  if (currMap.submap == null)
+		  {
+			// for now hardcode at 4.0, as its what the old code did.
+			// would be nice to fix, but shouldn't store a version in each submap!!!
+			currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_CURRENT, 1, ignoreCase());
+		  }
+
+		  SlowSynonymMap map = currMap.submap.get(str);
+		  if (map == null)
+		  {
+			map = new SlowSynonymMap();
+			map.flags |= flags & IGNORE_CASE;
+			currMap.submap.put(str, map);
+		  }
+
+		  currMap = map;
+		}
+
+		if (currMap.synonyms != null && !mergeExisting)
+		{
+		  throw new System.ArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
+		}
+		IList<Token> superset = currMap.synonyms == null ? replacement : mergeTokens(currMap.synonyms, replacement);
+		currMap.synonyms = superset.ToArray();
+		if (includeOrig)
+		{
+			currMap.flags |= INCLUDE_ORIG;
+		}
+	  }
+
+
+	  public override string ToString()
+	  {
+		StringBuilder sb = new StringBuilder("<");
+		if (synonyms != null)
+		{
+		  sb.Append("[");
+		  for (int i = 0; i < synonyms.Length; i++)
+		  {
+			if (i != 0)
+			{
+				sb.Append(',');
+			}
+			sb.Append(synonyms[i]);
+		  }
+		  if ((flags & INCLUDE_ORIG) != 0)
+		  {
+			sb.Append(",ORIG");
+		  }
+		  sb.Append("],");
+		}
+		sb.Append(submap);
+		sb.Append(">");
+		return sb.ToString();
+	  }
+
+
+
+	  /// <summary>
+	  /// Produces a List<Token> from a List<String> </summary>
+	  public static IList<Token> makeTokens(IList<string> strings)
+	  {
+		IList<Token> ret = new List<Token>(strings.Count);
+		foreach (string str in strings)
+		{
+		  Token newTok = new Token(str, 0, 0, "SYNONYM");
+		  ret.Add(newTok);
+		}
+		return ret;
+	  }
+
+
+	  /// <summary>
+	  /// Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
+	  /// the tokens end up at the same position.
+	  /// 
+	  /// Example:  [a b] merged with [c d] produces [a/c b/d]  ('/' denotes tokens in the same position)
+	  /// Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
+	  /// 
+	  /// </summary>
+	  public static IList<Token> mergeTokens(IList<Token> lst1, IList<Token> lst2)
+	  {
+		List<Token> result = new List<Token>();
+		if (lst1 == null || lst2 == null)
+		{
+		  if (lst2 != null)
+		  {
+			  result.AddRange(lst2);
+		  }
+		  if (lst1 != null)
+		  {
+			  result.AddRange(lst1);
+		  }
+		  return result;
+		}
+
+		int pos = 0;
+		IEnumerator<Token> iter1 = lst1.GetEnumerator();
+		IEnumerator<Token> iter2 = lst2.GetEnumerator();
+		Token tok1 = iter1.MoveNext() ? iter1.Current : null;
+		Token tok2 = iter2.MoveNext() ? iter2.Current : null;
+		int pos1 = tok1 != null ? tok1.PositionIncrement : 0;
+		int pos2 = tok2 != null ? tok2.PositionIncrement : 0;
+		while (tok1 != null || tok2 != null)
+		{
+		  while (tok1 != null && (pos1 <= pos2 || tok2 == null))
+		  {
+			Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
+			tok.copyBuffer(tok1.buffer(), 0, tok1.length());
+			tok.PositionIncrement = pos1 - pos;
+			result.Add(tok);
+			pos = pos1;
+			tok1 = iter1.MoveNext() ? iter1.Current : null;
+			pos1 += tok1 != null ? tok1.PositionIncrement : 0;
+		  }
+		  while (tok2 != null && (pos2 <= pos1 || tok1 == null))
+		  {
+			Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
+			tok.copyBuffer(tok2.buffer(), 0, tok2.length());
+			tok.PositionIncrement = pos2 - pos;
+			result.Add(tok);
+			pos = pos2;
+			tok2 = iter2.MoveNext() ? iter2.Current : null;
+			pos2 += tok2 != null ? tok2.PositionIncrement : 0;
+		  }
+		}
+		return result;
+	  }
+
+	}
+
+}
\ No newline at end of file

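A small sketch of how makeTokens and mergeTokens compose (conceptual, since the class is internal; tokens built by makeTokens carry the default position increment of 1):

    IList<Token> first = SlowSynonymMap.makeTokens(new List<string> { "a", "b" });
    IList<Token> second = SlowSynonymMap.makeTokens(new List<string> { "c", "d" });
    IList<Token> merged = SlowSynonymMap.mergeTokens(first, second);
    // merged holds a/c at position 1 and b/d at position 2:
    // emission order is a, c, d, b with position increments 1, 0, 1, 0
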
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
new file mode 100644
index 0000000..b0fb325
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
@@ -0,0 +1,218 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// Parser for the Solr synonyms format.
+	/// <ol>
+	///   <li> Blank lines and lines starting with '#' are comments.
+	///   <li> Explicit mappings match any token sequence on the LHS of "=>"
+	///        and replace with all alternatives on the RHS.  These types of mappings
+	///        ignore the expand parameter in the constructor.
+	///        Example:
+	///        <blockquote>i-pod, i pod => ipod</blockquote>
+	///   <li> Equivalent synonyms may be separated with commas and give
+	///        no explicit mapping.  In this case the mapping behavior will
+	///        be taken from the expand parameter in the constructor.  This allows
+	///        the same synonym file to be used in different synonym handling strategies.
+	///        Example:
+	///        <blockquote>ipod, i-pod, i pod</blockquote>
+	/// 
+	///   <li> Multiple synonym mapping entries are merged.
+	///        Example:
+	///        <blockquote>
+	///         foo => foo bar<br>
+	///         foo => baz<br><br>
+	///         is equivalent to<br><br>
+	///         foo => foo bar, baz
+	///        </blockquote>
+	///  </ol>
+	/// @lucene.experimental
+	/// </summary>
+	public class SolrSynonymParser : SynonymMap.Parser
+	{
+	  private readonly bool expand;
+
+	  public SolrSynonymParser(bool dedup, bool expand, Analyzer analyzer) : base(dedup, analyzer)
+	  {
+		this.expand = expand;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void parse(java.io.Reader in) throws java.io.IOException, java.text.ParseException
+	  public override void parse(Reader @in)
+	  {
+		LineNumberReader br = new LineNumberReader(@in);
+		try
+		{
+		  addInternal(br);
+		}
+		catch (System.ArgumentException e)
+		{
+		  ParseException ex = new ParseException("Invalid synonym rule at line " + br.LineNumber, 0);
+		  ex.initCause(e);
+		  throw ex;
+		}
+		finally
+		{
+		  br.close();
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void addInternal(java.io.BufferedReader in) throws java.io.IOException
+	  private void addInternal(BufferedReader @in)
+	  {
+		string line = null;
+		while ((line = @in.readLine()) != null)
+		{
+		  if (line.Length == 0 || line[0] == '#')
+		  {
+			continue; // ignore empty lines and comments
+		  }
+
+		  CharsRef[] inputs;
+		  CharsRef[] outputs;
+
+		  // TODO: we could process this more efficiently.
+		  string[] sides = Split(line, "=>");
+		  if (sides.Length > 1) // explicit mapping
+		  {
+			if (sides.Length != 2)
+			{
+			  throw new System.ArgumentException("more than one explicit mapping specified on the same line");
+			}
+			string[] inputStrings = Split(sides[0], ",");
+			inputs = new CharsRef[inputStrings.Length];
+			for (int i = 0; i < inputs.Length; i++)
+			{
+			  inputs[i] = analyze(unescape(inputStrings[i]).Trim(), new CharsRef());
+			}
+
+			string[] outputStrings = Split(sides[1], ",");
+			outputs = new CharsRef[outputStrings.Length];
+			for (int i = 0; i < outputs.Length; i++)
+			{
+			  outputs[i] = analyze(unescape(outputStrings[i]).Trim(), new CharsRef());
+			}
+		  }
+		  else
+		  {
+			string[] inputStrings = Split(line, ",");
+			inputs = new CharsRef[inputStrings.Length];
+			for (int i = 0; i < inputs.Length; i++)
+			{
+			  inputs[i] = analyze(unescape(inputStrings[i]).Trim(), new CharsRef());
+			}
+			if (expand)
+			{
+			  outputs = inputs;
+			}
+			else
+			{
+			  outputs = new CharsRef[1];
+			  outputs[0] = inputs[0];
+			}
+		  }
+
+		  // currently we include the term itself in the map,
+		  // and use includeOrig = false always.
+		  // this is how the existing filter does it, but its actually a bug,
+		  // especially if combined with ignoreCase = true
+		  for (int i = 0; i < inputs.Length; i++)
+		  {
+			for (int j = 0; j < outputs.Length; j++)
+			{
+			  add(inputs[i], outputs[j], false);
+			}
+		  }
+		}
+	  }
+
+	  private static string[] Split(string s, string separator)
+	  {
+		List<string> list = new List<string>(2);
+		StringBuilder sb = new StringBuilder();
+		int pos = 0, end = s.Length;
+		while (pos < end)
+		{
+		  if (string.CompareOrdinal(s, pos, separator, 0, separator.Length) == 0) // string.StartsWith(value, startIndex) does not exist in .NET
+		  {
+			if (sb.Length > 0)
+			{
+			  list.Add(sb.ToString());
+			  sb = new StringBuilder();
+			}
+			pos += separator.Length;
+			continue;
+		  }
+
+		  char ch = s[pos++];
+		  if (ch == '\\')
+		  {
+			sb.Append(ch);
+			if (pos >= end) // ERROR, or let it go?
+			{
+				break;
+			}
+			ch = s[pos++];
+		  }
+
+		  sb.Append(ch);
+		}
+
+		if (sb.Length > 0)
+		{
+		  list.Add(sb.ToString());
+		}
+
+		return list.ToArray();
+	  }
+
+	  private string unescape(string s)
+	  {
+		if (s.IndexOf("\\", StringComparison.Ordinal) >= 0)
+		{
+		  StringBuilder sb = new StringBuilder();
+		  for (int i = 0; i < s.Length; i++)
+		  {
+			char ch = s[i];
+			if (ch == '\\' && i < s.Length - 1)
+			{
+			  sb.Append(s[++i]);
+			}
+			else
+			{
+			  sb.Append(ch);
+			}
+		  }
+		  return sb.ToString();
+		}
+		return s;
+	  }
+	}
+
+}
\ No newline at end of file

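Putting the format rules above together, a small synonyms file the parser accepts (contents are illustrative):

    # blank lines and lines starting with '#' are comments
    i-pod, i pod => ipod
    ipod, i-pod, i pod
    foo => foo bar
    foo => baz
    # the two 'foo' rules are merged, equivalent to:  foo => foo bar, baz
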

[32/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
new file mode 100644
index 0000000..c091904
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -0,0 +1,370 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+	/// <summary>
+	/// CJKTokenizer is designed for Chinese, Japanese, and Korean languages.
+	/// <para>  
+	/// The tokens returned are every two adjacent characters with overlap match.
+	/// </para>
+	/// <para>
+	/// Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
+	/// </para>
+	/// Additionally, the following is applied to Latin text (such as English):
+	/// <ul>
+	/// <li>Text is converted to lowercase.
+	/// <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
+	/// <li>Full-width forms are converted to half-width forms.
+	/// </ul>
+	/// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
+	/// please search  <a
+	/// href="http://www.google.com/search?q=word+chinese+segment">google</a>
+	/// </summary>
+	/// @deprecated Use StandardTokenizer, CJKWidthFilter, CJKBigramFilter, and LowerCaseFilter instead. 
+	[Obsolete("Use StandardTokenizer, CJKWidthFilter, CJKBigramFilter, and LowerCaseFilter instead.")]
+	public sealed class CJKTokenizer : Tokenizer
+	{
+		//~ Static fields/initializers ---------------------------------------------
+		/// <summary>
+		/// Word token type </summary>
+		internal const int WORD_TYPE = 0;
+
+		/// <summary>
+		/// Single byte token type </summary>
+		internal const int SINGLE_TOKEN_TYPE = 1;
+
+		/// <summary>
+		/// Double byte token type </summary>
+		internal const int DOUBLE_TOKEN_TYPE = 2;
+
+		/// <summary>
+		/// Names for token types </summary>
+		internal static readonly string[] TOKEN_TYPE_NAMES = new string[] {"word", "single", "double"};
+
+		/// <summary>
+		/// Max word length </summary>
+		private const int MAX_WORD_LEN = 255;
+
+		/// <summary>
+		/// buffer size: </summary>
+		private const int IO_BUFFER_SIZE = 256;
+
+		//~ Instance fields --------------------------------------------------------
+
+		/// <summary>
+		/// word offset, used to track which character (in the input) is being parsed </summary>
+		private int offset = 0;
+
+		/// <summary>
+		/// the index used only for ioBuffer </summary>
+		private int bufferIndex = 0;
+
+		/// <summary>
+		/// data length </summary>
+		private int dataLen = 0;
+
+		/// <summary>
+		/// character buffer, store the characters which are used to compose <br>
+		/// the returned Token
+		/// </summary>
+		private readonly char[] buffer = new char[MAX_WORD_LEN];
+
+		/// <summary>
+		/// I/O buffer, used to store the content of the input(one of the <br>
+		/// members of Tokenizer)
+		/// </summary>
+		private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+		/// <summary>
+		/// word type: single=>ASCII  double=>non-ASCII word=>default </summary>
+		private int tokenType = WORD_TYPE;
+
+		/// <summary>
+		/// tag: previous character is a cached double-byte character  "C1C2C3C4"
+		/// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened)
+		/// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4"
+		/// </summary>
+		private bool preIsTokened = false;
+
+		private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+		private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+
+		//~ Constructors -----------------------------------------------------------
+
+		/// <summary>
+		/// Construct a token stream processing the given input.
+		/// </summary>
+		/// <param name="in"> I/O reader </param>
+		public CJKTokenizer(Reader @in) : base(@in)
+		{
+		}
+
+		public CJKTokenizer(AttributeFactory factory, Reader @in) : base(factory, @in)
+		{
+		}
+
+		//~ Methods ----------------------------------------------------------------
+
+		/// <summary>
+		/// Returns true for the next token in the stream, or false at EOS.
+		/// See http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.UnicodeBlock.html
+		/// for detail.
+		/// </summary>
+		/// <returns> false for end of stream, true otherwise
+		/// </returns>
+		/// <exception cref="System.IO.IOException"> thrown when a read error <br>
+		///         occurs in the input
+		///  </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+			clearAttributes();
+
+			while (true) // loop until we find a non-empty token
+			{
+
+			  // how many character(s) have been stored in buffer
+			  int length = 0;
+
+			  /// <summary>
+			  /// the position used to create Token </summary>
+			  int start = offset;
+
+			  while (true) // loop until we've found a full token
+			  {
+				/// <summary>
+				/// current character </summary>
+				char c;
+
+				/// <summary>
+				/// Unicode block flags for the current character: Basic Latin is
+				/// U+0000-U+007F, Halfwidth and Fullwidth Forms is U+FF00-U+FFEF </summary>
+				bool isBasicLatin;
+				bool isHalfwidthFullwidth;
+
+				offset++;
+
+				if (bufferIndex >= dataLen)
+				{
+					dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
+					if (dataLen == 0)
+					{
+						dataLen = -1; // TextReader.Read reports end of input as 0; normalize to -1 as the code below expects
+					}
+					bufferIndex = 0;
+				}
+
+				if (dataLen == -1)
+				{
+					if (length > 0)
+					{
+						if (preIsTokened == true)
+						{
+							length = 0;
+							preIsTokened = false;
+						}
+						else
+						{
+						  offset--;
+						}
+
+						break;
+					}
+					else
+					{
+						offset--;
+						return false;
+					}
+				}
+				else
+				{
+					//get current character
+					c = ioBuffer[bufferIndex++];
+
+					//classify the Unicode block of the current character
+					isBasicLatin = c <= '\u007f';
+					isHalfwidthFullwidth = c >= '\uff00' && c <= '\uffef';
+				}
+
+				//if the current character is ASCII or half/full-width forms
+				if (isBasicLatin || isHalfwidthFullwidth)
+				{
+					if (isHalfwidthFullwidth)
+					{
+					  int i = (int) c;
+					  if (i >= 65281 && i <= 65374)
+					  {
+						// convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN
+						i = i - 65248;
+						c = (char) i;
+					  }
+					}
+
+					// if the current character is a letter or "_" "+" "#"
+					if (char.IsLetterOrDigit(c) || ((c == '_') || (c == '+') || (c == '#')))
+					{
+						if (length == 0)
+						{
+							// "javaC1C2C3C4linux" <br>
+							//      ^--: the current character begin to token the ASCII
+							// letter
+							start = offset - 1;
+						}
+						else if (tokenType == DOUBLE_TOKEN_TYPE)
+						{
+							// "javaC1C2C3C4linux" <br>
+							//              ^--: the previous non-ASCII
+							// : the current character
+							offset--;
+							bufferIndex--;
+
+							if (preIsTokened == true)
+							{
+								// there is only one non-ASCII has been stored
+								length = 0;
+								preIsTokened = false;
+								break;
+							}
+							else
+							{
+								break;
+							}
+						}
+
+						// store the LowerCase(c) in the buffer
+						buffer[length++] = char.ToLower(c);
+						tokenType = SINGLE_TOKEN_TYPE;
+
+						// break the procedure if buffer overflowed!
+						if (length == MAX_WORD_LEN)
+						{
+							break;
+						}
+					}
+					else if (length > 0)
+					{
+						if (preIsTokened == true)
+						{
+							length = 0;
+							preIsTokened = false;
+						}
+						else
+						{
+							break;
+						}
+					}
+				}
+				else
+				{
+					// non-ASCII letter, e.g."C1C2C3C4"
+					if (char.IsLetter(c))
+					{
+						if (length == 0)
+						{
+							start = offset - 1;
+							buffer[length++] = c;
+							tokenType = DOUBLE_TOKEN_TYPE;
+						}
+						else
+						{
+						  if (tokenType == SINGLE_TOKEN_TYPE)
+						  {
+								offset--;
+								bufferIndex--;
+
+								//return the previous ASCII characters
+								break;
+						  }
+							else
+							{
+								buffer[length++] = c;
+								tokenType = DOUBLE_TOKEN_TYPE;
+
+								if (length == 2)
+								{
+									offset--;
+									bufferIndex--;
+									preIsTokened = true;
+
+									break;
+								}
+							}
+						}
+					}
+					else if (length > 0)
+					{
+						if (preIsTokened == true)
+						{
+							// empty the buffer
+							length = 0;
+							preIsTokened = false;
+						}
+						else
+						{
+							break;
+						}
+					}
+				}
+			  }
+
+			if (length > 0)
+			{
+			  termAtt.copyBuffer(buffer, 0, length);
+			  offsetAtt.setOffset(correctOffset(start), correctOffset(start + length));
+			  typeAtt.Type = TOKEN_TYPE_NAMES[tokenType];
+			  return true;
+			}
+			else if (dataLen == -1)
+			{
+			  offset--;
+			  return false;
+			}
+
+			// Cycle back and try for the next token (don't
+			// return an empty string)
+			}
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = correctOffset(offset);
+		  int finalOffset = correctOffset(offset);
+		  this.offsetAtt.setOffset(finalOffset, finalOffset);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+		public override void reset()
+		{
+		  base.reset();
+		  offset = bufferIndex = dataLen = 0;
+		  preIsTokened = false;
+		  tokenType = WORD_TYPE;
+		}
+	}
+
+}
\ No newline at end of file
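
A porting note on the fold above: the tokenizer maps fullwidth forms to ASCII with plain
subtraction, because code points U+FF01..U+FF5E (65281..65374) mirror ASCII U+0021..U+007E
at a fixed offset of 0xFEE0 (65248). A minimal standalone C# sketch of that fold
(FoldFullwidth and the Main harness are demo-only names, not part of the port):

    using System;

    static class FullwidthFoldDemo
    {
        static char FoldFullwidth(char c)
        {
            // fullwidth '!'..'~' sit exactly 0xFEE0 above their ASCII twins
            return (c >= '\uFF01' && c <= '\uFF5E') ? (char)(c - 0xFEE0) : c;
        }

        static void Main()
        {
            Console.WriteLine(FoldFullwidth('\uFF21')); // fullwidth A -> 'A'
            Console.WriteLine(FoldFullwidth('\uFF15')); // fullwidth 5 -> '5'
        }
    }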

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
new file mode 100644
index 0000000..526b1b4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
@@ -0,0 +1,58 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="CJKTokenizer"/>. 
+	/// <pre class="prettyprint" >
+	/// &lt;fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.CJKTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> </summary>
+	/// @deprecated Use <seealso cref="CJKBigramFilterFactory"/> instead. 
+	[Obsolete("Use CJKBigramFilterFactory instead.")]
+	public class CJKTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new CJKTokenizerFactory </summary>
+	  public CJKTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CJKTokenizer create(AttributeFactory factory, Reader @in)
+	  {
+		return new CJKTokenizer(factory, @in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
new file mode 100644
index 0000000..8beffcc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
@@ -0,0 +1,113 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that normalizes CJK width differences:
+	/// <ul>
+	///   <li>Folds fullwidth ASCII variants into the equivalent basic latin
+	///   <li>Folds halfwidth Katakana variants into the equivalent kana
+	/// </ul>
+	/// <para>
+	/// NOTE: this filter can be viewed as a (practical) subset of NFKC/NFKD
+	/// Unicode normalization. See the normalization support in the ICU package
+	/// for full normalization.
+	/// </para>
+	/// </summary>
+	public sealed class CJKWidthFilter : TokenFilter
+	{
+	  private CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /* halfwidth kana mappings: 0xFF65-0xFF9F
+	   *
+	   * note: 0xFF9E and 0xFF9F are only mapped to 0x3099 and 0x309A
+	   * as a fallback when they cannot properly combine with a preceding 
+	   * character into a composed form.
+	   */
+	  private static readonly char[] KANA_NORM = new char[] {0x30fb, 0x30f2, 0x30a1, 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3, 0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3, 0x3099, 0x309A};
+
+	  public CJKWidthFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  char[] text = termAtt.buffer();
+		  int length = termAtt.length();
+		  for (int i = 0; i < length; i++)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char ch = text[i];
+			char ch = text[i];
+			if (ch >= 0xFF01 && ch <= 0xFF5E)
+			{
+			  // Fullwidth ASCII variants
+			  text[i] -= 0xFEE0;
+			}
+			else if (ch >= 0xFF65 && ch <= 0xFF9F)
+			{
+			  // Halfwidth Katakana variants
+			  if ((ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(text, i, ch))
+			  {
+				length = StemmerUtil.delete(text, i--, length);
+			  }
+			  else
+			  {
+				text[i] = KANA_NORM[ch - 0xFF65];
+			  }
+			}
+		  }
+		  termAtt.Length = length;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /* kana combining diffs: 0x30A6-0x30FD */
+	  private static readonly sbyte[] KANA_COMBINE_VOICED = new sbyte[] {78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+	  private static readonly sbyte[] KANA_COMBINE_HALF_VOICED = new sbyte[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+	  /// <summary>
+	  /// returns true if we successfully combined the voice mark </summary>
+	  private static bool combine(char[] text, int pos, char ch)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char prev = text[pos-1];
+		char prev = text[pos - 1];
+		if (prev >= 0x30A6 && prev <= 0x30FD)
+		{
+		  text[pos - 1] += (ch == 0xFF9F) ? KANA_COMBINE_HALF_VOICED[prev - 0x30A6] : KANA_COMBINE_VOICED[prev - 0x30A6];
+		  return text[pos - 1] != prev;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file
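
A note on combine() above: a halfwidth voiced sound mark (U+FF9E) or semi-voiced mark
(U+FF9F) is folded into the preceding kana by adding a small per-letter delta from the
KANA_COMBINE tables. A standalone sketch of the voiced case, with the delta hard-coded
to the table entry for KA (KANA_COMBINE_VOICED[0x30AB - 0x30A6] == 1); the demo class is
illustrative, not the ported filter:

    using System;

    static class KanaCombineDemo
    {
        static void Main()
        {
            char ka = '\u30AB';        // KATAKANA LETTER KA, already width-normalized
            sbyte voicedDelta = 1;     // KANA_COMBINE_VOICED[0x30AB - 0x30A6]
            char ga = (char)(ka + voicedDelta);
            Console.WriteLine(ga == '\u30AC'); // True: KATAKANA LETTER GA
        }
    }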

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
new file mode 100644
index 0000000..a917f90
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="CJKWidthFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CJKWidthFilterFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.CJKBigramFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CJKWidthFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new CJKWidthFilterFactory </summary>
+	  public CJKWidthFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new CJKWidthFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
new file mode 100644
index 0000000..d964550
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Sorani Kurdish.
+	/// </summary>
+	public sealed class SoraniAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Kurdish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(SoraniAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public SoraniAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public SoraniAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="SoraniNormalizationFilter"/>, 
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SoraniStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new SoraniNormalizationFilter(result);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SoraniStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
new file mode 100644
index 0000000..17133ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
@@ -0,0 +1,52 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SoraniNormalizer"/> to normalize the
+	/// orthography.
+	/// </summary>
+	public sealed class SoraniNormalizationFilter : TokenFilter
+	{
+	  private readonly SoraniNormalizer normalizer = new SoraniNormalizer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  public SoraniNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  termAtt.Length = newlen;
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
new file mode 100644
index 0000000..5f68eb7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="SoraniNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ckbnormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.SoraniNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class SoraniNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new SoraniNormalizationFilterFactory </summary>
+	  public SoraniNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override SoraniNormalizationFilter create(TokenStream input)
+	  {
+		return new SoraniNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
new file mode 100644
index 0000000..9c3f551
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
@@ -0,0 +1,140 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// Normalizes the Unicode representation of Sorani text.
+	/// <para>
+	/// Normalization consists of:
+	/// <ul>
+	///   <li>Alternate forms of 'y' (064A, 0649) are converted to 06CC (FARSI YEH)
+	///   <li>Alternate form of 'k' (0643) is converted to 06A9 (KEHEH)
+	///   <li>Alternate forms of vowel 'e' (0647+200C, word-final 0647, 0629) are converted to 06D5 (AE)
+	///   <li>Alternate (joining) form of 'h' (06BE) is converted to 0647
+	///   <li>Alternate forms of 'rr' (0692, word-initial 0631) are converted to 0695 (REH WITH SMALL V BELOW)
+	///   <li>Harakat, tatweel, and formatting characters such as directional controls are removed.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public class SoraniNormalizer
+	{
+
+	  internal const char YEH = '\u064A';
+	  internal const char DOTLESS_YEH = '\u0649';
+	  internal const char FARSI_YEH = '\u06CC';
+
+	  internal const char KAF = '\u0643';
+	  internal const char KEHEH = '\u06A9';
+
+	  internal const char HEH = '\u0647';
+	  internal const char AE = '\u06D5';
+	  internal const char ZWNJ = '\u200C';
+	  internal const char HEH_DOACHASHMEE = '\u06BE';
+	  internal const char TEH_MARBUTA = '\u0629';
+
+	  internal const char REH = '\u0631';
+	  internal const char RREH = '\u0695';
+	  internal const char RREH_ABOVE = '\u0692';
+
+	  internal const char TATWEEL = '\u0640';
+	  internal const char FATHATAN = '\u064B';
+	  internal const char DAMMATAN = '\u064C';
+	  internal const char KASRATAN = '\u064D';
+	  internal const char FATHA = '\u064E';
+	  internal const char DAMMA = '\u064F';
+	  internal const char KASRA = '\u0650';
+	  internal const char SHADDA = '\u0651';
+	  internal const char SUKUN = '\u0652';
+
+	  /// <summary>
+	  /// Normalize an input buffer of Sorani text
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int normalize(char[] s, int len)
+	  {
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case YEH:
+			case DOTLESS_YEH:
+			  s[i] = FARSI_YEH;
+			  break;
+			case KAF:
+			  s[i] = KEHEH;
+			  break;
+			case ZWNJ:
+			  if (i > 0 && s[i - 1] == HEH)
+			  {
+				s[i - 1] = AE;
+			  }
+			  len = StemmerUtil.delete(s, i, len);
+			  i--;
+			  break;
+			case HEH:
+			  if (i == len - 1)
+			  {
+				s[i] = AE;
+			  }
+			  break;
+			case TEH_MARBUTA:
+			  s[i] = AE;
+			  break;
+			case HEH_DOACHASHMEE:
+			  s[i] = HEH;
+			  break;
+			case REH:
+			  if (i == 0)
+			  {
+				s[i] = RREH;
+			  }
+			  break;
+			case RREH_ABOVE:
+			  s[i] = RREH;
+			  break;
+			case TATWEEL:
+			case KASRATAN:
+			case DAMMATAN:
+			case FATHATAN:
+			case FATHA:
+			case DAMMA:
+			case KASRA:
+			case SHADDA:
+			case SUKUN:
+			  len = StemmerUtil.delete(s, i, len);
+			  i--;
+			  break;
+			default:
+			  if (char.GetUnicodeCategory(s[i]) == System.Globalization.UnicodeCategory.Format)
+			  {
+				len = StemmerUtil.delete(s, i, len);
+				i--;
+			  }
+		  break;
+		  }
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
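
Several of the rules above are context-free single-character folds that can be checked in
isolation (HEH, ZWNJ and REH are the context-sensitive exceptions). A minimal sketch;
NormalizeChar is a demo-only reduction of the switch above, not the ported class:

    using System;

    static class SoraniNormalizeDemo
    {
        static char NormalizeChar(char c)
        {
            switch (c)
            {
                case '\u0643': return '\u06A9'; // KAF -> KEHEH
                case '\u0629': return '\u06D5'; // TEH MARBUTA -> AE
                case '\u0692': return '\u0695'; // RREH WITH SMALL V ABOVE -> RREH
                default: return c;
            }
        }

        static void Main()
        {
            Console.WriteLine(NormalizeChar('\u0643') == '\u06A9'); // True
            Console.WriteLine(NormalizeChar('\u0629') == '\u06D5'); // True
        }
    }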

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
new file mode 100644
index 0000000..5d79be0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SoraniStemmer"/> to stem Sorani words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter  </seealso>
+
+	public sealed class SoraniStemFilter : TokenFilter
+	{
+	  private readonly SoraniStemmer stemmer = new SoraniStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public SoraniStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
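
The filter above stems a token only when its KeywordAttribute is unset, which is how
SetKeywordMarkerFilter protects terms from stemming. A sketch of that gating idea with
stand-in types (the keyword set, the delegate, and the toy stemmer are demo assumptions,
not the ported attribute machinery):

    using System;
    using System.Collections.Generic;

    static class KeywordGateDemo
    {
        static string MaybeStem(string token, ISet<string> keywords, Func<string, string> stem)
        {
            // tokens marked as keywords bypass the stemmer unchanged
            return keywords.Contains(token) ? token : stem(token);
        }

        static void Main()
        {
            var keywords = new HashSet<string> { "lucene" };
            Func<string, string> toyStem = t => t.TrimEnd('s'); // toy stemmer for the demo
            Console.WriteLine(MaybeStem("lucene", keywords, toyStem)); // lucene
            Console.WriteLine(MaybeStem("books", keywords, toyStem));  // book
        }
    }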

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
new file mode 100644
index 0000000..67018ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="SoraniStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ckbstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.SoraniNormalizationFilterFactory"/&gt;
+	///     &lt;filter class="solr.SoraniStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class SoraniStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new SoraniStemFilterFactory </summary>
+	  public SoraniStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override SoraniStemFilter create(TokenStream input)
+	  {
+		return new SoraniStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
new file mode 100644
index 0000000..4ec57cb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
@@ -0,0 +1,139 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// Light stemmer for Sorani
+	/// </summary>
+	public class SoraniStemmer
+	{
+
+	  /// <summary>
+	  /// Stem an input buffer of Sorani text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after stemming </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		// postposition
+		if (len > 5 && StemmerUtil.endsWith(s, len, "دا"))
+		{
+		  len -= 2;
+		}
+		else if (len > 4 && StemmerUtil.endsWith(s, len, "نا"))
+		{
+		  len--;
+		}
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "ەوە"))
+		{
+		  len -= 3;
+		}
+
+		// possessive pronoun
+		if (len > 6 && (StemmerUtil.endsWith(s, len, "مان") || StemmerUtil.endsWith(s, len, "یان") || StemmerUtil.endsWith(s, len, "تان")))
+		{
+		  len -= 3;
+		}
+
+		// indefinite singular ezafe
+		if (len > 6 && StemmerUtil.endsWith(s, len, "ێکی"))
+		{
+		  return len - 3;
+		}
+		else if (len > 7 && StemmerUtil.endsWith(s, len, "یەکی"))
+		{
+		  return len - 4;
+		}
+		// indefinite singular
+		if (len > 5 && StemmerUtil.endsWith(s, len, "ێک"))
+		{
+		  return len - 2;
+		}
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "یەک"))
+		{
+		  return len - 3;
+		}
+		// definite singular
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "ەکە"))
+		{
+		  return len - 3;
+		}
+		else if (len > 5 && StemmerUtil.endsWith(s, len, "کە"))
+		{
+		  return len - 2;
+		}
+		// definite plural
+		else if (len > 7 && StemmerUtil.endsWith(s, len, "ەکان"))
+		{
+		  return len - 4;
+		}
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "کان"))
+		{
+		  return len - 3;
+		}
+		// indefinite plural ezafe
+		else if (len > 7 && StemmerUtil.endsWith(s, len, "یانی"))
+		{
+		  return len - 4;
+		}
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "انی"))
+		{
+		  return len - 3;
+		}
+		// indefinite plural
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "یان"))
+		{
+		  return len - 3;
+		}
+		else if (len > 5 && StemmerUtil.endsWith(s, len, "ان"))
+		{
+		  return len - 2;
+		}
+		// demonstrative plural
+		else if (len > 7 && StemmerUtil.endsWith(s, len, "یانە"))
+		{
+		  return len - 4;
+		}
+		else if (len > 6 && StemmerUtil.endsWith(s, len, "انە"))
+		{
+		  return len - 3;
+		}
+		// demonstrative singular
+		else if (len > 5 && (StemmerUtil.endsWith(s, len, "ایە") || StemmerUtil.endsWith(s, len, "ەیە")))
+		{
+		  return len - 2;
+		}
+		else if (len > 4 && StemmerUtil.endsWith(s, len, "ە"))
+		{
+		  return len - 1;
+		}
+		// absolute singular ezafe
+		else if (len > 4 && StemmerUtil.endsWith(s, len, "ی"))
+		{
+		  return len - 1;
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
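
Each rule above is a guarded suffix strip over a char buffer with an explicit valid
length; the buffer is never rewritten, only len shrinks. A self-contained sketch of the
StemmerUtil.endsWith-style test the stemmer relies on (EndsWith is a demo
reimplementation, and the Latin-script input stands in for a Sorani term):

    using System;

    static class SuffixStripDemo
    {
        static bool EndsWith(char[] s, int len, string suffix)
        {
            if (len < suffix.Length) return false;
            for (int i = 0; i < suffix.Length; i++)
                if (s[len - suffix.Length + i] != suffix[i]) return false;
            return true;
        }

        static void Main()
        {
            char[] buf = "kitapan".ToCharArray(); // stand-in term
            int len = buf.Length;
            if (len > 5 && EndsWith(buf, len, "an"))
                len -= 2;                         // strip the suffix by shrinking len
            Console.WriteLine(new string(buf, 0, len)); // kitap
        }
    }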

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
new file mode 100644
index 0000000..9023664
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
@@ -0,0 +1,49 @@
+using System;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
+
+	/// <summary>
+	/// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="ChineseTokenizer"/> and
+	/// filters with <seealso cref="ChineseFilter"/> </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardAnalyzer"/> instead, which has the same functionality.
+	/// This analyzer will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use StandardAnalyzer instead, which has the same functionality.")]
+	public sealed class ChineseAnalyzer : Analyzer
+	{
+		/// <summary>
+		/// Creates
+		/// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+		/// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+		/// </summary>
+		/// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+		///         built from a <seealso cref="ChineseTokenizer"/> filtered with
+		///         <seealso cref="ChineseFilter"/> </returns>
+		protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new ChineseTokenizer(reader);
+		  Tokenizer source = new ChineseTokenizer(reader);
+		  return new TokenStreamComponents(source, new ChineseFilter(source));
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
new file mode 100644
index 0000000..a631a04
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
@@ -0,0 +1,104 @@
+using System;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> with a stop word table.  
+	/// <ul>
+	/// <li>Numeric tokens are removed.
+	/// <li>English tokens must be larger than 1 character.
+	/// <li>One Chinese character as one Chinese word.
+	/// </ul>
+	/// TO DO:
+	/// <ol>
+	/// <li>Add Chinese stop words, such as \ue400
+	/// <li>Dictionary based Chinese word extraction
+	/// <li>Intelligent Chinese word extraction
+	/// </ol>
+	/// </summary>
+	/// @deprecated (3.1) Use <seealso cref="StopFilter"/> instead, which has the same functionality.
+	/// This filter will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use StopFilter instead, which has the same functionality.")]
+	public sealed class ChineseFilter : TokenFilter
+	{
+
+
+		// Only English now, Chinese to be added later.
+		public static readonly string[] STOP_WORDS = new string[] {"and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+
+
+		private CharArraySet stopTable;
+
+		private CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+		public ChineseFilter(TokenStream @in) : base(@in)
+		{
+
+			stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+
+			while (input.incrementToken())
+			{
+				char[] text = termAtt.buffer();
+				int termLength = termAtt.length();
+
+			  // why not key off token type here assuming ChineseTokenizer comes first?
+				if (!stopTable.contains(text, 0, termLength))
+				{
+					switch (char.GetUnicodeCategory(text[0]))
+					{
+
+					case System.Globalization.UnicodeCategory.LowercaseLetter:
+					case System.Globalization.UnicodeCategory.UppercaseLetter:
+
+						// English word/token should be larger than 1 character.
+						if (termLength > 1)
+						{
+							return true;
+						}
+						break;
+					case System.Globalization.UnicodeCategory.OtherLetter:
+
+						// One Chinese character as one Chinese word.
+						// Chinese word extraction to be added later here.
+
+						return true;
+					}
+
+				}
+
+			}
+			return false;
+		}
+
+	}
+}
\ No newline at end of file
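
The keep/drop rules above reduce to: numeric tokens are dropped, English tokens survive
only if longer than one character, and a single CJK (OtherLetter) character is kept as a
word. A standalone sketch of just that category test, using char.GetUnicodeCategory;
Keep is demo-only and skips the stop-word table:

    using System;
    using System.Globalization;

    static class ChineseFilterRuleDemo
    {
        static bool Keep(string token)
        {
            switch (char.GetUnicodeCategory(token[0]))
            {
                case UnicodeCategory.LowercaseLetter:
                case UnicodeCategory.UppercaseLetter:
                    return token.Length > 1;   // English tokens need > 1 char
                case UnicodeCategory.OtherLetter:
                    return true;               // one CJK char = one word
                default:
                    return false;              // digits, punctuation, etc.
            }
        }

        static void Main()
        {
            Console.WriteLine(Keep("a"));      // False
            Console.WriteLine(Keep("ab"));     // True
            Console.WriteLine(Keep("\u4E2D")); // True: a CJK character
            Console.WriteLine(Keep("5"));      // False
        }
    }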

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
new file mode 100644
index 0000000..8e496d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
@@ -0,0 +1,51 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StopFilterFactory = org.apache.lucene.analysis.core.StopFilterFactory; // javadocs
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ChineseFilter"/> </summary>
+	/// @deprecated Use <seealso cref="StopFilterFactory"/> instead. 
+	[Obsolete("Use StopFilterFactory instead.")]
+	public class ChineseFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ChineseFilterFactory </summary>
+	  public ChineseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ChineseFilter create(TokenStream @in)
+	  {
+		return new ChineseFilter(@in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
new file mode 100644
index 0000000..b2fb638
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -0,0 +1,199 @@
+using System;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+
+	/// <summary>
+	/// Tokenize Chinese text as individual Chinese characters.
+	/// 
+	/// <para>
+	/// The difference between ChineseTokenizer and
+	/// CJKTokenizer is that they have different
+	/// token parsing logic.
+	/// </para>
+	/// <para>
+	/// For example, if the Chinese text
+	/// "C1C2C3C4" is to be indexed:
+	/// <ul>
+	/// <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4. 
+	/// <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// Therefore the index created by CJKTokenizer is much larger.
+	/// </para>
+	/// <para>
+	/// The problem is that when searching for C1, C1C2, C1C3,
+	/// C4C2, C1C2C3 ... the ChineseTokenizer works, but the
+	/// CJKTokenizer will not work.
+	/// </para> </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.
+	/// This tokenizer will be removed in Lucene 5.0
+	[Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")]
+	public sealed class ChineseTokenizer : Tokenizer
+	{
+
+
+		public ChineseTokenizer(Reader @in) : base(@in)
+		{
+		}
+
+		public ChineseTokenizer(AttributeFactory factory, Reader @in) : base(factory, @in)
+		{
+		}
+
+		private int offset = 0, bufferIndex = 0, dataLen = 0;
+		private const int MAX_WORD_LEN = 255;
+		private const int IO_BUFFER_SIZE = 1024;
+		private readonly char[] buffer = new char[MAX_WORD_LEN];
+		private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+
+		private int length;
+		private int start;
+
+		private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+		private void push(char c)
+		{
+
+			if (length == 0) // start of token
+			{
+				start = offset - 1;
+			}
+			buffer[length++] = char.ToLower(c); // buffer it
+
+		}
+
+		private bool flush()
+		{
+
+			if (length > 0)
+			{
+				//System.out.println(new String(buffer, 0,
+				//length));
+			  termAtt.copyBuffer(buffer, 0, length);
+			  offsetAtt.setOffset(correctOffset(start), correctOffset(start + length));
+			  return true;
+			}
+			else
+			{
+				return false;
+			}
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+			clearAttributes();
+
+			length = 0;
+			start = offset;
+
+
+			while (true)
+			{
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char c;
+				char c;
+				offset++;
+
+				if (bufferIndex >= dataLen)
+				{
+					dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
+					// TextReader.Read returns 0 at end of input; keep the Java-style -1 sentinel
+					if (dataLen == 0) dataLen = -1;
+					bufferIndex = 0;
+				}
+
+				if (dataLen == -1)
+				{
+				  offset--;
+				  return flush();
+				}
+				else
+				{
+					c = ioBuffer[bufferIndex++];
+				}
+
+
+				switch (char.GetUnicodeCategory(c))
+				{
+
+				case System.Globalization.UnicodeCategory.DecimalDigitNumber:
+				case System.Globalization.UnicodeCategory.LowercaseLetter:
+				case System.Globalization.UnicodeCategory.UppercaseLetter:
+					push(c);
+					if (length == MAX_WORD_LEN)
+					{
+						return flush();
+					}
+					break;
+
+				case System.Globalization.UnicodeCategory.OtherLetter:
+					if (length > 0)
+					{
+						bufferIndex--;
+						offset--;
+						return flush();
+					}
+					push(c);
+					return flush();
+
+				default:
+					if (length > 0)
+					{
+						return flush();
+					}
+					break;
+				}
+			}
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = correctOffset(offset);
+		  int finalOffset = correctOffset(offset);
+		  this.offsetAtt.setOffset(finalOffset, finalOffset);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+		public override void reset()
+		{
+		  base.reset();
+		  offset = bufferIndex = dataLen = 0;
+		}
+	}
+
+}
\ No newline at end of file
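
To make the C1..C4 example in the doc comment concrete: ChineseTokenizer emits one token
per CJK character, while CJKTokenizer emits overlapping bigrams. A sketch of just the two
segmentations (PerChar and Bigrams are demo helpers; "ABCD" stands in for four CJK
characters):

    using System;
    using System.Collections.Generic;

    static class SegmentationContrastDemo
    {
        static IEnumerable<string> PerChar(string s)
        {
            foreach (char c in s) yield return c.ToString(); // ChineseTokenizer style
        }

        static IEnumerable<string> Bigrams(string s)
        {
            for (int i = 0; i + 1 < s.Length; i++)
                yield return s.Substring(i, 2);              // CJKTokenizer style
        }

        static void Main()
        {
            Console.WriteLine(string.Join(",", PerChar("ABCD"))); // A,B,C,D
            Console.WriteLine(string.Join(",", Bigrams("ABCD"))); // AB,BC,CD
        }
    }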

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
new file mode 100644
index 0000000..3abb93f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ChineseTokenizer"/> </summary>
+	/// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead. 
+	[Obsolete("Use org.apache.lucene.analysis.standard.StandardTokenizerFactory instead.")]
+	public class ChineseTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ChineseTokenizerFactory </summary>
+	  public ChineseTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ChineseTokenizer create(AttributeFactory factory, Reader @in)
+	  {
+		return new ChineseTokenizer(factory, @in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
new file mode 100644
index 0000000..2b97da8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -0,0 +1,199 @@
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.commongrams
+{
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/*
+	 * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors 
+	 */
+
+	/// <summary>
+	/// Construct bigrams for frequently occurring terms while indexing. Single terms
+	/// are still indexed too, with bigrams overlaid. This is achieved through the
+	/// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type
+	/// of <seealso cref="#GRAM_TYPE"/> Example:
+	/// <ul>
+	/// <li>input:"the quick brown fox"</li>
+	/// <li>output:|"the","the-quick"|"brown"|"fox"|</li>
+	/// <li>"the-quick" has a position increment of 0 so it is in the same position
+	/// as "the" "the-quick" has a term.type() of "gram"</li>
+	/// 
+	/// </ul>
+	/// </summary>
+
+	/*
+	 * Constructors and makeCommonSet based on similar code in StopFilter
+	 */
+	public sealed class CommonGramsFilter : TokenFilter
+	{
+
+	  public const string GRAM_TYPE = "gram";
+	  private const char SEPARATOR = '_';
+
+	  private readonly CharArraySet commonWords;
+
+	  private readonly StringBuilder buffer = new StringBuilder();
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
+	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLenAttribute = addAttribute(typeof(PositionLengthAttribute));
+
+	  private int lastStartOffset;
+	  private bool lastWasCommon;
+	  private State savedState;
+
+	  /// <summary>
+	  /// Construct a token stream filtering the given input using a Set of common
+	  /// words to create bigrams. Outputs both unigrams (with their normal position
+	  /// increment) and bigrams (with position increment 0 and type="gram") when
+	  /// one or both of the words in a potential bigram are in the set of common words.
+	  /// </summary>
+	  /// <param name="input"> TokenStream input in filter chain </param>
+	  /// <param name="commonWords"> The set of common words. </param>
+	  public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) : base(input)
+	  {
+		this.commonWords = commonWords;
+	  }
+
+	  /// <summary>
+	  /// Inserts bigrams for common words into a token stream. For each input token,
+	  /// output the token. If the token and/or the following token are in the list
+	  /// of common words also output a bigram with position increment 0 and
+	  /// type="gram"
+	  /// 
+	  /// TODO: Consider adding an option to not emit unigram stopwords,
+	  /// as in CDL XTF BigramStopFilter; CommonGramsQueryFilter would need to be
+	  /// changed to work with this.
+	  /// 
+	  /// TODO: Consider optimizing for the case of three
+	  /// common grams, i.e. "man of the year" normally produces 3 bigrams ("man-of",
+	  /// "of-the", "the-year"), but with proper management of positions we could
+	  /// eliminate the middle bigram "of-the" and save a disk seek and a whole set of
+	  /// position lookups.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		// get the next piece of input
+		if (savedState != null)
+		{
+		  restoreState(savedState);
+		  savedState = null;
+		  saveTermBuffer();
+		  return true;
+		}
+		else if (!input.incrementToken())
+		{
+			return false;
+		}
+
+		/* We build n-grams before and after stopwords.
+		 * When valid, the buffer always contains at least the separator.
+		 * If it's empty, there is nothing before this stopword.
+		 */
+		if (lastWasCommon || (Common && buffer.Length > 0))
+		{
+		  savedState = captureState();
+		  gramToken();
+		  return true;
+		}
+
+		saveTermBuffer();
+		return true;
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		lastWasCommon = false;
+		savedState = null;
+		buffer.Length = 0;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Determines if the current token is a common term
+	  /// </summary>
+	  /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns>
+	  private bool Common
+	  {
+		  get
+		  {
+			return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
+		  }
+	  }
+
+	  /// <summary>
+	  /// Saves this information to form the left part of a gram
+	  /// </summary>
+	  private void saveTermBuffer()
+	  {
+		buffer.Length = 0;
+		buffer.Append(termAttribute.buffer(), 0, termAttribute.length());
+		buffer.Append(SEPARATOR);
+		lastStartOffset = offsetAttribute.startOffset();
+		lastWasCommon = Common;
+	  }
+
+	  /// <summary>
+	  /// Constructs a compound token.
+	  /// </summary>
+	  private void gramToken()
+	  {
+		buffer.Append(termAttribute.buffer(), 0, termAttribute.length());
+		int endOffset = offsetAttribute.endOffset();
+
+		clearAttributes();
+
+		int length = buffer.Length;
+		char[] termText = termAttribute.buffer();
+		if (length > termText.Length)
+		{
+		  termText = termAttribute.resizeBuffer(length);
+		}
+
+		buffer.CopyTo(0, termText, 0, length); // StringBuilder.CopyTo replaces Java's getChars
+		termAttribute.Length = length;
+		posIncAttribute.PositionIncrement = 0;
+		posLenAttribute.PositionLength = 2; // bigram
+		offsetAttribute.setOffset(lastStartOffset, endOffset);
+		typeAttribute.Type = GRAM_TYPE;
+		buffer.Length = 0;
+	  }
+	}
+
+}
\ No newline at end of file
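
To make the bigram overlay concrete, here is a minimal standalone C# sketch of
the behaviour the class above documents. It is illustrative only: the class and
method names (CommonBigramsSketch, Emit) are hypothetical and not part of this
commit, and the real filter works incrementally over a TokenStream rather than
over an array.

    using System;
    using System.Collections.Generic;

    static class CommonBigramsSketch
    {
        // Emits every unigram, plus an overlaid bigram (joined with the
        // SEPARATOR '_'; position increment 0 and type "gram" in the real
        // filter) whenever either neighbour is a common word.
        public static IEnumerable<string> Emit(string[] tokens, ISet<string> common)
        {
            for (int i = 0; i < tokens.Length; i++)
            {
                yield return tokens[i]; // unigrams are always kept
                if (i + 1 < tokens.Length &&
                    (common.Contains(tokens[i]) || common.Contains(tokens[i + 1])))
                {
                    yield return tokens[i] + "_" + tokens[i + 1];
                }
            }
        }

        static void Main()
        {
            var common = new HashSet<string> { "the" };
            var tokens = "the quick brown fox".Split(' ');
            Console.WriteLine(string.Join("|", Emit(tokens, common)));
            // prints: the|the_quick|quick|brown|fox
        }
    }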

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
new file mode 100644
index 0000000..2233e83
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -0,0 +1,104 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.commongrams;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.CommonGrams
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+    /// <summary>
+	/// Constructs a <seealso cref="CommonGramsFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CommonGramsFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  // TODO: shared base class for Stop/Keep/CommonGrams? 
+	  private CharArraySet commonWords;
+	  private readonly string commonWordFiles;
+	  private readonly string format;
+	  private readonly bool ignoreCase;
+
+	  /// <summary>
+	  /// Creates a new CommonGramsFilterFactory </summary>
+	  public CommonGramsFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		commonWordFiles = get(args, "words");
+		format = get(args, "format");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (commonWordFiles != null)
+		{
+		  if ("snowball".Equals(format, StringComparison.CurrentCultureIgnoreCase))
+		  {
+			commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
+		  }
+		  else
+		  {
+			commonWords = GetWordSet(loader, commonWordFiles, ignoreCase);
+		  }
+		}
+		else
+		{
+		  commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+		}
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public virtual CharArraySet CommonWords
+	  {
+		  get
+		  {
+			return commonWords;
+		  }
+	  }
+
+	  public override TokenFilter Create(TokenStream input)
+	  {
+		CommonGramsFilter commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+		return commonGrams;
+	  }
+	}
+
+
+
+}
\ No newline at end of file
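
The constructor above follows the factory convention of consuming the arguments
it understands and then rejecting leftovers. A standalone sketch of that
convention is below; Take is a hypothetical helper standing in for the
get/getBoolean methods the real factories inherit from AbstractAnalysisFactory.

    using System;
    using System.Collections.Generic;

    class FactoryArgsSketch
    {
        // Consume a key so that unrecognised leftovers can be detected.
        static string Take(IDictionary<string, string> args, string key)
        {
            string value;
            if (args.TryGetValue(key, out value))
            {
                args.Remove(key);
                return value;
            }
            return null;
        }

        static void Main()
        {
            var args = new Dictionary<string, string>
            {
                { "words", "commongramsstopwords.txt" },
                { "ignoreCase", "false" }
            };
            string words = Take(args, "words");
            string format = Take(args, "format"); // optional, may be null
            bool ignoreCase = bool.Parse(Take(args, "ignoreCase") ?? "false");
            if (args.Count > 0) // same guard as the committed constructor
                throw new ArgumentException("Unknown parameters: " + string.Join(",", args.Keys));
            Console.WriteLine(words + " " + (format ?? "(default)") + " " + ignoreCase);
        }
    }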

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
new file mode 100644
index 0000000..b787bde
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+namespace org.apache.lucene.analysis.commongrams
+{
+
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+	// The Java 'import static ... CommonGramsFilter.GRAM_TYPE' has no C# equivalent;
+	// the references below are qualified as CommonGramsFilter.GRAM_TYPE instead.
+
+	/// <summary>
+	/// Wrap a CommonGramsFilter optimizing phrase queries by only returning single
+	/// words when they are not a member of a bigram.
+	/// 
+	/// Example:
+	/// <ul>
+	/// <li>query input to CommonGramsFilter: "the rain in spain falls mainly"
+	/// <li>output of CommonGramsFilter/input to CommonGramsQueryFilter:
+	/// |"the", "the-rain"|"rain", "rain-in"|"in", "in-spain"|"spain"|"falls"|"mainly"
+	/// <li>output of CommonGramsQueryFilter: "the-rain", "rain-in", "in-spain",
+	/// "falls", "mainly"
+	/// </ul>
+	/// </summary>
+
+	/*
+	 * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
+	 * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
+	 */
+	public sealed class CommonGramsQueryFilter : TokenFilter
+	{
+
+	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+
+	  private State previous;
+	  private string previousType;
+	  private bool exhausted;
+
+	  /// <summary>
+	  /// Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter
+	  /// </summary>
+	  /// <param name="input"> CommonGramsFilter the QueryFilter will use </param>
+	  public CommonGramsQueryFilter(CommonGramsFilter input) : base(input)
+	  {
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		previous = null;
+		previousType = null;
+		exhausted = false;
+	  }
+
+	  /// <summary>
+	  /// Output bigrams whenever possible to optimize queries. Only output unigrams
+	  /// when they are not a member of a bigram. Example:
+	  /// <ul>
+	  /// <li>input: "the rain in spain falls mainly"
+	  /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
+	  /// </ul>
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (!exhausted && input.incrementToken())
+		{
+		  State current = captureState();
+
+		  if (previous != null && !GramType)
+		  {
+			restoreState(previous);
+			previous = current;
+			previousType = typeAttribute.type();
+
+			if (GramType)
+			{
+			  posIncAttribute.PositionIncrement = 1;
+			}
+			return true;
+		  }
+
+		  previous = current;
+		}
+
+		exhausted = true;
+
+		if (previous == null || CommonGramsFilter.GRAM_TYPE.Equals(previousType))
+		{
+		  return false;
+		}
+
+		restoreState(previous);
+		previous = null;
+
+		if (GramType)
+		{
+		  posIncAttribute.PositionIncrement = 1;
+		}
+		return true;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Convenience method to check if the current type is a gram type
+	  /// </summary>
+	  /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns>
+	  public bool GramType
+	  {
+		  get
+		  {
+			return CommonGramsFilter.GRAM_TYPE.Equals(typeAttribute.type());
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
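
The query-side rule is easier to see outside the TokenStream machinery. The
sketch below is a hypothetical standalone model, not the committed API: it
emits a bigram whenever either word of a pair is common, and keeps a unigram
only when no bigram covers it.

    using System;
    using System.Collections.Generic;

    static class CommonGramsQuerySketch
    {
        public static IEnumerable<string> Emit(string[] tokens, ISet<string> common)
        {
            for (int i = 0; i < tokens.Length; i++)
            {
                bool inPrevBigram = i > 0 &&
                    (common.Contains(tokens[i - 1]) || common.Contains(tokens[i]));
                bool inNextBigram = i + 1 < tokens.Length &&
                    (common.Contains(tokens[i]) || common.Contains(tokens[i + 1]));
                if (inNextBigram)
                    yield return tokens[i] + "_" + tokens[i + 1];
                else if (!inPrevBigram)
                    yield return tokens[i]; // unigram not covered by any bigram
            }
        }

        static void Main()
        {
            var common = new HashSet<string> { "the", "in" };
            var tokens = "the rain in spain falls mainly".Split(' ');
            Console.WriteLine(string.Join(", ", Emit(tokens, common)));
            // prints: the_rain, rain_in, in_spain, falls, mainly
        }
    }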

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
new file mode 100644
index 0000000..ddee353
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.CommonGrams;
+
+namespace org.apache.lucene.analysis.commongrams
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Construct <seealso cref="CommonGramsQueryFilter"/>.
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new CommonGramsQueryFilterFactory </summary>
+	  public CommonGramsQueryFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
+	  /// </summary>
+	  public override TokenFilter Create(TokenStream input)
+	  {
+		CommonGramsFilter commonGrams = (CommonGramsFilter) base.Create(input);
+		return new CommonGramsQueryFilter(commonGrams);
+	  }
+	}
+
+}
\ No newline at end of file


[34/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
Raw porting of Lucene.Net.Analysis.Common


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/99717176
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/99717176
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/99717176

Branch: refs/heads/master
Commit: 9971717653f66f944439ea63add45de338ec21dc
Parents: b8454a3
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Sat Nov 8 01:11:56 2014 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Sat Nov 8 01:11:56 2014 +0200

----------------------------------------------------------------------
 .../Analysis/Ar/ArabicAnalyzer.cs               |  161 ++
 .../Analysis/Ar/ArabicLetterTokenizer.cs        |   83 +
 .../Analysis/Ar/ArabicLetterTokenizerFactory.cs |   54 +
 .../Analysis/Ar/ArabicNormalizationFilter.cs    |   51 +
 .../Ar/ArabicNormalizationFilterFactory.cs      |   64 +
 .../Analysis/Ar/ArabicNormalizer.cs             |  111 +
 .../Analysis/Ar/ArabicStemFilter.cs             |   66 +
 .../Analysis/Ar/ArabicStemFilterFactory.cs      |   55 +
 .../Analysis/Ar/ArabicStemmer.cs                |  163 ++
 .../Analysis/Bg/BulgarianAnalyzer.cs            |  144 ++
 .../Analysis/Bg/BulgarianStemFilter.cs          |   68 +
 .../Analysis/Bg/BulgarianStemFilterFactory.cs   |   55 +
 .../Analysis/Bg/BulgarianStemmer.cs             |  187 ++
 .../Analysis/Br/BrazilianAnalyzer.cs            |  143 ++
 .../Analysis/Br/BrazilianStemFilter.cs          |   90 +
 .../Analysis/Br/BrazilianStemFilterFactory.cs   |   56 +
 .../Analysis/Br/BrazilianStemmer.cs             | 1395 ++++++++++++
 .../Analysis/Ca/CatalanAnalyzer.cs              |  154 ++
 .../Analysis/CharFilter/BaseCharFilter.cs       |  129 ++
 .../CharFilter/HTMLStripCharFilterFactory.cs    |   67 +
 .../Analysis/CharFilter/MappingCharFilter.cs    |  240 ++
 .../CharFilter/MappingCharFilterFactory.cs      |  184 ++
 .../Analysis/CharFilter/NormalizeCharMap.cs     |  162 ++
 .../Analysis/Cjk/CJKAnalyzer.cs                 |  118 +
 .../Analysis/Cjk/CJKBigramFilter.cs             |  420 ++++
 .../Analysis/Cjk/CJKBigramFilterFactory.cs      |   79 +
 .../Analysis/Cjk/CJKTokenizer.cs                |  370 +++
 .../Analysis/Cjk/CJKTokenizerFactory.cs         |   58 +
 .../Analysis/Cjk/CJKWidthFilter.cs              |  113 +
 .../Analysis/Cjk/CJKWidthFilterFactory.cs       |   66 +
 .../Analysis/Ckb/SoraniAnalyzer.cs              |  139 ++
 .../Analysis/Ckb/SoraniNormalizationFilter.cs   |   52 +
 .../Ckb/SoraniNormalizationFilterFactory.cs     |   64 +
 .../Analysis/Ckb/SoraniNormalizer.cs            |  140 ++
 .../Analysis/Ckb/SoraniStemFilter.cs            |   66 +
 .../Analysis/Ckb/SoraniStemFilterFactory.cs     |   55 +
 .../Analysis/Ckb/SoraniStemmer.cs               |  139 ++
 .../Analysis/Cn/ChineseAnalyzer.cs              |   49 +
 .../Analysis/Cn/ChineseFilter.cs                |  104 +
 .../Analysis/Cn/ChineseFilterFactory.cs         |   51 +
 .../Analysis/Cn/ChineseTokenizer.cs             |  199 ++
 .../Analysis/Cn/ChineseTokenizerFactory.cs      |   52 +
 .../Analysis/CommonGrams/CommonGramsFilter.cs   |  199 ++
 .../CommonGrams/CommonGramsFilterFactory.cs     |  104 +
 .../CommonGrams/CommonGramsQueryFilter.cs       |  139 ++
 .../CommonGramsQueryFilterFactory.cs            |   55 +
 .../Compound/CompoundWordTokenFilterBase.cs     |  202 ++
 .../DictionaryCompoundWordTokenFilter.cs        |  137 ++
 .../DictionaryCompoundWordTokenFilterFactory.cs |   81 +
 .../HyphenationCompoundWordTokenFilter.cs       |  255 +++
 ...HyphenationCompoundWordTokenFilterFactory.cs |  125 ++
 .../Analysis/Compound/hyphenation/ByteVector.cs |  151 ++
 .../Analysis/Compound/hyphenation/CharVector.cs |  163 ++
 .../Analysis/Compound/hyphenation/Hyphen.cs     |   76 +
 .../Compound/hyphenation/Hyphenation.cs         |   55 +
 .../Compound/hyphenation/HyphenationTree.cs     |  533 +++++
 .../Compound/hyphenation/PatternConsumer.cs     |   57 +
 .../Compound/hyphenation/PatternParser.cs       |  463 ++++
 .../Compound/hyphenation/TernaryTree.cs         |  780 +++++++
 .../Analysis/Core/KeywordAnalyzer.cs            |   40 +
 .../Analysis/Core/KeywordTokenizer.cs           |  106 +
 .../Analysis/Core/KeywordTokenizerFactory.cs    |   53 +
 .../Analysis/Core/LetterTokenizer.cs            |   84 +
 .../Analysis/Core/LetterTokenizerFactory.cs     |   54 +
 .../Analysis/Core/LowerCaseFilter.cs            |   62 +
 .../Analysis/Core/LowerCaseFilterFactory.cs     |   62 +
 .../Analysis/Core/LowerCaseTokenizer.cs         |   84 +
 .../Analysis/Core/LowerCaseTokenizerFactory.cs  |   63 +
 .../Analysis/Core/SimpleAnalyzer.cs             |   58 +
 .../Analysis/Core/StopAnalyzer.cs               |  104 +
 .../Analysis/Core/StopFilter.cs                 |  129 ++
 .../Analysis/Core/StopFilterFactory.cs          |  162 ++
 .../Analysis/Core/TypeTokenFilter.cs            |   83 +
 .../Analysis/Core/TypeTokenFilterFactory.cs     |   94 +
 .../Analysis/Core/UpperCaseFilter.cs            |   71 +
 .../Analysis/Core/UpperCaseFilterFactory.cs     |   74 +
 .../Analysis/Core/WhitespaceAnalyzer.cs         |   58 +
 .../Analysis/Core/WhitespaceTokenizer.cs        |   75 +
 .../Analysis/Core/WhitespaceTokenizerFactory.cs |   58 +
 .../Analysis/Cz/CzechAnalyzer.cs                |  161 ++
 .../Analysis/Cz/CzechStemFilter.cs              |   67 +
 .../Analysis/Cz/CzechStemFilterFactory.cs       |   55 +
 .../Analysis/Cz/CzechStemmer.cs                 |  157 ++
 .../Analysis/Da/DanishAnalyzer.cs               |  139 ++
 .../Analysis/De/GermanAnalyzer.cs               |  185 ++
 .../Analysis/De/GermanLightStemFilter.cs        |   66 +
 .../Analysis/De/GermanLightStemFilterFactory.cs |   55 +
 .../Analysis/De/GermanLightStemmer.cs           |  177 ++
 .../Analysis/De/GermanMinimalStemFilter.cs      |   66 +
 .../De/GermanMinimalStemFilterFactory.cs        |   55 +
 .../Analysis/De/GermanMinimalStemmer.cs         |  151 ++
 .../Analysis/De/GermanNormalizationFilter.cs    |  130 ++
 .../De/GermanNormalizationFilterFactory.cs      |   65 +
 .../Analysis/De/GermanStemFilter.cs             |   96 +
 .../Analysis/De/GermanStemFilterFactory.cs      |   56 +
 .../Analysis/De/GermanStemmer.cs                |  308 +++
 .../Analysis/El/GreekAnalyzer.cs                |  139 ++
 .../Analysis/El/GreekLowerCaseFilter.cs         |  135 ++
 .../Analysis/El/GreekLowerCaseFilterFactory.cs  |   66 +
 .../Analysis/El/GreekStemFilter.cs              |   72 +
 .../Analysis/El/GreekStemFilterFactory.cs       |   55 +
 .../Analysis/El/GreekStemmer.cs                 |  799 +++++++
 .../Analysis/En/EnglishAnalyzer.cs              |  121 +
 .../Analysis/En/EnglishMinimalStemFilter.cs     |   66 +
 .../En/EnglishMinimalStemFilterFactory.cs       |   55 +
 .../Analysis/En/EnglishMinimalStemmer.cs        |   61 +
 .../Analysis/En/EnglishPossessiveFilter.cs      |   79 +
 .../En/EnglishPossessiveFilterFactory.cs        |   56 +
 .../Analysis/En/KStemData1.cs                   |   55 +
 .../Analysis/En/KStemData2.cs                   |   53 +
 .../Analysis/En/KStemData3.cs                   |   53 +
 .../Analysis/En/KStemData4.cs                   |   53 +
 .../Analysis/En/KStemData5.cs                   |   53 +
 .../Analysis/En/KStemData6.cs                   |   53 +
 .../Analysis/En/KStemData7.cs                   |   53 +
 .../Analysis/En/KStemData8.cs                   |   53 +
 .../Analysis/En/KStemFilter.cs                  |   81 +
 .../Analysis/En/KStemFilterFactory.cs           |   55 +
 .../Analysis/En/KStemmer.cs                     | 2044 +++++++++++++++++
 .../Analysis/En/PorterStemFilter.cs             |   81 +
 .../Analysis/En/PorterStemFilterFactory.cs      |   55 +
 .../Analysis/En/PorterStemmer.cs                |  871 +++++++
 .../Analysis/Es/SpanishAnalyzer.cs              |  155 ++
 .../Analysis/Es/SpanishLightStemFilter.cs       |   66 +
 .../Es/SpanishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Es/SpanishLightStemmer.cs          |  137 ++
 .../Analysis/Eu/BasqueAnalyzer.cs               |  137 ++
 .../Analysis/Fa/PersianAnalyzer.cs              |  155 ++
 .../Analysis/Fa/PersianCharFilter.cs            |   79 +
 .../Analysis/Fa/PersianCharFilterFactory.cs     |   65 +
 .../Analysis/Fa/PersianNormalizationFilter.cs   |   54 +
 .../Fa/PersianNormalizationFilterFactory.cs     |   66 +
 .../Analysis/Fa/PersianNormalizer.cs            |   97 +
 .../Analysis/Fi/FinnishAnalyzer.cs              |  139 ++
 .../Analysis/Fi/FinnishLightStemFilter.cs       |   66 +
 .../Fi/FinnishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Fi/FinnishLightStemmer.cs          |  335 +++
 .../Analysis/Fr/FrenchAnalyzer.cs               |  205 ++
 .../Analysis/Fr/FrenchLightStemFilter.cs        |   66 +
 .../Analysis/Fr/FrenchLightStemFilterFactory.cs |   56 +
 .../Analysis/Fr/FrenchLightStemmer.cs           |  357 +++
 .../Analysis/Fr/FrenchMinimalStemFilter.cs      |   66 +
 .../Fr/FrenchMinimalStemFilterFactory.cs        |   56 +
 .../Analysis/Fr/FrenchMinimalStemmer.cs         |  106 +
 .../Analysis/Fr/FrenchStemFilter.cs             |  102 +
 .../Analysis/Fr/FrenchStemmer.cs                |  785 +++++++
 .../Analysis/Ga/IrishAnalyzer.cs                |  153 ++
 .../Analysis/Ga/IrishLowerCaseFilter.cs         |   95 +
 .../Analysis/Ga/IrishLowerCaseFilterFactory.cs  |   65 +
 .../Analysis/Gl/GalicianAnalyzer.cs             |  137 ++
 .../Analysis/Gl/GalicianMinimalStemFilter.cs    |   66 +
 .../Gl/GalicianMinimalStemFilterFactory.cs      |   55 +
 .../Analysis/Gl/GalicianMinimalStemmer.cs       |   43 +
 .../Analysis/Gl/GalicianStemFilter.cs           |   70 +
 .../Analysis/Gl/GalicianStemFilterFactory.cs    |   55 +
 .../Analysis/Gl/GalicianStemmer.cs              |  102 +
 .../Analysis/Hi/HindiAnalyzer.cs                |  158 ++
 .../Analysis/Hi/HindiNormalizationFilter.cs     |   62 +
 .../Hi/HindiNormalizationFilterFactory.cs       |   64 +
 .../Analysis/Hi/HindiNormalizer.cs              |  193 ++
 .../Analysis/Hi/HindiStemFilter.cs              |   56 +
 .../Analysis/Hi/HindiStemFilterFactory.cs       |   54 +
 .../Analysis/Hi/HindiStemmer.cs                 |   71 +
 .../Analysis/Hu/HungarianAnalyzer.cs            |  139 ++
 .../Analysis/Hu/HungarianLightStemFilter.cs     |   66 +
 .../Hu/HungarianLightStemFilterFactory.cs       |   55 +
 .../Analysis/Hu/HungarianLightStemmer.cs        |  292 +++
 .../Analysis/Hunspell/Dictionary.cs             | 1235 ++++++++++
 .../Analysis/Hunspell/HunspellStemFilter.cs     |  171 ++
 .../Hunspell/HunspellStemFilterFactory.cs       |  116 +
 .../Analysis/Hunspell/ISO8859_14Decoder.cs      |   47 +
 .../Analysis/Hunspell/Stemmer.cs                |  475 ++++
 .../Analysis/Hy/ArmenianAnalyzer.cs             |  137 ++
 .../Analysis/Id/IndonesianAnalyzer.cs           |  138 ++
 .../Analysis/Id/IndonesianStemFilter.cs         |   75 +
 .../Analysis/Id/IndonesianStemFilterFactory.cs  |   57 +
 .../Analysis/Id/IndonesianStemmer.cs            |  334 +++
 .../Analysis/In/IndicNormalizationFilter.cs     |   52 +
 .../In/IndicNormalizationFilterFactory.cs       |   64 +
 .../Analysis/In/IndicNormalizer.cs              |  194 ++
 .../Analysis/In/IndicTokenizer.cs               |   48 +
 .../Analysis/It/ItalianAnalyzer.cs              |  164 ++
 .../Analysis/It/ItalianLightStemFilter.cs       |   66 +
 .../It/ItalianLightStemFilterFactory.cs         |   55 +
 .../Analysis/It/ItalianLightStemmer.cs          |  155 ++
 .../Analysis/Lv/LatvianAnalyzer.cs              |  137 ++
 .../Analysis/Lv/LatvianStemFilter.cs            |   66 +
 .../Analysis/Lv/LatvianStemFilterFactory.cs     |   55 +
 .../Analysis/Lv/LatvianStemmer.cs               |  198 ++
 .../Miscellaneous/ASCIIFoldingFilter.cs         | 2118 ++++++++++++++++++
 .../Miscellaneous/ASCIIFoldingFilterFactory.cs  |   69 +
 .../Miscellaneous/CapitalizationFilter.cs       |  208 ++
 .../CapitalizationFilterFactory.cs              |  117 +
 .../Miscellaneous/CodepointCountFilter.cs       |   82 +
 .../CodepointCountFilterFactory.cs              |   61 +
 .../Analysis/Miscellaneous/EmptyTokenStream.cs  |   34 +
 .../Miscellaneous/HyphenatedWordsFilter.cs      |  164 ++
 .../HyphenatedWordsFilterFactory.cs             |   55 +
 .../Analysis/Miscellaneous/KeepWordFilter.cs    |   67 +
 .../Miscellaneous/KeepWordFilterFactory.cs      |  113 +
 .../Miscellaneous/KeywordMarkerFilter.cs        |   61 +
 .../Miscellaneous/KeywordMarkerFilterFactory.cs |   99 +
 .../Miscellaneous/KeywordRepeatFilter.cs        |   75 +
 .../Miscellaneous/KeywordRepeatFilterFactory.cs |   52 +
 .../Analysis/Miscellaneous/LengthFilter.cs      |   89 +
 .../Miscellaneous/LengthFilterFactory.cs        |   67 +
 .../Miscellaneous/LimitTokenCountAnalyzer.cs    |   68 +
 .../Miscellaneous/LimitTokenCountFilter.cs      |  109 +
 .../LimitTokenCountFilterFactory.cs             |   67 +
 .../Miscellaneous/LimitTokenPositionFilter.cs   |  116 +
 .../LimitTokenPositionFilterFactory.cs          |   66 +
 .../Lucene47WordDelimiterFilter.cs              |  625 ++++++
 .../Analysis/Miscellaneous/PatternAnalyzer.cs   |  566 +++++
 .../Miscellaneous/PatternKeywordMarkerFilter.cs |   60 +
 .../Miscellaneous/PerFieldAnalyzerWrapper.cs    |   93 +
 .../PrefixAndSuffixAwareTokenFilter.cs          |  112 +
 .../Miscellaneous/PrefixAwareTokenFilter.cs     |  246 ++
 .../RemoveDuplicatesTokenFilter.cs              |   99 +
 .../RemoveDuplicatesTokenFilterFactory.cs       |   55 +
 .../Miscellaneous/ScandinavianFoldingFilter.cs  |  135 ++
 .../ScandinavianFoldingFilterFactory.cs         |   53 +
 .../ScandinavianNormalizationFilter.cs          |  145 ++
 .../ScandinavianNormalizationFilterFactory.cs   |   53 +
 .../Miscellaneous/SetKeywordMarkerFilter.cs     |   59 +
 .../Miscellaneous/SingleTokenTokenStream.cs     |   79 +
 .../Miscellaneous/StemmerOverrideFilter.cs      |  265 +++
 .../StemmerOverrideFilterFactory.cs             |   97 +
 .../Analysis/Miscellaneous/TrimFilter.cs        |  114 +
 .../Analysis/Miscellaneous/TrimFilterFactory.cs |   63 +
 .../Miscellaneous/TruncateTokenFilter.cs        |   66 +
 .../Miscellaneous/TruncateTokenFilterFactory.cs |   66 +
 .../Miscellaneous/WordDelimiterFilter.cs        |  761 +++++++
 .../Miscellaneous/WordDelimiterFilterFactory.cs |  270 +++
 .../Miscellaneous/WordDelimiterIterator.cs      |  367 +++
 .../Analysis/Ngram/EdgeNGramFilterFactory.cs    |   61 +
 .../Analysis/Ngram/EdgeNGramTokenFilter.cs      |  266 +++
 .../Analysis/Ngram/EdgeNGramTokenizer.cs        |   71 +
 .../Analysis/Ngram/EdgeNGramTokenizerFactory.cs |   74 +
 .../Ngram/Lucene43EdgeNGramTokenizer.cs         |  328 +++
 .../Analysis/Ngram/Lucene43NGramTokenizer.cs    |  182 ++
 .../Analysis/Ngram/NGramFilterFactory.cs        |   59 +
 .../Analysis/Ngram/NGramTokenFilter.cs          |  251 +++
 .../Analysis/Ngram/NGramTokenizer.cs            |  278 +++
 .../Analysis/Ngram/NGramTokenizerFactory.cs     |   70 +
 .../Analysis/Nl/DutchAnalyzer.cs                |  231 ++
 .../Analysis/Nl/DutchStemFilter.cs              |  129 ++
 .../Analysis/Nl/DutchStemmer.cs                 |  477 ++++
 .../Analysis/No/NorwegianAnalyzer.cs            |  140 ++
 .../Analysis/No/NorwegianLightStemFilter.cs     |   79 +
 .../No/NorwegianLightStemFilterFactory.cs       |   79 +
 .../Analysis/No/NorwegianLightStemmer.cs        |  158 ++
 .../Analysis/No/NorwegianMinimalStemFilter.cs   |   79 +
 .../No/NorwegianMinimalStemFilterFactory.cs     |   79 +
 .../Analysis/No/NorwegianMinimalStemmer.cs      |  121 +
 .../Analysis/Path/PathHierarchyTokenizer.cs     |  242 ++
 .../Path/PathHierarchyTokenizerFactory.cs       |  105 +
 .../Path/ReversePathHierarchyTokenizer.cs       |  214 ++
 .../Pattern/PatternCaptureGroupFilterFactory.cs |   54 +
 .../Pattern/PatternCaptureGroupTokenFilter.cs   |  227 ++
 .../Pattern/PatternReplaceCharFilter.cs         |  179 ++
 .../Pattern/PatternReplaceCharFilterFactory.cs  |   67 +
 .../Analysis/Pattern/PatternReplaceFilter.cs    |   81 +
 .../Pattern/PatternReplaceFilterFactory.cs      |   64 +
 .../Analysis/Pattern/PatternTokenizer.cs        |  185 ++
 .../Analysis/Pattern/PatternTokenizerFactory.cs |   94 +
 .../Analysis/Payloads/AbstractEncoder.cs        |   39 +
 .../Payloads/DelimitedPayloadTokenFilter.cs     |   82 +
 .../DelimitedPayloadTokenFilterFactory.cs       |   85 +
 .../Analysis/Payloads/FloatEncoder.cs           |   41 +
 .../Analysis/Payloads/IdentityEncoder.cs        |   63 +
 .../Analysis/Payloads/IntegerEncoder.cs         |   42 +
 .../Payloads/NumericPayloadTokenFilter.cs       |   70 +
 .../NumericPayloadTokenFilterFactory.cs         |   60 +
 .../Analysis/Payloads/PayloadEncoder.cs         |   43 +
 .../Analysis/Payloads/PayloadHelper.cs          |   81 +
 .../Payloads/TokenOffsetPayloadTokenFilter.cs   |   61 +
 .../TokenOffsetPayloadTokenFilterFactory.cs     |   56 +
 .../Payloads/TypeAsPayloadTokenFilter.cs        |   62 +
 .../Payloads/TypeAsPayloadTokenFilterFactory.cs |   56 +
 .../Analysis/Position/PositionFilter.cs         |  109 +
 .../Analysis/Position/PositionFilterFactory.cs  |   70 +
 .../Analysis/Pt/PortugueseAnalyzer.cs           |  155 ++
 .../Analysis/Pt/PortugueseLightStemFilter.cs    |   66 +
 .../Pt/PortugueseLightStemFilterFactory.cs      |   55 +
 .../Analysis/Pt/PortugueseLightStemmer.cs       |  252 +++
 .../Analysis/Pt/PortugueseMinimalStemFilter.cs  |   66 +
 .../Pt/PortugueseMinimalStemFilterFactory.cs    |   55 +
 .../Analysis/Pt/PortugueseMinimalStemmer.cs     |   44 +
 .../Analysis/Pt/PortugueseStemFilter.cs         |   70 +
 .../Analysis/Pt/PortugueseStemFilterFactory.cs  |   55 +
 .../Analysis/Pt/PortugueseStemmer.cs            |  126 ++
 .../Analysis/Pt/RSLPStemmerBase.cs              |  410 ++++
 .../Analysis/Query/QueryAutoStopWordAnalyzer.cs |  213 ++
 .../Analysis/Reverse/ReverseStringFilter.cs     |  281 +++
 .../Reverse/ReverseStringFilterFactory.cs       |   59 +
 .../Analysis/Ro/RomanianAnalyzer.cs             |  142 ++
 .../Analysis/Ru/RussianAnalyzer.cs              |  172 ++
 .../Analysis/Ru/RussianLetterTokenizer.cs       |   83 +
 .../Ru/RussianLetterTokenizerFactory.cs         |   52 +
 .../Analysis/Ru/RussianLightStemFilter.cs       |   66 +
 .../Ru/RussianLightStemFilterFactory.cs         |   55 +
 .../Analysis/Ru/RussianLightStemmer.cs          |  134 ++
 .../Analysis/Shingle/ShingleAnalyzerWrapper.cs  |  182 ++
 .../Analysis/Shingle/ShingleFilter.cs           |  724 ++++++
 .../Analysis/Shingle/ShingleFilterFactory.cs    |   86 +
 .../Analysis/Sinks/DateRecognizerSinkFilter.cs  |   79 +
 .../Analysis/Sinks/TeeSinkTokenFilter.cs        |  300 +++
 .../Analysis/Sinks/TokenRangeSinkFilter.cs      |   73 +
 .../Analysis/Sinks/TokenTypeSinkFilter.cs       |   50 +
 .../Analysis/Snowball/SnowballAnalyzer.cs       |  102 +
 .../Analysis/Snowball/SnowballFilter.cs         |  129 ++
 .../Snowball/SnowballPorterFilterFactory.cs     |  101 +
 .../Analysis/Standard/ClassicAnalyzer.cs        |  161 ++
 .../Analysis/Standard/ClassicFilter.cs          |   92 +
 .../Analysis/Standard/ClassicFilterFactory.cs   |   55 +
 .../Analysis/Standard/ClassicTokenizer.cs       |  210 ++
 .../Standard/ClassicTokenizerFactory.cs         |   61 +
 .../Analysis/Standard/ClassicTokenizerImpl.cs   |  723 ++++++
 .../Analysis/Standard/StandardAnalyzer.cs       |  162 ++
 .../Analysis/Standard/StandardFilter.cs         |  100 +
 .../Analysis/Standard/StandardFilterFactory.cs  |   56 +
 .../Analysis/Standard/StandardTokenizer.cs      |  257 +++
 .../Standard/StandardTokenizerFactory.cs        |   61 +
 .../Analysis/Standard/StandardTokenizerImpl.cs  |  733 ++++++
 .../Standard/StandardTokenizerInterface.cs      |   77 +
 .../Analysis/Standard/UAX29URLEmailAnalyzer.cs  |  150 ++
 .../Analysis/Standard/UAX29URLEmailTokenizer.cs |  221 ++
 .../Standard/UAX29URLEmailTokenizerFactory.cs   |   61 +
 .../Analysis/Sv/SwedishAnalyzer.cs              |  139 ++
 .../Analysis/Sv/SwedishLightStemFilter.cs       |   66 +
 .../Sv/SwedishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Sv/SwedishLightStemmer.cs          |  114 +
 .../Analysis/Synonym/FSTSynonymFilterFactory.cs |  186 ++
 .../Analysis/Synonym/SlowSynonymFilter.cs       |  317 +++
 .../Synonym/SlowSynonymFilterFactory.cs         |  391 ++++
 .../Analysis/Synonym/SlowSynonymMap.cs          |  210 ++
 .../Analysis/Synonym/SolrSynonymParser.cs       |  218 ++
 .../Analysis/Synonym/SynonymFilter.cs           |  789 +++++++
 .../Analysis/Synonym/SynonymFilterFactory.cs    |  115 +
 .../Analysis/Synonym/SynonymMap.cs              |  430 ++++
 .../Analysis/Synonym/WordnetSynonymParser.cs    |  135 ++
 .../Analysis/Th/ThaiAnalyzer.cs                 |  143 ++
 .../Analysis/Th/ThaiTokenizer.cs                |  116 +
 .../Analysis/Th/ThaiTokenizerFactory.cs         |   56 +
 .../Analysis/Th/ThaiWordFilter.cs               |  172 ++
 .../Analysis/Th/ThaiWordFilterFactory.cs        |   59 +
 .../Analysis/Tr/ApostropheFilter.cs             |   70 +
 .../Analysis/Tr/ApostropheFilterFactory.cs      |   52 +
 .../Analysis/Tr/TurkishAnalyzer.cs              |  145 ++
 .../Analysis/Tr/TurkishLowerCaseFilter.cs       |  151 ++
 .../Tr/TurkishLowerCaseFilterFactory.cs         |   64 +
 .../Analysis/Util/AbstractAnalysisFactory.cs    |  406 ++++
 .../Analysis/Util/AnalysisSPILoader.cs          |  165 ++
 .../Analysis/Util/CharArrayIterator.cs          |  278 +++
 .../Analysis/Util/CharArrayMap.cs               |  928 ++++++++
 .../Analysis/Util/CharArraySet.cs               |  267 +++
 .../Analysis/Util/CharFilterFactory.cs          |   86 +
 .../Analysis/Util/CharTokenizer.cs              |  209 ++
 .../Analysis/Util/CharacterUtils.cs             |  498 ++++
 .../Analysis/Util/ClasspathResourceLoader.cs    |  105 +
 .../Analysis/Util/ElisionFilter.cs              |   80 +
 .../Analysis/Util/ElisionFilterFactory.cs       |   86 +
 .../Analysis/Util/FilesystemResourceLoader.cs   |  113 +
 .../Analysis/Util/FilteringTokenFilter.cs       |  150 ++
 .../Analysis/Util/MultiTermAwareComponent.cs    |   39 +
 .../Analysis/Util/OpenStringBuilder.cs          |  205 ++
 .../Analysis/Util/ResourceLoader.cs             |   49 +
 .../Analysis/Util/ResourceLoaderAware.cs        |   38 +
 .../Analysis/Util/RollingCharBuffer.cs          |  200 ++
 .../Analysis/Util/SegmentingTokenizerBase.cs    |  258 +++
 .../Analysis/Util/StemmerUtil.cs                |  153 ++
 .../Analysis/Util/StopwordAnalyzerBase.cs       |  172 ++
 .../Analysis/Util/TokenFilterFactory.cs         |   86 +
 .../Analysis/Util/TokenizerFactory.cs           |   93 +
 .../Analysis/Util/WordlistLoader.cs             |  305 +++
 .../Analysis/Wikipedia/WikipediaTokenizer.cs    |  343 +++
 .../Wikipedia/WikipediaTokenizerFactory.cs      |   57 +
 .../Collation/CollationAttributeFactory.cs      |   99 +
 .../Collation/CollationKeyAnalyzer.cs           |  129 ++
 .../Collation/CollationKeyFilter.cs             |  112 +
 .../Collation/CollationKeyFilterFactory.cs      |  254 +++
 .../CollatedTermAttributeImpl.cs                |   52 +
 .../Lucene.Net.Analysis.Common.csproj           |  244 ++
 .../Properties/AssemblyInfo.cs                  |   36 +
 src/Lucene.Net.Core/Analysis/Analyzer.cs        |    2 +-
 src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs |    2 +-
 src/Lucene.Net.Core/Analysis/TokenStream.cs     |    2 +
 387 files changed, 60480 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
new file mode 100644
index 0000000..45318a7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
@@ -0,0 +1,161 @@
+using System;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Arabic. 
+	/// <para>
+	/// This analyzer implements light-stemming as specified by:
+	/// <i>
+	/// Light Stemming for Arabic Information Retrieval
+	/// </i>    
+	/// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf
+	/// </para>
+	/// <para>
+	/// The analysis package contains three primary components:
+	/// <ul>
+	///  <li><seealso cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.
+	///  <li><seealso cref="ArabicStemFilter"/>: Arabic light stemming
+	///  <li>Arabic stop words file: a set of default Arabic stop words.
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public sealed class ArabicAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// File containing default Arabic stopwords.
+	  /// 
+	  /// Default stopword list is from http://members.unine.ch/jacques.savoy/clef/index.html
+	  /// The stopword list is BSD-Licensed.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static readonly set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(ArabicAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (System.IO.IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ArabicAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided, this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// <seealso cref="ArabicStemFilter"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a set of terms not to be stemmed </param>
+	  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
+	  ///         <seealso cref="ArabicNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided and <seealso cref="ArabicStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, System.IO.TextReader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = matchVersion.onOrAfter(org.apache.lucene.util.Version.LUCENE_31) ? new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader) : new ArabicLetterTokenizer(matchVersion, reader);
+		Tokenizer source = matchVersion.onOrAfter(Version.LUCENE_31) ? new StandardTokenizer(matchVersion, reader) : new ArabicLetterTokenizer(matchVersion, reader);
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		// the order here is important: the stopword list is not normalized!
+		result = new StopFilter(matchVersion, result, stopwords);
+		// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
+		result = new ArabicNormalizationFilter(result);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		return new TokenStreamComponents(source, new ArabicStemFilter(result));
+	  }
+	}
+
+
+}
\ No newline at end of file
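
The filter chain built in createComponents can be pictured as a pipeline of
stream transforms. The sketch below models two of its stages over plain
strings; it is a toy stand-in, not the Lucene.NET TokenStream API, and the
stage names are hypothetical.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class ArabicChainSketch
    {
        static IEnumerable<string> LowerCase(IEnumerable<string> tokens)
        {
            return tokens.Select(t => t.ToLowerInvariant());
        }

        static IEnumerable<string> Stop(IEnumerable<string> tokens, ISet<string> stopwords)
        {
            return tokens.Where(t => !stopwords.Contains(t));
        }

        static void Main()
        {
            var stopwords = new HashSet<string> { "the" };
            IEnumerable<string> stream = "The quick fox".Split(' ');
            // Lower-casing must run before the stop filter because the
            // stopword list is not normalized, as the comment in
            // createComponents warns.
            stream = Stop(LowerCase(stream), stopwords);
            Console.WriteLine(string.Join(" ", stream)); // prints: quick fox
        }
    }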

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
new file mode 100644
index 0000000..5427293
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -0,0 +1,83 @@
+using System;
+
+namespace org.apache.lucene.analysis.ar
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LetterTokenizer = org.apache.lucene.analysis.core.LetterTokenizer;
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizer that breaks text into runs of letters and diacritics.
+	/// <para>
+	/// The problem with the standard Letter tokenizer is that it fails on diacritics.
+	/// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
+	/// </para>
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="ArabicLetterTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="#isTokenChar(int)"/> and
+	/// <seealso cref="#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead. 
+	[Obsolete("(3.1) Use <seealso cref="StandardTokenizer"/> instead.")]
+	public class ArabicLetterTokenizer : LetterTokenizer
+	{
+	  /// <summary>
+	  /// Construct a new ArabicLetterTokenizer. </summary>
+	  /// <param name="matchVersion"> Lucene version
+	  /// to match See <seealso cref="<a href="#version">above</a>"/>
+	  /// </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public ArabicLetterTokenizer(Version matchVersion, System.IO.TextReader @in) : base(matchVersion, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new ArabicLetterTokenizer using a given
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param
+	  /// matchVersion Lucene version to match See
+	  /// <seealso cref="<a href="#version">above</a>"/>
+	  /// </summary>
+	  /// <param name="factory">
+	  ///          the attribute factory to use for this Tokenizer </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public ArabicLetterTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader @in) : base(matchVersion, factory, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Allows for Letter category or NonspacingMark category </summary>
+	  /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso>
+	  protected internal override bool isTokenChar(int c)
+	  {
+		return base.isTokenChar(c) || System.Globalization.CharUnicodeInfo.GetUnicodeCategory((char)c) == System.Globalization.UnicodeCategory.NonSpacingMark; // BMP-only stand-in for Java's Character.getType(int)
+	  }
+
+	}
+
+}
\ No newline at end of file
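
The isTokenChar override is the whole point of this class: letters and
non-spacing marks (diacritics) both count as token characters. Below is a
runnable stand-alone check of that predicate, using .NET's CharUnicodeInfo in
place of Java's Character.getType and assuming BMP code points only.

    using System;
    using System.Globalization;

    class DiacriticCheckSketch
    {
        static bool IsTokenChar(int c)
        {
            return char.IsLetter((char)c)
                || CharUnicodeInfo.GetUnicodeCategory((char)c) == UnicodeCategory.NonSpacingMark;
        }

        static void Main()
        {
            Console.WriteLine(IsTokenChar('\u0628')); // Arabic letter beh -> True
            Console.WriteLine(IsTokenChar('\u064E')); // fatha, a diacritic -> True
            Console.WriteLine(IsTokenChar(' '));      // whitespace -> False
        }
    }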

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
new file mode 100644
index 0000000..3b6def0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicLetterTokenizer"/> </summary>
+	/// @deprecated (3.1) Use StandardTokenizerFactory instead.
+	///  
+	[Obsolete("(3.1) Use StandardTokenizerFactory instead.")]
+	public class ArabicLetterTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicLetterTokenizerFactory </summary>
+	  public ArabicLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicLetterTokenizer create(AttributeFactory factory, System.IO.TextReader input)
+	  {
+		return new ArabicLetterTokenizer(luceneMatchVersion, factory, input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
new file mode 100644
index 0000000..7561878
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
@@ -0,0 +1,51 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicNormalizer"/> to normalize the orthography.
+	/// 
+	/// </summary>
+
+	public sealed class ArabicNormalizationFilter : TokenFilter
+	{
+	  private readonly ArabicNormalizer normalizer = new ArabicNormalizer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  public ArabicNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  termAtt.Length = newlen;
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file
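
The filter follows the usual pull-based TokenStream contract: a consumer wraps a
tokenizer, resets the stream, and pulls tokens with incrementToken(), reading the
term attribute between calls. A minimal consumption sketch against the raw-ported
API above (the WhitespaceTokenizer, the addAttribute cast, and the
reset/end/close calls are assumptions about the surrounding port, not part of
this file):

    TextReader reader = new StringReader("كِتَاب جَدِيد");
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    TokenStream stream = new ArabicNormalizationFilter(source);
    var termAtt = (CharTermAttribute)stream.addAttribute(typeof(CharTermAttribute));
    stream.reset();
    while (stream.incrementToken())
    {
        Console.WriteLine(termAtt.ToString()); // normalized terms, harakat removed
    }
    stream.end();
    stream.close();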

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
new file mode 100644
index 0000000..bb38dd4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ArabicNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicNormalizationFilterFactory </summary>
+	  public ArabicNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicNormalizationFilter create(TokenStream input)
+	  {
+		return new ArabicNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
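
Because the factory implements MultiTermAwareComponent and returns itself from
MultiTermComponent, query-time multi-term expansion (wildcard and prefix queries)
applies the same normalization that was used at index time. A short sketch,
assuming the raw-ported types above:

    var factory = new ArabicNormalizationFilterFactory(new Dictionary<string, string>());
    var mtc = (MultiTermAwareComponent)factory;
    AbstractAnalysisFactory forMultiTerm = mtc.MultiTermComponent; // same instance as factory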

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
new file mode 100644
index 0000000..05ddad0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
@@ -0,0 +1,111 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	///  Normalizer for Arabic.
+	///  <para>
+	///  Normalization is done in-place for efficiency, operating on a termbuffer.
+	/// </para>
+	///  <para>
+	///  Normalization is defined as:
+	///  <ul>
+	///  <li> Normalization of hamza with alef seat to a bare alef.
+	///  <li> Normalization of teh marbuta to heh
+	///  <li> Normalization of dotless yeh (alef maksura) to yeh.
+	///  <li> Removal of Arabic diacritics (the harakat)
+	///  <li> Removal of tatweel (stretching character).
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ArabicNormalizer
+	{
+	  public const char ALEF = '\u0627';
+	  public const char ALEF_MADDA = '\u0622';
+	  public const char ALEF_HAMZA_ABOVE = '\u0623';
+	  public const char ALEF_HAMZA_BELOW = '\u0625';
+
+	  public const char YEH = '\u064A';
+	  public const char DOTLESS_YEH = '\u0649';
+
+	  public const char TEH_MARBUTA = '\u0629';
+	  public const char HEH = '\u0647';
+
+	  public const char TATWEEL = '\u0640';
+
+	  public const char FATHATAN = '\u064B';
+	  public const char DAMMATAN = '\u064C';
+	  public const char KASRATAN = '\u064D';
+	  public const char FATHA = '\u064E';
+	  public const char DAMMA = '\u064F';
+	  public const char KASRA = '\u0650';
+	  public const char SHADDA = '\u0651';
+	  public const char SUKUN = '\u0652';
+
+	  /// <summary>
+	  /// Normalize an input buffer of Arabic text
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int normalize(char[] s, int len)
+	  {
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+		  case ALEF_MADDA:
+		  case ALEF_HAMZA_ABOVE:
+		  case ALEF_HAMZA_BELOW:
+			s[i] = ALEF;
+			break;
+		  case DOTLESS_YEH:
+			s[i] = YEH;
+			break;
+		  case TEH_MARBUTA:
+			s[i] = HEH;
+			break;
+		  case TATWEEL:
+		  case KASRATAN:
+		  case DAMMATAN:
+		  case FATHATAN:
+		  case FATHA:
+		  case DAMMA:
+		  case KASRA:
+		  case SHADDA:
+		  case SUKUN:
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+		  default:
+			break;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
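
Because normalization happens in place, the caller hands normalize() a char
buffer plus a logical length, and must then use the (possibly shorter) returned
length rather than the buffer's full length. A minimal sketch:

    ArabicNormalizer normalizer = new ArabicNormalizer();
    char[] buffer = "كِتَاب".ToCharArray();             // contains kasra and fatha diacritics
    int newLen = normalizer.normalize(buffer, buffer.Length);
    string normalized = new string(buffer, 0, newLen);  // "كتاب" -- harakat removed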

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
new file mode 100644
index 0000000..8dca664
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicStemmer"/> to stem Arabic words..
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter  </seealso>
+
+	public sealed class ArabicStemFilter : TokenFilter
+	{
+	  private readonly ArabicStemmer stemmer = new ArabicStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public ArabicStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
new file mode 100644
index 0000000..851df64
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
+	///     &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ArabicStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicStemFilterFactory </summary>
+	  public ArabicStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicStemFilter create(TokenStream input)
+	  {
+		return new ArabicStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
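
Factories take their configuration as a string dictionary; the base constructor
consumes the keys it understands, and anything left over is rejected. A sketch of
both paths (tokenStream stands in for an existing TokenStream variable):

    TokenStream stemmed = new ArabicStemFilterFactory(
        new Dictionary<string, string>()).create(tokenStream);  // no parameters expected

    new ArabicStemFilterFactory(
        new Dictionary<string, string> { { "foo", "bar" } });   // throws ArgumentException:
                                                                // "Unknown parameters: ..."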

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
new file mode 100644
index 0000000..1776906
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
@@ -0,0 +1,163 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	///  Stemmer for Arabic.
+	///  <para>
+	///  Stemming  is done in-place for efficiency, operating on a termbuffer.
+	/// </para>
+	///  <para>
+	///  Stemming is defined as:
+	///  <ul>
+	///  <li> Removal of attached definite article, conjunction, and prepositions.
+	///  <li> Stemming of common suffixes.
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ArabicStemmer
+	{
+	  public const char ALEF = '\u0627';
+	  public const char BEH = '\u0628';
+	  public const char TEH_MARBUTA = '\u0629';
+	  public const char TEH = '\u062A';
+	  public const char FEH = '\u0641';
+	  public const char KAF = '\u0643';
+	  public const char LAM = '\u0644';
+	  public const char NOON = '\u0646';
+	  public const char HEH = '\u0647';
+	  public const char WAW = '\u0648';
+	  public const char YEH = '\u064A';
+
+	  // Attached definite articles, conjunction waw, and prepositions to strip:
+	  public static readonly char[][] prefixes = {
+		new char[] {ALEF, LAM}, // ال
+		new char[] {WAW, ALEF, LAM}, // وال
+		new char[] {BEH, ALEF, LAM}, // بال
+		new char[] {KAF, ALEF, LAM}, // كال
+		new char[] {FEH, ALEF, LAM}, // فال
+		new char[] {LAM, LAM}, // لل
+		new char[] {WAW}, // و
+	  };
+
+	  // Common suffixes to strip:
+	  public static readonly char[][] suffixes = {
+		new char[] {HEH, ALEF}, // ها
+		new char[] {ALEF, NOON}, // ان
+		new char[] {ALEF, TEH}, // ات
+		new char[] {WAW, NOON}, // ون
+		new char[] {YEH, NOON}, // ين
+		new char[] {YEH, HEH}, // يه
+		new char[] {YEH, TEH_MARBUTA}, // ية
+		new char[] {HEH}, // ه
+		new char[] {TEH_MARBUTA}, // ة
+		new char[] {YEH}, // ي
+	  };
+
+	  /// <summary>
+	  /// Stem an input buffer of Arabic text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after stemming </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		len = stemPrefix(s, len);
+		len = stemSuffix(s, len);
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Stem a prefix off an Arabic word. </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new length of input buffer after stemming. </returns>
+	  public virtual int stemPrefix(char[] s, int len)
+	  {
+		for (int i = 0; i < prefixes.Length; i++)
+		{
+		  if (startsWithCheckLength(s, len, prefixes[i]))
+		  {
+			return StemmerUtil.deleteN(s, 0, len, prefixes[i].Length);
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Stem suffix(es) off an Arabic word. </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new length of input buffer after stemming </returns>
+	  public virtual int stemSuffix(char[] s, int len)
+	  {
+		for (int i = 0; i < suffixes.Length; i++)
+		{
+		  if (endsWithCheckLength(s, len, suffixes[i]))
+		  {
+			len = StemmerUtil.deleteN(s, len - suffixes[i].Length, len, suffixes[i].Length);
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Returns true if the prefix matches and can be stemmed </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="prefix"> prefix to check </param>
+	  /// <returns> true if the prefix matches and can be stemmed </returns>
+	  internal virtual bool startsWithCheckLength(char[] s, int len, char[] prefix)
+	  {
+		if (prefix.Length == 1 && len < 4) // wa- prefix requires at least 3 characters
+		{
+		  return false;
+		} // other prefixes require only 2.
+		else if (len < prefix.Length + 2)
+		{
+		  return false;
+		}
+		else
+		{
+		  for (int i = 0; i < prefix.Length; i++)
+		  {
+			if (s[i] != prefix[i])
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns true if the suffix matches and can be stemmed </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="suffix"> suffix to check </param>
+	  /// <returns> true if the suffix matches and can be stemmed </returns>
+	  internal virtual bool endsWithCheckLength(char[] s, int len, char[] suffix)
+	  {
+		if (len < suffix.Length + 2) // all suffixes require at least 2 characters after stemming
+		{
+		  return false;
+		}
+		else
+		{
+		  for (int i = 0; i < suffix.Length; i++)
+		  {
+			if (s[len - suffix.Length + i] != suffix[i])
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
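
With the affix tables above, stemming strips an attached definite article before
looking for suffixes, and the length guards always leave at least two characters
behind. A minimal sketch:

    ArabicStemmer stemmer = new ArabicStemmer();
    char[] buffer = "الكتاب".ToCharArray();           // definite article ال + كتاب
    int newLen = stemmer.stem(buffer, buffer.Length);
    // startsWithCheckLength matches the two-char prefix ال with enough characters
    // remaining, so stemPrefix deletes it: new string(buffer, 0, newLen) == "كتاب"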

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
new file mode 100644
index 0000000..eb42363
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
@@ -0,0 +1,144 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Bulgarian.
+	/// <para>
+	/// This analyzer implements light-stemming as specified by: <i> Searching
+	/// Strategies for the Bulgarian Language </i>
+	/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
+	/// </para>
+	/// </summary>
+	public sealed class BulgarianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Bulgarian stopwords.
+	  /// 
+	  /// Default stopword list is from
+	  /// http://members.unine.ch/jacques.savoy/clef/index.html The stopword list is
+	  /// BSD-Licensed.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set.
+	  /// </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer
+	  /// class accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(BulgarianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words:
+	  /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and a stem exclusion set.
+	  /// If a stem exclusion set is provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> 
+	  /// before <seealso cref="BulgarianStemFilter"/>.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="BulgarianStemFilter"/>. </returns>
+	  public override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new BulgarianStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
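
Construction mirrors the three constructors above: defaults only, custom
stopwords, or stopwords plus a stem-exclusion set. A sketch (the CharArraySet
constructor signature is an assumption about the raw-ported util package):

    var analyzer = new BulgarianAnalyzer(Version.LUCENE_CURRENT);  // default stopwords
    var custom = new BulgarianAnalyzer(Version.LUCENE_CURRENT,
        BulgarianAnalyzer.DefaultStopSet,
        new CharArraySet(Version.LUCENE_CURRENT, new[] { "българия" }, false)); // not stemmed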

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
new file mode 100644
index 0000000..d89426c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
@@ -0,0 +1,68 @@
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="BulgarianStemmer"/> to stem Bulgarian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class BulgarianStemFilter : TokenFilter
+	{
+	  private readonly BulgarianStemmer stemmer = new BulgarianStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public BulgarianStemFilter(final org.apache.lucene.analysis.TokenStream input)
+	  public BulgarianStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
new file mode 100644
index 0000000..1f09691
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="BulgarianStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class BulgarianStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new BulgarianStemFilterFactory </summary>
+	  public BulgarianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new BulgarianStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
new file mode 100644
index 0000000..dda3b5d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
@@ -0,0 +1,187 @@
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Bulgarian.
+	/// <para>
+	/// Implements the algorithm described in:  
+	/// <i>
+	/// Searching Strategies for the Bulgarian Language
+	/// </i>
+	/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
+	/// </para>
+	/// </summary>
+	public class BulgarianStemmer
+	{
+
+	  /// <summary>
+	  /// Stem an input buffer of Bulgarian text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after stemming </returns>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public int stem(final char s[] , int len)
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 4) // do not stem
+		{
+		  return len;
+		}
+
+		if (len > 5 && StemmerUtil.EndsWith(s, len, "ища"))
+		{
+		  return len - 3;
+		}
+
+		len = removeArticle(s, len);
+		len = removePlural(s, len);
+
+		if (len > 3)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "я"))
+		  {
+			len--;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "а") || StemmerUtil.EndsWith(s, len, "о") || StemmerUtil.EndsWith(s, len, "е"))
+		  {
+			len--;
+		  }
+		}
+
+		// the rule to rewrite ен -> н is duplicated in the paper.
+		// in the perl implementation referenced by the paper, this is fixed.
+		// (it is fixed here as well)
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ен"))
+		{
+		  s[len - 2] = 'н'; // replace with н
+		  len--;
+		}
+
+		if (len > 5 && s[len - 2] == 'ъ')
+		{
+		  s[len - 2] = s[len - 1]; // replace ъN with N
+		  len--;
+		}
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Mainly remove the definite article </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new stemmed length </returns>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: private int removeArticle(final char s[] , final int len)
+	  private int removeArticle(char[] s, int len)
+	  {
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "ият"))
+		{
+		  return len - 3;
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ът") || StemmerUtil.EndsWith(s, len, "то") || StemmerUtil.EndsWith(s, len, "те") || StemmerUtil.EndsWith(s, len, "та") || StemmerUtil.EndsWith(s, len, "ия"))
+		  {
+			return len - 2;
+		  }
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ят"))
+		{
+		  return len - 2;
+		}
+
+		return len;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: private int removePlural(final char s[] , final int len)
+	  private int removePlural(char[] s, int len)
+	  {
+		if (len > 6)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "овци"))
+		  {
+			return len - 3; // replace with о
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "ове"))
+		  {
+			return len - 3;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "еве"))
+		  {
+			s[len - 3] = 'й'; // replace with й
+			return len - 2;
+		  }
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ища"))
+		  {
+			return len - 3;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "та"))
+		  {
+			return len - 2;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "ци"))
+		  {
+			s[len - 2] = 'к'; // replace with к
+			return len - 1;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "зи"))
+		  {
+			s[len - 2] = 'г'; // replace with г
+			return len - 1;
+		  }
+
+		  if (s[len - 3] == 'е' && s[len - 1] == 'и')
+		  {
+			s[len - 3] = 'я'; // replace е with я, remove и
+			return len - 1;
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "си"))
+		  {
+			s[len - 2] = 'х'; // replace with х
+			return len - 1;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "и"))
+		  {
+			return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
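
A worked example of the light-stemming rules above, removing the definite
article:

    BulgarianStemmer stemmer = new BulgarianStemmer();
    char[] buffer = "градът".ToCharArray();           // "the city"
    int newLen = stemmer.stem(buffer, buffer.Length);
    // removeArticle strips the article "ът" and no other rule applies:
    // new string(buffer, 0, newLen) == "град"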

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
new file mode 100644
index 0000000..5cd8bd8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
@@ -0,0 +1,143 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Brazilian Portuguese language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (words that will
+	/// not be stemmed, but indexed).
+	/// </para>
+	/// 
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class BrazilianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Brazilian Portuguese stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(BrazilianAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Contains words that should be indexed but not stemmed.
+	  /// </summary>
+	  private CharArraySet excltable = CharArraySet.EMPTY_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+	  /// </summary>
+	  public BrazilianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and stemming exclusion words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a set of terms not to be stemmed </param>
+	  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : this(matchVersion, stopwords)
+	  {
+		excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StandardFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , and <seealso cref="BrazilianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		result = new StandardFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (excltable != null && !excltable.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, excltable);
+		}
+		return new TokenStreamComponents(source, new BrazilianStemFilter(result));
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
new file mode 100644
index 0000000..4ea054b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
@@ -0,0 +1,90 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="BrazilianStemmer"/>.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter
+	///  </seealso>
+	public sealed class BrazilianStemFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// <seealso cref="BrazilianStemmer"/> in use by this filter.
+	  /// </summary>
+	  private BrazilianStemmer stemmer = new BrazilianStemmer();
+//ORIGINAL LINE: private java.util.Set<?> exclusions = null;
+	  private ISet<string> exclusions = null;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Creates a new BrazilianStemFilter 
+	  /// </summary>
+	  /// <param name="in"> the source <seealso cref="TokenStream"/>  </param>
+	  public BrazilianStemFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String term = termAtt.toString();
+		  string term = termAtt.ToString();
+		  // Check the exclusion table.
+		  if (!keywordAttr.Keyword && (exclusions == null || !exclusions.Contains(term)))
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String s = stemmer.stem(term);
+			string s = stemmer.stem(term);
+			// If not stemmed, don't waste the time adjusting the token.
+			if ((s != null) && !s.Equals(term))
+			{
+			  termAtt.setEmpty().append(s);
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+
+
+}
\ No newline at end of file
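
As the summary notes, terms can be protected from stemming by marking them as
keywords upstream. A sketch of the chain (tokenStream stands in for an existing
stream; the CharArraySet constructor signature is an assumption about the
raw-ported util package):

    CharArraySet protectedTerms = new CharArraySet(Version.LUCENE_CURRENT,
        new[] { "brasília" }, false);
    TokenStream chain = new SetKeywordMarkerFilter(tokenStream, protectedTerms);
    chain = new BrazilianStemFilter(chain);
    // tokens matching protectedTerms arrive with KeywordAttribute set and pass
    // through BrazilianStemFilter unchanged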

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
new file mode 100644
index 0000000..17a5fce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="BrazilianStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class BrazilianStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new BrazilianStemFilterFactory </summary>
+	  public BrazilianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override BrazilianStemFilter create(TokenStream @in)
+	  {
+		return new BrazilianStemFilter(@in);
+	  }
+	}
+
+
+}
\ No newline at end of file


[33/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
new file mode 100644
index 0000000..d7c385d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
@@ -0,0 +1,1395 @@
+using System.Globalization;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A stemmer for Brazilian Portuguese words.
+	/// </summary>
+	public class BrazilianStemmer
+	{
+	  private static readonly CultureInfo locale = new CultureInfo("pt-BR");
+
+	  /// <summary>
+	  /// Changed term
+	  /// </summary>
+	  private string TERM;
+	  private string CT;
+	  private string R1;
+	  private string R2;
+	  private string RV;
+
+
+	  public BrazilianStemmer()
+	  {
+	  }
+
+	  /// <summary>
+	  /// Stems the given term to a unique <tt>discriminator</tt>.
+	  /// </summary>
+	  /// <param name="term">  The term that should be stemmed. </param>
+	  /// <returns>      Discriminator for <tt>term</tt> </returns>
+	  protected internal virtual string stem(string term)
+	  {
+		bool altered = false; // altered the term
+
+		// creates CT
+		createCT(term);
+
+		if (!isIndexable(CT))
+		{
+		  return null;
+		}
+		if (!isStemmable(CT))
+		{
+		  return CT;
+		}
+
+		R1 = getR1(CT);
+		R2 = getR1(R1);
+		RV = getRV(CT);
+		TERM = term + ";" + CT;
+
+		altered = step1();
+		if (!altered)
+		{
+		  altered = step2();
+		}
+
+		if (altered)
+		{
+		  step3();
+		}
+		else
+		{
+		  step4();
+		}
+
+		step5();
+
+		return CT;
+	  }
+
+	  /// <summary>
+	  /// Checks whether a term can be processed correctly.
+	  /// </summary>
+	  /// <returns>  true if, and only if, the given term consists only of letters. </returns>
+	  private bool isStemmable(string term)
+	  {
+		for (int c = 0; c < term.Length; c++)
+		{
+		  // Discard terms that contain non-letter characters.
+		  if (!char.IsLetter(term[c]))
+		  {
+			return false;
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Checks whether a term can be indexed.
+	  /// </summary>
+	  /// <returns>  true if it can be indexed </returns>
+	  private bool isIndexable(string term)
+	  {
+		return (term.Length < 30) && (term.Length > 2);
+	  }
+
+	  /// <summary>
+	  /// See if string is 'a','e','i','o','u'
+	  /// </summary>
+	  /// <returns> true if is vowel </returns>
+	  private bool isVowel(char value)
+	  {
+		return (value == 'a') || (value == 'e') || (value == 'i') || (value == 'o') || (value == 'u');
+	  }
+
+	  /// <summary>
+	  /// Gets R1
+	  /// 
+	  /// R1 - is the region after the first non-vowel following a vowel,
+	  ///      or is the null region at the end of the word if there is
+	  ///      no such non-vowel.
+	  /// </summary>
+	  /// <returns> null or a string representing R1 </returns>
+	  private string getR1(string value)
+	  {
+		int i;
+		int j;
+
+		// be-safe !!!
+		if (value == null)
+		{
+		  return null;
+		}
+
+		// find 1st vowel
+		i = value.Length - 1;
+		for (j = 0 ; j < i ; j++)
+		{
+		  if (isVowel(value[j]))
+		  {
+			break;
+		  }
+		}
+
+		if (!(j < i))
+		{
+		  return null;
+		}
+
+		// find 1st non-vowel
+		for (; j < i ; j++)
+		{
+		  if (!(isVowel(value[j])))
+		  {
+			break;
+		  }
+		}
+
+		if (!(j < i))
+		{
+		  return null;
+		}
+
+		return value.Substring(j + 1);
+	  }
+
+	  /// <summary>
+	  /// Gets RV
+	  /// 
+	  /// RV - IF the second letter is a consonant, RV is the region after
+	  ///      the next following vowel,
+	  /// 
+	  ///      OR if the first two letters are vowels, RV is the region
+	  ///      after the next consonant,
+	  /// 
+	  ///      AND otherwise (consonant-vowel case) RV is the region after
+	  ///      the third letter.
+	  /// 
+	  ///      BUT RV is the end of the word if these positions cannot be
+	  ///      found.
+	  /// </summary>
+	  /// <returns> null or a string representing RV </returns>
+	  private string getRV(string value)
+	  {
+		int i;
+		int j;
+
+		// be-safe !!!
+		if (value == null)
+		{
+		  return null;
+		}
+
+		i = value.Length - 1;
+
+		// RV - IF the second letter is a consonant, RV is the region after
+		//      the next following vowel,
+		if ((i > 0) && !isVowel(value[1]))
+		{
+		  // find 1st vowel
+		  for (j = 2 ; j < i ; j++)
+		  {
+			if (isVowel(value[j]))
+			{
+			  break;
+			}
+		  }
+
+		  if (j < i)
+		  {
+			return value.Substring(j + 1);
+		  }
+		}
+
+
+		// RV - OR if the first two letters are vowels, RV is the region
+		//      after the next consonant,
+		if ((i > 1) && isVowel(value[0]) && isVowel(value[1]))
+		{
+		  // find 1st consonant
+		  for (j = 2 ; j < i ; j++)
+		  {
+			if (!isVowel(value[j]))
+			{
+			  break;
+			}
+		  }
+
+		  if (j < i)
+		  {
+			return value.Substring(j + 1);
+		  }
+		}
+
+		// RV - AND otherwise (consonant-vowel case) RV is the region after
+		//      the third letter.
+		if (i > 2)
+		{
+		  return value.Substring(3);
+		}
+
+		return null;
+	  }
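+
+	  // Worked example for the two region methods above: in "beleza" the first
+	  // vowel is 'e' and the first non-vowel after it is 'l', so getR1("beleza")
+	  // returns "eza"; the second letter 'e' is a vowel and the first two letters
+	  // are not both vowels, so the consonant-vowel case applies and
+	  // getRV("beleza") also returns "eza".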
+
+	  /// <summary>
+	  /// 1) Turn to lowercase
+	  /// 2) Remove accents
+	  /// 3) ã -> a ; õ -> o
+	  /// 4) ç -> c
+	  /// </summary>
+	  /// <returns> null or a string transformed </returns>
+	  private string changeTerm(string value)
+	  {
+		int j;
+		string r = "";
+
+		// be-safe !!!
+		if (value == null)
+		{
+		  return null;
+		}
+
+		value = value.ToLower(locale);
+		for (j = 0 ; j < value.Length ; j++)
+		{
+		  if ((value[j] == 'á') || (value[j] == 'â') || (value[j] == 'ã'))
+		  {
+			r = r + "a";
+			continue;
+		  }
+		  if ((value[j] == 'é') || (value[j] == 'ê'))
+		  {
+			r = r + "e";
+			continue;
+		  }
+		  if (value[j] == 'í')
+		  {
+			r = r + "i";
+			continue;
+		  }
+		  if ((value[j] == 'ó') || (value[j] == 'ô') || (value[j] == 'õ'))
+		  {
+			r = r + "o";
+			continue;
+		  }
+		  if ((value[j] == 'ú') || (value[j] == 'ü'))
+		  {
+			r = r + "u";
+			continue;
+		  }
+		  if (value[j] == 'ç')
+		  {
+			r = r + "c";
+			continue;
+		  }
+		  if (value[j] == 'ñ')
+		  {
+			r = r + "n";
+			continue;
+		  }
+
+		  r = r + value[j];
+		}
+
+		return r;
+	  }
+
+	  /// <summary>
+	  /// Check if a string ends with a suffix
+	  /// </summary>
+	  /// <returns> true if the string ends with the specified suffix </returns>
+	  private bool suffix(string value, string suffix_Renamed)
+	  {
+
+		// be-safe !!!
+		if ((value == null) || (suffix_Renamed == null))
+		{
+		  return false;
+		}
+
+		if (suffix_Renamed.Length > value.Length)
+		{
+		  return false;
+		}
+
+		return value.Substring(value.Length - suffix_Renamed.Length).Equals(suffix_Renamed);
+	  }
+
+	  /// <summary>
+	  /// Replace a string suffix by another
+	  /// </summary>
+	  /// <returns> the replaced String </returns>
+	  private string replaceSuffix(string value, string toReplace, string changeTo)
+	  {
+		string vvalue;
+
+		// be-safe !!!
+		if ((value == null) || (toReplace == null) || (changeTo == null))
+		{
+		  return value;
+		}
+
+		vvalue = removeSuffix(value,toReplace);
+
+		if (value.Equals(vvalue))
+		{
+		  return value;
+		}
+		else
+		{
+		  return vvalue + changeTo;
+		}
+	  }
+
+	  /// <summary>
+	  /// Remove a string suffix
+	  /// </summary>
+	  /// <returns> the String without the suffix </returns>
+	  private string removeSuffix(string value, string toRemove)
+	  {
+		// be-safe !!!
+		if ((value == null) || (toRemove == null) || !suffix(value,toRemove))
+		{
+		  return value;
+		}
+
+		return value.Substring(0,value.Length - toRemove.Length);
+	  }
+
+	  /// <summary>
+	  /// See if a suffix is preceded by a String
+	  /// </summary>
+	  /// <returns> true if the suffix is preceded by the given string </returns>
+	  private bool suffixPreceded(string value, string suffix_Renamed, string preceded)
+	  {
+		// be-safe !!!
+		if ((value == null) || (suffix_Renamed == null) || (preceded == null) || !suffix(value,suffix_Renamed))
+		{
+		  return false;
+		}
+
+		return suffix(removeSuffix(value,suffix_Renamed),preceded);
+	  }
+
+	  /// <summary>
+	  /// Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~'.
+	  /// </summary>
+	  private void createCT(string term)
+	  {
+		CT = changeTerm(term);
+
+		if (CT.Length < 2)
+		{
+			return;
+		}
+
+		// if the first character is ... , remove it
+		if ((CT[0] == '"') || (CT[0] == '\'') || (CT[0] == '-') || (CT[0] == ',') || (CT[0] == ';') || (CT[0] == '.') || (CT[0] == '?') || (CT[0] == '!'))
+		{
+			CT = CT.Substring(1);
+		}
+
+		if (CT.Length < 2)
+		{
+			return;
+		}
+
+		// if the last character is ... , remove it
+		if ((CT[CT.Length - 1] == '-') || (CT[CT.Length - 1] == ',') || (CT[CT.Length - 1] == ';') || (CT[CT.Length - 1] == '.') || (CT[CT.Length - 1] == '?') || (CT[CT.Length - 1] == '!') || (CT[CT.Length - 1] == '\'') || (CT[CT.Length - 1] == '"'))
+		{
+			CT = CT.Substring(0,CT.Length - 1);
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Standard suffix removal.
+	  /// Search for the longest among the following suffixes, and perform
+	  /// the following actions:
+	  /// </summary>
+	  /// <returns> false if no ending was removed </returns>
+	  private bool step1()
+	  {
+		if (CT == null)
+		{
+			return false;
+		}
+
+		// suffix length = 7
+		if (suffix(CT,"uciones") && suffix(R2,"uciones"))
+		{
+			CT = replaceSuffix(CT,"uciones","u");
+			return true;
+		}
+
+		// suffix length = 6
+		if (CT.Length >= 6)
+		{
+		  if (suffix(CT,"imentos") && suffix(R2,"imentos"))
+		  {
+			  CT = removeSuffix(CT,"imentos");
+			  return true;
+		  }
+		  if (suffix(CT,"amentos") && suffix(R2,"amentos"))
+		  {
+			  CT = removeSuffix(CT,"amentos");
+			  return true;
+		  }
+		  if (suffix(CT,"adores") && suffix(R2,"adores"))
+		  {
+			  CT = removeSuffix(CT,"adores");
+			  return true;
+		  }
+		  if (suffix(CT,"adoras") && suffix(R2,"adoras"))
+		  {
+			  CT = removeSuffix(CT,"adoras");
+			  return true;
+		  }
+		  if (suffix(CT,"logias") && suffix(R2,"logias"))
+		  {
+			  replaceSuffix(CT,"logias","log");
+			  return true;
+		  }
+		  if (suffix(CT,"encias") && suffix(R2,"encias"))
+		  {
+			  CT = replaceSuffix(CT,"encias","ente");
+			  return true;
+		  }
+		  if (suffix(CT,"amente") && suffix(R1,"amente"))
+		  {
+			  CT = removeSuffix(CT,"amente");
+			  return true;
+		  }
+		  if (suffix(CT,"idades") && suffix(R2,"idades"))
+		  {
+			  CT = removeSuffix(CT,"idades");
+			  return true;
+		  }
+		}
+
+		// suffix length = 5
+		if (CT.Length >= 5)
+		{
+		  if (suffix(CT,"acoes") && suffix(R2,"acoes"))
+		  {
+			  CT = removeSuffix(CT,"acoes");
+			  return true;
+		  }
+		  if (suffix(CT,"imento") && suffix(R2,"imento"))
+		  {
+			  CT = removeSuffix(CT,"imento");
+			  return true;
+		  }
+		  if (suffix(CT,"amento") && suffix(R2,"amento"))
+		  {
+			  CT = removeSuffix(CT,"amento");
+			  return true;
+		  }
+		  if (suffix(CT,"adora") && suffix(R2,"adora"))
+		  {
+			  CT = removeSuffix(CT,"adora");
+			  return true;
+		  }
+		  if (suffix(CT,"ismos") && suffix(R2,"ismos"))
+		  {
+			  CT = removeSuffix(CT,"ismos");
+			  return true;
+		  }
+		  if (suffix(CT,"istas") && suffix(R2,"istas"))
+		  {
+			  CT = removeSuffix(CT,"istas");
+			  return true;
+		  }
+		  if (suffix(CT,"logia") && suffix(R2,"logia"))
+		  {
+			  CT = replaceSuffix(CT,"logia","log");
+			  return true;
+		  }
+		  if (suffix(CT,"ucion") && suffix(R2,"ucion"))
+		  {
+			  CT = replaceSuffix(CT,"ucion","u");
+			  return true;
+		  }
+		  if (suffix(CT,"encia") && suffix(R2,"encia"))
+		  {
+			  CT = replaceSuffix(CT,"encia","ente");
+			  return true;
+		  }
+		  if (suffix(CT,"mente") && suffix(R2,"mente"))
+		  {
+			  CT = removeSuffix(CT,"mente");
+			  return true;
+		  }
+		  if (suffix(CT,"idade") && suffix(R2,"idade"))
+		  {
+			  CT = removeSuffix(CT,"idade");
+			  return true;
+		  }
+		}
+
+		// suffix length = 4
+		if (CT.Length >= 4)
+		{
+		  if (suffix(CT,"acao") && suffix(R2,"acao"))
+		  {
+			  CT = removeSuffix(CT,"acao");
+			  return true;
+		  }
+		  if (suffix(CT,"ezas") && suffix(R2,"ezas"))
+		  {
+			  CT = removeSuffix(CT,"ezas");
+			  return true;
+		  }
+		  if (suffix(CT,"icos") && suffix(R2,"icos"))
+		  {
+			  CT = removeSuffix(CT,"icos");
+			  return true;
+		  }
+		  if (suffix(CT,"icas") && suffix(R2,"icas"))
+		  {
+			  CT = removeSuffix(CT,"icas");
+			  return true;
+		  }
+		  if (suffix(CT,"ismo") && suffix(R2,"ismo"))
+		  {
+			  CT = removeSuffix(CT,"ismo");
+			  return true;
+		  }
+		  if (suffix(CT,"avel") && suffix(R2,"avel"))
+		  {
+			  CT = removeSuffix(CT,"avel");
+			  return true;
+		  }
+		  if (suffix(CT,"ivel") && suffix(R2,"ivel"))
+		  {
+			  CT = removeSuffix(CT,"ivel");
+			  return true;
+		  }
+		  if (suffix(CT,"ista") && suffix(R2,"ista"))
+		  {
+			  CT = removeSuffix(CT,"ista");
+			  return true;
+		  }
+		  if (suffix(CT,"osos") && suffix(R2,"osos"))
+		  {
+			  CT = removeSuffix(CT,"osos");
+			  return true;
+		  }
+		  if (suffix(CT,"osas") && suffix(R2,"osas"))
+		  {
+			  CT = removeSuffix(CT,"osas");
+			  return true;
+		  }
+		  if (suffix(CT,"ador") && suffix(R2,"ador"))
+		  {
+			  CT = removeSuffix(CT,"ador");
+			  return true;
+		  }
+		  if (suffix(CT,"ivas") && suffix(R2,"ivas"))
+		  {
+			  CT = removeSuffix(CT,"ivas");
+			  return true;
+		  }
+		  if (suffix(CT,"ivos") && suffix(R2,"ivos"))
+		  {
+			  CT = removeSuffix(CT,"ivos");
+			  return true;
+		  }
+		  if (suffix(CT,"iras") && suffix(RV,"iras") && suffixPreceded(CT,"iras","e"))
+		  {
+			  CT = replaceSuffix(CT,"iras","ir");
+			  return true;
+		  }
+		}
+
+		// suffix length = 3
+		if (CT.Length >= 3)
+		{
+		  if (suffix(CT,"eza") && suffix(R2,"eza"))
+		  {
+			  CT = removeSuffix(CT,"eza");
+			  return true;
+		  }
+		  if (suffix(CT,"ico") && suffix(R2,"ico"))
+		  {
+			  CT = removeSuffix(CT,"ico");
+			  return true;
+		  }
+		  if (suffix(CT,"ica") && suffix(R2,"ica"))
+		  {
+			  CT = removeSuffix(CT,"ica");
+			  return true;
+		  }
+		  if (suffix(CT,"oso") && suffix(R2,"oso"))
+		  {
+			  CT = removeSuffix(CT,"oso");
+			  return true;
+		  }
+		  if (suffix(CT,"osa") && suffix(R2,"osa"))
+		  {
+			  CT = removeSuffix(CT,"osa");
+			  return true;
+		  }
+		  if (suffix(CT,"iva") && suffix(R2,"iva"))
+		  {
+			  CT = removeSuffix(CT,"iva");
+			  return true;
+		  }
+		  if (suffix(CT,"ivo") && suffix(R2,"ivo"))
+		  {
+			  CT = removeSuffix(CT,"ivo");
+			  return true;
+		  }
+		  if (suffix(CT,"ira") && suffix(RV,"ira") && suffixPreceded(CT,"ira","e"))
+		  {
+			  CT = replaceSuffix(CT,"ira","ir");
+			  return true;
+		  }
+		}
+
+		// no ending was removed by step1
+		return false;
+	  }
+
+
+	  /// <summary>
+	  /// Verb suffixes.
+	  /// 
+	  /// Search for the longest among the following suffixes in RV,
+	  /// and if found, delete.
+	  /// </summary>
+	  /// <returns> false if no ending was removed </returns>
+	  private bool step2()
+	  {
+		if (RV == null)
+		{
+			return false;
+		}
+
+		// suffix length = 7
+		if (RV.Length >= 7)
+		{
+		  if (suffix(RV,"issemos"))
+		  {
+			CT = removeSuffix(CT,"issemos");
+			return true;
+		  }
+		  if (suffix(RV,"essemos"))
+		  {
+			CT = removeSuffix(CT,"essemos");
+			return true;
+		  }
+		  if (suffix(RV,"assemos"))
+		  {
+			CT = removeSuffix(CT,"assemos");
+			return true;
+		  }
+		  if (suffix(RV,"ariamos"))
+		  {
+			CT = removeSuffix(CT,"ariamos");
+			return true;
+		  }
+		  if (suffix(RV,"eriamos"))
+		  {
+			CT = removeSuffix(CT,"eriamos");
+			return true;
+		  }
+		  if (suffix(RV,"iriamos"))
+		  {
+			CT = removeSuffix(CT,"iriamos");
+			return true;
+		  }
+		}
+
+		// suffix length = 6
+		if (RV.Length >= 6)
+		{
+		  if (suffix(RV,"iremos"))
+		  {
+			CT = removeSuffix(CT,"iremos");
+			return true;
+		  }
+		  if (suffix(RV,"eremos"))
+		  {
+			CT = removeSuffix(CT,"eremos");
+			return true;
+		  }
+		  if (suffix(RV,"aremos"))
+		  {
+			CT = removeSuffix(CT,"aremos");
+			return true;
+		  }
+		  if (suffix(RV,"avamos"))
+		  {
+			CT = removeSuffix(CT,"avamos");
+			return true;
+		  }
+		  if (suffix(RV,"iramos"))
+		  {
+			CT = removeSuffix(CT,"iramos");
+			return true;
+		  }
+		  if (suffix(RV,"eramos"))
+		  {
+			CT = removeSuffix(CT,"eramos");
+			return true;
+		  }
+		  if (suffix(RV,"aramos"))
+		  {
+			CT = removeSuffix(CT,"aramos");
+			return true;
+		  }
+		  if (suffix(RV,"asseis"))
+		  {
+			CT = removeSuffix(CT,"asseis");
+			return true;
+		  }
+		  if (suffix(RV,"esseis"))
+		  {
+			CT = removeSuffix(CT,"esseis");
+			return true;
+		  }
+		  if (suffix(RV,"isseis"))
+		  {
+			CT = removeSuffix(CT,"isseis");
+			return true;
+		  }
+		  if (suffix(RV,"arieis"))
+		  {
+			CT = removeSuffix(CT,"arieis");
+			return true;
+		  }
+		  if (suffix(RV,"erieis"))
+		  {
+			CT = removeSuffix(CT,"erieis");
+			return true;
+		  }
+		  if (suffix(RV,"irieis"))
+		  {
+			CT = removeSuffix(CT,"irieis");
+			return true;
+		  }
+		}
+
+
+		// suffix length = 5
+		if (RV.Length >= 5)
+		{
+		  if (suffix(RV,"irmos"))
+		  {
+			CT = removeSuffix(CT,"irmos");
+			return true;
+		  }
+		  if (suffix(RV,"iamos"))
+		  {
+			CT = removeSuffix(CT,"iamos");
+			return true;
+		  }
+		  if (suffix(RV,"armos"))
+		  {
+			CT = removeSuffix(CT,"armos");
+			return true;
+		  }
+		  if (suffix(RV,"ermos"))
+		  {
+			CT = removeSuffix(CT,"ermos");
+			return true;
+		  }
+		  if (suffix(RV,"areis"))
+		  {
+			CT = removeSuffix(CT,"areis");
+			return true;
+		  }
+		  if (suffix(RV,"ereis"))
+		  {
+			CT = removeSuffix(CT,"ereis");
+			return true;
+		  }
+		  if (suffix(RV,"ireis"))
+		  {
+			CT = removeSuffix(CT,"ireis");
+			return true;
+		  }
+		  if (suffix(RV,"asses"))
+		  {
+			CT = removeSuffix(CT,"asses");
+			return true;
+		  }
+		  if (suffix(RV,"esses"))
+		  {
+			CT = removeSuffix(CT,"esses");
+			return true;
+		  }
+		  if (suffix(RV,"isses"))
+		  {
+			CT = removeSuffix(CT,"isses");
+			return true;
+		  }
+		  if (suffix(RV,"astes"))
+		  {
+			CT = removeSuffix(CT,"astes");
+			return true;
+		  }
+		  if (suffix(RV,"assem"))
+		  {
+			CT = removeSuffix(CT,"assem");
+			return true;
+		  }
+		  if (suffix(RV,"essem"))
+		  {
+			CT = removeSuffix(CT,"essem");
+			return true;
+		  }
+		  if (suffix(RV,"issem"))
+		  {
+			CT = removeSuffix(CT,"issem");
+			return true;
+		  }
+		  if (suffix(RV,"ardes"))
+		  {
+			CT = removeSuffix(CT,"ardes");
+			return true;
+		  }
+		  if (suffix(RV,"erdes"))
+		  {
+			CT = removeSuffix(CT,"erdes");
+			return true;
+		  }
+		  if (suffix(RV,"irdes"))
+		  {
+			CT = removeSuffix(CT,"irdes");
+			return true;
+		  }
+		  if (suffix(RV,"ariam"))
+		  {
+			CT = removeSuffix(CT,"ariam");
+			return true;
+		  }
+		  if (suffix(RV,"eriam"))
+		  {
+			CT = removeSuffix(CT,"eriam");
+			return true;
+		  }
+		  if (suffix(RV,"iriam"))
+		  {
+			CT = removeSuffix(CT,"iriam");
+			return true;
+		  }
+		  if (suffix(RV,"arias"))
+		  {
+			CT = removeSuffix(CT,"arias");
+			return true;
+		  }
+		  if (suffix(RV,"erias"))
+		  {
+			CT = removeSuffix(CT,"erias");
+			return true;
+		  }
+		  if (suffix(RV,"irias"))
+		  {
+			CT = removeSuffix(CT,"irias");
+			return true;
+		  }
+		  if (suffix(RV,"estes"))
+		  {
+			CT = removeSuffix(CT,"estes");
+			return true;
+		  }
+		  if (suffix(RV,"istes"))
+		  {
+			CT = removeSuffix(CT,"istes");
+			return true;
+		  }
+		  if (suffix(RV,"areis"))
+		  {
+			CT = removeSuffix(CT,"areis");
+			return true;
+		  }
+		  if (suffix(RV,"aveis"))
+		  {
+			CT = removeSuffix(CT,"aveis");
+			return true;
+		  }
+		}
+
+		// suffix length = 4
+		if (RV.Length >= 4)
+		{
+		  if (suffix(RV,"aria"))
+		  {
+			CT = removeSuffix(CT,"aria");
+			return true;
+		  }
+		  if (suffix(RV,"eria"))
+		  {
+			CT = removeSuffix(CT,"eria");
+			return true;
+		  }
+		  if (suffix(RV,"iria"))
+		  {
+			CT = removeSuffix(CT,"iria");
+			return true;
+		  }
+		  if (suffix(RV,"asse"))
+		  {
+			CT = removeSuffix(CT,"asse");
+			return true;
+		  }
+		  if (suffix(RV,"esse"))
+		  {
+			CT = removeSuffix(CT,"esse");
+			return true;
+		  }
+		  if (suffix(RV,"isse"))
+		  {
+			CT = removeSuffix(CT,"isse");
+			return true;
+		  }
+		  if (suffix(RV,"aste"))
+		  {
+			CT = removeSuffix(CT,"aste");
+			return true;
+		  }
+		  if (suffix(RV,"este"))
+		  {
+			CT = removeSuffix(CT,"este");
+			return true;
+		  }
+		  if (suffix(RV,"iste"))
+		  {
+			CT = removeSuffix(CT,"iste");
+			return true;
+		  }
+		  if (suffix(RV,"arei"))
+		  {
+			CT = removeSuffix(CT,"arei");
+			return true;
+		  }
+		  if (suffix(RV,"erei"))
+		  {
+			CT = removeSuffix(CT,"erei");
+			return true;
+		  }
+		  if (suffix(RV,"irei"))
+		  {
+			CT = removeSuffix(CT,"irei");
+			return true;
+		  }
+		  if (suffix(RV,"aram"))
+		  {
+			CT = removeSuffix(CT,"aram");
+			return true;
+		  }
+		  if (suffix(RV,"eram"))
+		  {
+			CT = removeSuffix(CT,"eram");
+			return true;
+		  }
+		  if (suffix(RV,"iram"))
+		  {
+			CT = removeSuffix(CT,"iram");
+			return true;
+		  }
+		  if (suffix(RV,"avam"))
+		  {
+			CT = removeSuffix(CT,"avam");
+			return true;
+		  }
+		  if (suffix(RV,"arem"))
+		  {
+			CT = removeSuffix(CT,"arem");
+			return true;
+		  }
+		  if (suffix(RV,"erem"))
+		  {
+			CT = removeSuffix(CT,"erem");
+			return true;
+		  }
+		  if (suffix(RV,"irem"))
+		  {
+			CT = removeSuffix(CT,"irem");
+			return true;
+		  }
+		  if (suffix(RV,"ando"))
+		  {
+			CT = removeSuffix(CT,"ando");
+			return true;
+		  }
+		  if (suffix(RV,"endo"))
+		  {
+			CT = removeSuffix(CT,"endo");
+			return true;
+		  }
+		  if (suffix(RV,"indo"))
+		  {
+			CT = removeSuffix(CT,"indo");
+			return true;
+		  }
+		  if (suffix(RV,"arao"))
+		  {
+			CT = removeSuffix(CT,"arao");
+			return true;
+		  }
+		  if (suffix(RV,"erao"))
+		  {
+			CT = removeSuffix(CT,"erao");
+			return true;
+		  }
+		  if (suffix(RV,"irao"))
+		  {
+			CT = removeSuffix(CT,"irao");
+			return true;
+		  }
+		  if (suffix(RV,"adas"))
+		  {
+			CT = removeSuffix(CT,"adas");
+			return true;
+		  }
+		  if (suffix(RV,"idas"))
+		  {
+			CT = removeSuffix(CT,"idas");
+			return true;
+		  }
+		  if (suffix(RV,"aras"))
+		  {
+			CT = removeSuffix(CT,"aras");
+			return true;
+		  }
+		  if (suffix(RV,"eras"))
+		  {
+			CT = removeSuffix(CT,"eras");
+			return true;
+		  }
+		  if (suffix(RV,"iras"))
+		  {
+			CT = removeSuffix(CT,"iras");
+			return true;
+		  }
+		  if (suffix(RV,"avas"))
+		  {
+			CT = removeSuffix(CT,"avas");
+			return true;
+		  }
+		  if (suffix(RV,"ares"))
+		  {
+			CT = removeSuffix(CT,"ares");
+			return true;
+		  }
+		  if (suffix(RV,"eres"))
+		  {
+			CT = removeSuffix(CT,"eres");
+			return true;
+		  }
+		  if (suffix(RV,"ires"))
+		  {
+			CT = removeSuffix(CT,"ires");
+			return true;
+		  }
+		  if (suffix(RV,"ados"))
+		  {
+			CT = removeSuffix(CT,"ados");
+			return true;
+		  }
+		  if (suffix(RV,"idos"))
+		  {
+			CT = removeSuffix(CT,"idos");
+			return true;
+		  }
+		  if (suffix(RV,"amos"))
+		  {
+			CT = removeSuffix(CT,"amos");
+			return true;
+		  }
+		  if (suffix(RV,"emos"))
+		  {
+			CT = removeSuffix(CT,"emos");
+			return true;
+		  }
+		  if (suffix(RV,"imos"))
+		  {
+			CT = removeSuffix(CT,"imos");
+			return true;
+		  }
+		  if (suffix(RV,"iras"))
+		  {
+			CT = removeSuffix(CT,"iras");
+			return true;
+		  }
+		  if (suffix(RV,"ieis"))
+		  {
+			CT = removeSuffix(CT,"ieis");
+			return true;
+		  }
+		}
+
+		// suffix length = 3
+		if (RV.Length >= 3)
+		{
+		  if (suffix(RV,"ada"))
+		  {
+			CT = removeSuffix(CT,"ada");
+			return true;
+		  }
+		  if (suffix(RV,"ida"))
+		  {
+			CT = removeSuffix(CT,"ida");
+			return true;
+		  }
+		  if (suffix(RV,"ara"))
+		  {
+			CT = removeSuffix(CT,"ara");
+			return true;
+		  }
+		  if (suffix(RV,"era"))
+		  {
+			CT = removeSuffix(CT,"era");
+			return true;
+		  }
+		  if (suffix(RV,"ira"))
+		  {
+			CT = removeSuffix(CT,"ava");
+			return true;
+		  }
+		  if (suffix(RV,"iam"))
+		  {
+			CT = removeSuffix(CT,"iam");
+			return true;
+		  }
+		  if (suffix(RV,"ado"))
+		  {
+			CT = removeSuffix(CT,"ado");
+			return true;
+		  }
+		  if (suffix(RV,"ido"))
+		  {
+			CT = removeSuffix(CT,"ido");
+			return true;
+		  }
+		  if (suffix(RV,"ias"))
+		  {
+			CT = removeSuffix(CT,"ias");
+			return true;
+		  }
+		  if (suffix(RV,"ais"))
+		  {
+			CT = removeSuffix(CT,"ais");
+			return true;
+		  }
+		  if (suffix(RV,"eis"))
+		  {
+			CT = removeSuffix(CT,"eis");
+			return true;
+		  }
+		  if (suffix(RV,"ira"))
+		  {
+			CT = removeSuffix(CT,"ira");
+			return true;
+		  }
+		  if (suffix(RV,"ear"))
+		  {
+			CT = removeSuffix(CT,"ear");
+			return true;
+		  }
+		}
+
+		// suffix length = 2
+		if (RV.Length >= 2)
+		{
+		  if (suffix(RV,"ia"))
+		  {
+			CT = removeSuffix(CT,"ia");
+			return true;
+		  }
+		  if (suffix(RV,"ei"))
+		  {
+			CT = removeSuffix(CT,"ei");
+			return true;
+		  }
+		  if (suffix(RV,"am"))
+		  {
+			CT = removeSuffix(CT,"am");
+			return true;
+		  }
+		  if (suffix(RV,"em"))
+		  {
+			CT = removeSuffix(CT,"em");
+			return true;
+		  }
+		  if (suffix(RV,"ar"))
+		  {
+			CT = removeSuffix(CT,"ar");
+			return true;
+		  }
+		  if (suffix(RV,"er"))
+		  {
+			CT = removeSuffix(CT,"er");
+			return true;
+		  }
+		  if (suffix(RV,"ir"))
+		  {
+			CT = removeSuffix(CT,"ir");
+			return true;
+		  }
+		  if (suffix(RV,"as"))
+		  {
+			CT = removeSuffix(CT,"as");
+			return true;
+		  }
+		  if (suffix(RV,"es"))
+		  {
+			CT = removeSuffix(CT,"es");
+			return true;
+		  }
+		  if (suffix(RV,"is"))
+		  {
+			CT = removeSuffix(CT,"is");
+			return true;
+		  }
+		  if (suffix(RV,"eu"))
+		  {
+			CT = removeSuffix(CT,"eu");
+			return true;
+		  }
+		  if (suffix(RV,"iu"))
+		  {
+			CT = removeSuffix(CT,"iu");
+			return true;
+		  }
+		  if (suffix(RV,"iu"))
+		  {
+			CT = removeSuffix(CT,"iu");
+			return true;
+		  }
+		  if (suffix(RV,"ou"))
+		  {
+			CT = removeSuffix(CT,"ou");
+			return true;
+		  }
+		}
+
+		// no ending was removed by step2
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Delete suffix 'i' if in RV and preceded by 'c'
+	  /// 
+	  /// </summary>
+	  private void step3()
+	  {
+		if (RV == null)
+		{
+			return;
+		}
+
+		if (suffix(RV,"i") && suffixPreceded(RV,"i","c"))
+		{
+		  CT = removeSuffix(CT,"i");
+		}
+
+	  }
+
+	  /// <summary>
+	  /// Residual suffix
+	  /// 
+	  /// If the word ends with one of the suffixes (os a i o á í ó)
+	  /// in RV, delete it
+	  /// 
+	  /// </summary>
+	  private void step4()
+	  {
+		if (RV == null)
+		{
+			return;
+		}
+
+		if (suffix(RV,"os"))
+		{
+		  CT = removeSuffix(CT,"os");
+		  return;
+		}
+		if (suffix(RV,"a"))
+		{
+		  CT = removeSuffix(CT,"a");
+		  return;
+		}
+		if (suffix(RV,"i"))
+		{
+		  CT = removeSuffix(CT,"i");
+		  return;
+		}
+		if (suffix(RV,"o"))
+		{
+		  CT = removeSuffix(CT,"o");
+		  return;
+		}
+
+	  }
+
+	  /// <summary>
+	  /// If the word ends with one of (e é ê) in RV, delete it,
+	  /// and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
+	  /// delete the 'u' (or 'i').
+	  /// 
+	  /// Or, if the word ends in 'ç', remove the cedilla.
+	  /// 
+	  /// </summary>
+	  private void step5()
+	  {
+		if (RV == null)
+		{
+			return;
+		}
+
+		if (suffix(RV,"e"))
+		{
+		  if (suffixPreceded(RV,"e","gu"))
+		  {
+			CT = removeSuffix(CT,"e");
+			CT = removeSuffix(CT,"u");
+			return;
+		  }
+
+		  if (suffixPreceded(RV,"e","ci"))
+		  {
+			CT = removeSuffix(CT,"e");
+			CT = removeSuffix(CT,"i");
+			return;
+		  }
+
+		  CT = removeSuffix(CT,"e");
+		  return;
+		}
+	  }
+
+	  /// <summary>
+	  /// For log and debug purpose
+	  /// </summary>
+	  /// <returns>  TERM, CT, RV, R1 and R2 </returns>
+	  public virtual string log()
+	  {
+		return " (TERM = " + TERM + ")" + " (CT = " + CT + ")" + " (RV = " + RV + ")" + " (R1 = " + R1 + ")" + " (R2 = " + R2 + ")";
+	  }
+
+	}
+
+
+}
\ No newline at end of file
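
A side note on the helpers above: every rule in step1() follows the same
"region-gated" shape -- a suffix is stripped only when it also ends the
relevant region (R1, R2 or RV). A minimal, self-contained C# sketch of that
idea (illustrative only; the ported class additionally tracks CT/R1/R2/RV
state and orders checks from longest to shortest suffix):

    using System;

    // Toy sketch of region-gated suffix stripping. Not the ported class;
    // the names and the Main() driver are invented for illustration.
    static class RegionStemSketch
    {
        // Strip 'toRemove' from 'value' only if the region also ends with it.
        static string StripInRegion(string value, string region, string toRemove)
        {
            if (region != null &&
                value.EndsWith(toRemove, StringComparison.Ordinal) &&
                region.EndsWith(toRemove, StringComparison.Ordinal))
            {
                return value.Substring(0, value.Length - toRemove.Length);
            }
            return value;
        }

        static void Main()
        {
            Console.WriteLine(StripInRegion("biologia", "logia", "logia")); // bio
            Console.WriteLine(StripInRegion("logia", "", "logia"));         // logia (unchanged)
        }
    }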

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
new file mode 100644
index 0000000..939d358
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
@@ -0,0 +1,154 @@
+using System;
+
+namespace org.apache.lucene.analysis.ca
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ElisionFilter = org.apache.lucene.analysis.util.ElisionFilter;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using CatalanStemmer = org.tartarus.snowball.ext.CatalanStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Catalan.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating CatalanAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, ElisionFilter with a set of Catalan 
+	///        contractions is used by default.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class CatalanAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Catalan stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  private static readonly CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("d", "l", "m", "n", "s", "t"), true));
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(CatalanAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public CatalanAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public CatalanAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>, <seealso cref="LowerCaseFilter"/>, 
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new ElisionFilter(result, DEFAULT_ARTICLES);
+		}
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new CatalanStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
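
createComponents() above chains each filter around the previous stream
(tokenizer -> StandardFilter -> ElisionFilter -> LowerCaseFilter ->
StopFilter -> SnowballFilter). A toy decorator sketch of that chaining
pattern in plain C# (illustrative only; not the Lucene TokenStream API):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Each "filter" wraps the stream before it, exactly like the analyzer
    // chain above. All types here are invented for illustration.
    interface ITokens { IEnumerable<string> Read(); }

    class Source : ITokens
    {
        private readonly string text;
        public Source(string text) { this.text = text; }
        public IEnumerable<string> Read() { return text.Split(' '); }
    }

    class LowerCase : ITokens
    {
        private readonly ITokens input;
        public LowerCase(ITokens input) { this.input = input; }
        public IEnumerable<string> Read() { return input.Read().Select(t => t.ToLowerInvariant()); }
    }

    class Stop : ITokens
    {
        private readonly ITokens input;
        private readonly HashSet<string> stopwords;
        public Stop(ITokens input, HashSet<string> stopwords) { this.input = input; this.stopwords = stopwords; }
        public IEnumerable<string> Read() { return input.Read().Where(t => !stopwords.Contains(t)); }
    }

    class Program
    {
        static void Main()
        {
            ITokens chain = new Stop(new LowerCase(new Source("El Gat I la Casa")),
                                     new HashSet<string> { "el", "i", "la" });
            Console.WriteLine(string.Join(" ", chain.Read())); // gat casa
        }
    }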

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
new file mode 100644
index 0000000..1127842
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis.CharFilter
+{
+    /// <summary>
+	/// Base utility class for implementing a <seealso cref="CharFilter"/>.
+	/// You subclass this, and then record mappings by calling
+	/// <seealso cref="#addOffCorrectMap"/>, and then invoke the correct
+	/// method to correct an offset.
+	/// </summary>
+	public abstract class BaseCharFilter : CharFilter
+	{
+
+	  private int[] offsets;
+	  private int[] diffs;
+	  private int size = 0;
+
+	  public BaseCharFilter(Reader @in) : base(@in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Retrieve the corrected offset. </summary>
+	  protected internal override int correct(int currentOff)
+	  {
+		if (offsets == null || currentOff < offsets[0])
+		{
+		  return currentOff;
+		}
+
+		int hi = size - 1;
+		if (currentOff >= offsets[hi])
+		{
+		  return currentOff + diffs[hi];
+		}
+
+		int lo = 0;
+		int mid = -1;
+
+		while (hi >= lo)
+		{
+		  mid = (int)((uint)(lo + hi) >> 1);
+		  if (currentOff < offsets[mid])
+		  {
+			hi = mid - 1;
+		  }
+		  else if (currentOff > offsets[mid])
+		  {
+			lo = mid + 1;
+		  }
+		  else
+		  {
+			return currentOff + diffs[mid];
+		  }
+		}
+
+		if (currentOff < offsets[mid])
+		{
+		  return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
+		}
+		else
+		{
+		  return currentOff + diffs[mid];
+		}
+	  }
+
+	  protected internal virtual int LastCumulativeDiff
+	  {
+		  get
+		  {
+			return offsets == null ? 0 : diffs[size-1];
+		  }
+	  }
+
+	  /// <summary>
+	  /// <para>
+	  ///   Adds an offset correction mapping at the given output stream offset.
+	  /// </para>
+	  /// <para>
+	  ///   Assumption: the offset given with each successive call to this method
+	  ///   will not be smaller than the offset given at the previous invocation.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="off"> The output stream offset at which to apply the correction </param>
+	  /// <param name="cumulativeDiff"> The input offset is given by adding this
+	  ///                       to the output offset </param>
+	  protected internal virtual void addOffCorrectMap(int off, int cumulativeDiff)
+	  {
+		if (offsets == null)
+		{
+		  offsets = new int[64];
+		  diffs = new int[64];
+		}
+		else if (size == offsets.Length)
+		{
+		  offsets = ArrayUtil.grow(offsets);
+		  diffs = ArrayUtil.grow(diffs);
+		}
+
+		if (size > 0)
+		{
+		  System.Diagnostics.Debug.Assert(off >= offsets[size - 1], "Offset #" + size + "(" + off + ") is less than the last recorded offset " + offsets[size - 1] + "\n" + string.Join(", ", offsets) + "\n" + string.Join(", ", diffs));
+		}
+
+		if (size == 0 || off != offsets[size - 1])
+		{
+		  offsets[size] = off;
+		  diffs[size++] = cumulativeDiff;
+		} // Overwrite the diff at the last recorded offset
+		else
+		{
+		  diffs[size - 1] = cumulativeDiff;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
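
correct() above maps an output-stream offset back to an input offset by
finding the last recorded correction at or before it and adding the
cumulative diff. A self-contained sketch of the same bookkeeping (linear
scan here instead of the binary search used above):

    using System;

    // Simplified stand-in for BaseCharFilter's offset correction.
    class OffsetCorrectSketch
    {
        static int Correct(int[] offsets, int[] diffs, int size, int currentOff)
        {
            if (size == 0 || currentOff < offsets[0]) return currentOff;
            // Find the last recorded offset <= currentOff.
            int i = size - 1;
            while (i > 0 && offsets[i] > currentOff) i--;
            return currentOff + diffs[i];
        }

        static void Main()
        {
            // Suppose "&amp;" (5 chars) collapsed to "&" (1 char) at output
            // offset 1: every output offset >= 1 is 4 chars behind the input.
            int[] offsets = { 1 };
            int[] diffs = { 4 };
            Console.WriteLine(Correct(offsets, diffs, 1, 0)); // 0
            Console.WriteLine(Correct(offsets, diffs, 1, 3)); // 7
        }
    }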

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
new file mode 100644
index 0000000..2d527fc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="HTMLStripCharFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_html" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" /&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HTMLStripCharFilterFactory : CharFilterFactory
+	{
+	  internal readonly HashSet<string> escapedTags;
+	  internal static readonly Pattern TAG_NAME_PATTERN = Pattern.compile("[^\\s,]+");
+
+	  /// <summary>
+	  /// Creates a new HTMLStripCharFilterFactory </summary>
+	  public HTMLStripCharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		escapedTags = getSet(args, "escapedTags");
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override HTMLStripCharFilter create(Reader input)
+	  {
+		HTMLStripCharFilter charFilter;
+		if (null == escapedTags)
+		{
+		  charFilter = new HTMLStripCharFilter(input);
+		}
+		else
+		{
+		  charFilter = new HTMLStripCharFilter(input, escapedTags);
+		}
+		return charFilter;
+	  }
+	}
+
+}
\ No newline at end of file
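
The factory constructor above follows the usual analysis-factory
convention: known arguments are consumed (removed) from the args
dictionary, and anything left over is rejected. A small stand-alone
sketch of that convention (the Take helper is invented for illustration):

    using System;
    using System.Collections.Generic;

    class FactoryArgsSketch
    {
        // Consume a known key so leftover keys can be detected.
        static string Take(IDictionary<string, string> args, string key)
        {
            string value;
            if (args.TryGetValue(key, out value))
            {
                args.Remove(key);
                return value;
            }
            return null;
        }

        static void Main()
        {
            var args = new Dictionary<string, string> { { "escapedTags", "a, title" } };
            string escaped = Take(args, "escapedTags");
            if (args.Count > 0)
                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
            Console.WriteLine(escaped); // a, title
        }
    }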

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
new file mode 100644
index 0000000..5a148be
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
@@ -0,0 +1,240 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using Lucene.Net.Analysis.CharFilter;
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+
+	using RollingCharBuffer = org.apache.lucene.analysis.util.RollingCharBuffer;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using Outputs = org.apache.lucene.util.fst.Outputs;
+
+	/// <summary>
+	/// Simplistic <seealso cref="CharFilter"/> that applies the mappings
+	/// contained in a <seealso cref="NormalizeCharMap"/> to the character
+	/// stream, correcting the resulting changes to the
+	/// offsets.  Matching is greedy (longest pattern matching at
+	/// a given point wins).  Replacement is allowed to be the
+	/// empty string.
+	/// </summary>
+
+	public class MappingCharFilter : BaseCharFilter
+	{
+
+	  private readonly Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+	  private readonly FST<CharsRef> map;
+	  private readonly FST.BytesReader fstReader;
+	  private readonly RollingCharBuffer buffer = new RollingCharBuffer();
+	  private readonly FST.Arc<CharsRef> scratchArc = new FST.Arc<CharsRef>();
+	  private readonly IDictionary<char?, FST.Arc<CharsRef>> cachedRootArcs;
+
+	  private CharsRef replacement;
+	  private int replacementPointer;
+	  private int inputOff;
+
+	  /// <summary>
+	  /// Default constructor that takes a <seealso cref="Reader"/>. </summary>
+	  public MappingCharFilter(NormalizeCharMap normMap, Reader @in) : base(@in)
+	  {
+		buffer.reset(@in);
+
+		map = normMap.map;
+		cachedRootArcs = normMap.cachedRootArcs;
+
+		if (map != null)
+		{
+		  fstReader = map.BytesReader;
+		}
+		else
+		{
+		  fstReader = null;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		input.reset();
+		buffer.reset(input);
+		replacement = null;
+		inputOff = 0;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read() throws java.io.IOException
+	  public override int read()
+	  {
+
+		//System.out.println("\nread");
+		while (true)
+		{
+
+		  if (replacement != null && replacementPointer < replacement.length)
+		  {
+			//System.out.println("  return repl[" + replacementPointer + "]=" + replacement.chars[replacement.offset + replacementPointer]);
+			return replacement.chars[replacement.offset + replacementPointer++];
+		  }
+
+		  // TODO: a more efficient approach would be Aho/Corasick's
+		  // algorithm
+		  // (http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm)
+		  // or this generalization: www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
+		  //
+		  // I think this would be (almost?) equivalent to 1) adding
+		  // epsilon arcs from all final nodes back to the init
+		  // node in the FST, 2) adding a .* (skip any char)
+		  // loop on the initial node, and 3) determinizing
+		  // that.  Then we would not have to restart matching
+		  // at each position.
+
+		  int lastMatchLen = -1;
+		  CharsRef lastMatch = null;
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int firstCH = buffer.get(inputOff);
+		  int firstCH = buffer.get(inputOff);
+		  if (firstCH != -1)
+		  {
+			// A Dictionary indexer throws on a missing key, so use TryGetValue
+			// to keep the Java HashMap get() semantics (null when absent):
+			FST.Arc<CharsRef> arc;
+			cachedRootArcs.TryGetValue((char) firstCH, out arc);
+			if (arc != null)
+			{
+			  if (!FST.targetHasArcs(arc))
+			  {
+				// Fast pass for single character match:
+				Debug.Assert(arc.Final);
+				lastMatchLen = 1;
+				lastMatch = arc.output;
+			  }
+			  else
+			  {
+				int lookahead = 0;
+				CharsRef output = arc.output;
+				while (true)
+				{
+				  lookahead++;
+
+				  if (arc.Final)
+				  {
+					// Match! (to node is final)
+					lastMatchLen = lookahead;
+					lastMatch = outputs.add(output, arc.nextFinalOutput);
+					// Greedy: keep searching to see if there's a
+					// longer match...
+				  }
+
+				  if (!FST.targetHasArcs(arc))
+				  {
+					break;
+				  }
+
+				  int ch = buffer.get(inputOff + lookahead);
+				  if (ch == -1)
+				  {
+					break;
+				  }
+				  if ((arc = map.findTargetArc(ch, arc, scratchArc, fstReader)) == null)
+				  {
+					// Dead end
+					break;
+				  }
+				  output = outputs.add(output, arc.output);
+				}
+			  }
+			}
+		  }
+
+		  if (lastMatch != null)
+		  {
+			inputOff += lastMatchLen;
+			//System.out.println("  match!  len=" + lastMatchLen + " repl=" + lastMatch);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int diff = lastMatchLen - lastMatch.length;
+			int diff = lastMatchLen - lastMatch.length;
+
+			if (diff != 0)
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int prevCumulativeDiff = getLastCumulativeDiff();
+			  int prevCumulativeDiff = LastCumulativeDiff;
+			  if (diff > 0)
+			  {
+				// Replacement is shorter than matched input:
+				addOffCorrectMap(inputOff - diff - prevCumulativeDiff, prevCumulativeDiff + diff);
+			  }
+			  else
+			  {
+				// Replacement is longer than matched input: remap
+				// the "extra" chars all back to the same input
+				// offset:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int outputStart = inputOff - prevCumulativeDiff;
+				int outputStart = inputOff - prevCumulativeDiff;
+				for (int extraIDX = 0;extraIDX < -diff;extraIDX++)
+				{
+				  addOffCorrectMap(outputStart + extraIDX, prevCumulativeDiff - extraIDX - 1);
+				}
+			  }
+			}
+
+			replacement = lastMatch;
+			replacementPointer = 0;
+
+		  }
+		  else
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int ret = buffer.get(inputOff);
+			int ret = buffer.get(inputOff);
+			if (ret != -1)
+			{
+			  inputOff++;
+			  buffer.freeBefore(inputOff);
+			}
+			return ret;
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read(char[] cbuf, int off, int len) throws java.io.IOException
+	  public override int read(char[] cbuf, int off, int len)
+	  {
+		int numRead = 0;
+		for (int i = off; i < off + len; i++)
+		{
+		  int c = read();
+		  if (c == -1)
+		  {
+			  break;
+		  }
+		  cbuf[i] = (char) c;
+		  numRead++;
+		}
+
+		return numRead == 0 ? -1 : numRead;
+	  }
+	}
+
+}
\ No newline at end of file
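
The class doc above promises greedy matching: at each input position the
longest mapping wins, and the replacement may be empty or of a different
length. A self-contained sketch of those semantics over plain strings
(a dictionary scan standing in for the FST walk in the real filter):

    using System;
    using System.Collections.Generic;
    using System.Text;

    class GreedyMapSketch
    {
        static string Apply(string input, IDictionary<string, string> map)
        {
            var sb = new StringBuilder();
            int i = 0;
            while (i < input.Length)
            {
                // Pick the longest mapping that matches at position i.
                string best = null;
                foreach (string m in map.Keys)
                    if (input.Length - i >= m.Length &&
                        string.CompareOrdinal(input, i, m, 0, m.Length) == 0 &&
                        (best == null || m.Length > best.Length))
                        best = m;
                if (best != null) { sb.Append(map[best]); i += best.Length; }
                else { sb.Append(input[i]); i++; }
            }
            return sb.ToString();
        }

        static void Main()
        {
            var map = new Dictionary<string, string> { { "a", "y" }, { "aa", "x" } };
            Console.WriteLine(Apply("aaa", map)); // xy -- "aa" beats "a" at position 0
        }
    }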

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
new file mode 100644
index 0000000..4489b7c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
@@ -0,0 +1,184 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AbstractAnalysisFactory = AbstractAnalysisFactory;
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+
+	/// <summary>
+	/// Factory for <seealso cref="MappingCharFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_map" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since Solr 1.4
+	/// </summary>
+	public class MappingCharFilterFactory : CharFilterFactory, ResourceLoaderAware, MultiTermAwareComponent
+	{
+
+	  protected internal NormalizeCharMap normMap;
+	  private readonly string mapping;
+
+	  /// <summary>
+	  /// Creates a new MappingCharFilterFactory </summary>
+	  public MappingCharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		mapping = get(args, "mapping");
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  // TODO: this should use inputstreams from the loader, not File!
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (mapping != null)
+		{
+		  IList<string> wlist = null;
+		  File mappingFile = new File(mapping);
+		  if (mappingFile.exists())
+		  {
+			wlist = getLines(loader, mapping);
+		  }
+		  else
+		  {
+			IList<string> files = splitFileNames(mapping);
+			wlist = new List<string>();
+			foreach (string file in files)
+			{
+			  IList<string> lines = getLines(loader, file.Trim());
+			  wlist.AddRange(lines);
+			}
+		  }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		  NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		  parseRules(wlist, builder);
+		  normMap = builder.build();
+		  if (normMap.map == null)
+		  {
+			// if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
+			// so just set the whole map to null
+			normMap = null;
+		  }
+		}
+	  }
+
+	  public override Reader create(Reader input)
+	  {
+		// if the map is null, it means there's actually no mappings... just return the original stream
+		// as there is nothing to do here.
+		return normMap == null ? input : new MappingCharFilter(normMap,input);
+	  }
+
+	  // "source" => "target"
+	  internal static Pattern p = Pattern.compile("\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$");
+
+	  protected internal virtual void parseRules(IList<string> rules, NormalizeCharMap.Builder builder)
+	  {
+		foreach (string rule in rules)
+		{
+		  Matcher m = p.matcher(rule);
+		  if (!m.find())
+		  {
+			throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "], file = " + mapping);
+		  }
+		  builder.add(parseString(m.group(1)), parseString(m.group(2)));
+		}
+	  }
+
+	  internal char[] @out = new char[256];
+
+	  protected internal virtual string parseString(string s)
+	  {
+		int readPos = 0;
+		int len = s.Length;
+		int writePos = 0;
+		while (readPos < len)
+		{
+		  char c = s[readPos++];
+		  if (c == '\\')
+		  {
+			if (readPos >= len)
+			{
+			  throw new System.ArgumentException("Invalid escaped char in [" + s + "]");
+			}
+			c = s[readPos++];
+			switch (c)
+			{
+			  case '\\' :
+				  c = '\\';
+				  break;
+			  case '"' :
+				  c = '"';
+				  break;
+			  case 'n' :
+				  c = '\n';
+				  break;
+			  case 't' :
+				  c = '\t';
+				  break;
+			  case 'r' :
+				  c = '\r';
+				  break;
+			  case 'b' :
+				  c = '\b';
+				  break;
+			  case 'f' :
+				  c = '\f';
+				  break;
+			  case 'u' :
+				if (readPos + 3 >= len)
+				{
+				  throw new System.ArgumentException("Invalid escaped char in [" + s + "]");
+				}
+				c = (char)System.Convert.ToInt32(s.Substring(readPos, 4), 16);
+				readPos += 4;
+				break;
+			}
+		  }
+		  @out[writePos++] = c;
+		}
+		return new string(@out, 0, writePos);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
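
The rule pattern above recognizes lines of the form "source" => "target".
A sketch of the same parse using System.Text.RegularExpressions, the
natural .NET stand-in for the java.util.regex Pattern still referenced
here (illustrative; not the factory's final form):

    using System;
    using System.Text.RegularExpressions;

    class MappingRuleSketch
    {
        // Same pattern as the Pattern.compile(...) call above.
        static readonly Regex Rule = new Regex("\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$");

        static void Main()
        {
            Match m = Rule.Match("\"&amp;\" => \"&\"");
            if (!m.Success)
                throw new ArgumentException("Invalid Mapping Rule");
            Console.WriteLine(m.Groups[1].Value + " -> " + m.Groups[2].Value); // &amp; -> &
        }
    }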

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
new file mode 100644
index 0000000..ade4318
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
@@ -0,0 +1,162 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using Builder = org.apache.lucene.util.fst.Builder;
+	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using Outputs = org.apache.lucene.util.fst.Outputs;
+	using Util = org.apache.lucene.util.fst.Util;
+
+	// TODO: save/load?
+
+	/// <summary>
+	/// Holds a map of String input to String output, to be used
+	/// with <seealso cref="MappingCharFilter"/>.  Use the <seealso cref="Builder"/>
+	/// to create this.
+	/// </summary>
+	public class NormalizeCharMap
+	{
+
+	  internal readonly FST<CharsRef> map;
+	  internal readonly IDictionary<char?, FST.Arc<CharsRef>> cachedRootArcs = new Dictionary<char?, FST.Arc<CharsRef>>();
+
+	  // Use the builder to create:
+	  private NormalizeCharMap(FST<CharsRef> map)
+	  {
+		this.map = map;
+		if (map != null)
+		{
+		  try
+		  {
+			// Pre-cache root arcs:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef> scratchArc = new org.apache.lucene.util.fst.FST.Arc<>();
+			FST.Arc<CharsRef> scratchArc = new FST.Arc<CharsRef>();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader fstReader = map.getBytesReader();
+			FST.BytesReader fstReader = map.BytesReader;
+			map.getFirstArc(scratchArc);
+			if (FST.targetHasArcs(scratchArc))
+			{
+			  map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader);
+			  while (true)
+			  {
+				Debug.Assert(scratchArc.label != FST.END_LABEL);
+				cachedRootArcs[Convert.ToChar((char) scratchArc.label)] = (new FST.Arc<CharsRef>()).copyFrom(scratchArc);
+				if (scratchArc.Last)
+				{
+				  break;
+				}
+				map.readNextRealArc(scratchArc, fstReader);
+			  }
+			}
+			//System.out.println("cached " + cachedRootArcs.size() + " root arcs");
+		  }
+		  catch (IOException ioe)
+		  {
+			// Bogus FST IOExceptions!!  (will never happen)
+			throw new Exception(ioe.Message, ioe);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds a NormalizeCharMap.
+	  /// <para>
+	  /// Call add() until you have added all the mappings, then call build() to get a NormalizeCharMap
+	  /// @lucene.experimental
+	  /// </para>
+	  /// </summary>
+	  public class Builder
+	  {
+
+		internal readonly IDictionary<string, string> pendingPairs = new SortedDictionary<string, string>();
+
+		/// <summary>
+		/// Records a replacement to be applied to the input
+		///  stream.  Whenever <code>match</code> occurs in
+		///  the input, it will be replaced with
+		///  <code>replacement</code>.
+		/// </summary>
+		/// <param name="match"> input String to be replaced </param>
+		/// <param name="replacement"> output String </param>
+		/// <exception cref="IllegalArgumentException"> if
+		/// <code>match</code> is the empty string, or was
+		/// already previously added </exception>
+		public virtual void add(string match, string replacement)
+		{
+		  if (match.Length == 0)
+		  {
+			throw new System.ArgumentException("cannot match the empty string");
+		  }
+		  if (pendingPairs.ContainsKey(match))
+		  {
+			throw new System.ArgumentException("match \"" + match + "\" was already added");
+		  }
+		  pendingPairs[match] = replacement;
+		}
+
+		/// <summary>
+		/// Builds the NormalizeCharMap; call this once you
+		///  are done calling <seealso cref="#add"/>. 
+		/// </summary>
+		public virtual NormalizeCharMap build()
+		{
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> map;
+		  FST<CharsRef> map;
+		  try
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.Outputs<org.apache.lucene.util.CharsRef> outputs = org.apache.lucene.util.fst.CharSequenceOutputs.getSingleton();
+			Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.Builder<org.apache.lucene.util.CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(org.apache.lucene.util.fst.FST.INPUT_TYPE.BYTE2, outputs);
+			Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.IntsRef scratch = new org.apache.lucene.util.IntsRef();
+			IntsRef scratch = new IntsRef();
+			foreach (KeyValuePair<string, string> ent in pendingPairs.SetOfKeyValuePairs())
+			{
+			  builder.add(Util.toUTF16(ent.Key, scratch), new CharsRef(ent.Value));
+			}
+			map = builder.finish();
+			pendingPairs.Clear();
+		  }
+		  catch (IOException ioe)
+		  {
+			// Bogus FST IOExceptions!!  (will never happen)
+			throw new Exception(ioe.Message, ioe);
+		  }
+
+		  return new NormalizeCharMap(map);
+		}
+	  }
+	}
+
+}
\ No newline at end of file
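
The Builder above collects match/replacement pairs, rejecting empty and
duplicate matches, and only compiles them when build() is called. A
stand-alone sketch of that contract, with a sorted dictionary in place of
the FST the real build() produces:

    using System;
    using System.Collections.Generic;

    class CharMapBuilderSketch
    {
        private readonly SortedDictionary<string, string> pending = new SortedDictionary<string, string>();

        public void Add(string match, string replacement)
        {
            if (match.Length == 0) throw new ArgumentException("cannot match the empty string");
            if (pending.ContainsKey(match)) throw new ArgumentException("match \"" + match + "\" was already added");
            pending[match] = replacement;
        }

        // The real build() compiles the pairs into an FST; here we just freeze them.
        public IDictionary<string, string> Build() { return new Dictionary<string, string>(pending); }

        static void Main()
        {
            var b = new CharMapBuilderSketch();
            b.Add("ß", "ss");
            b.Add("œ", "oe");
            Console.WriteLine(b.Build().Count); // 2
        }
    }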

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
new file mode 100644
index 0000000..801fd45
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
@@ -0,0 +1,118 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="StandardTokenizer"/>,
+	/// normalizes content with <seealso cref="CJKWidthFilter"/>, folds case with
+	/// <seealso cref="LowerCaseFilter"/>, forms bigrams of CJK with <seealso cref="CJKBigramFilter"/>,
+	/// and filters stopwords with <seealso cref="StopFilter"/>
+	/// </summary>
+	public sealed class CJKAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default CJK stopwords.
+	  /// <p/>
+	  /// Currently it contains some common English words that are not usually
+	  /// useful for searching and some double-byte punctuation characters.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(CJKAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer which removes words in <seealso cref="DefaultStopSet"/>.
+	  /// </summary>
+	  public CJKAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public CJKAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		  // run the width filter first before bigramming; it sometimes combines characters.
+		  TokenStream result = new CJKWidthFilter(source);
+		  result = new LowerCaseFilter(matchVersion, result);
+		  result = new CJKBigramFilter(result);
+		  return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+		}
+		else
+		{
+		  Tokenizer source = new CJKTokenizer(reader);
+		  return new TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
+		}
+	  }
+	}
+
+}
\ No newline at end of file
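
A quick sketch of the analyzer in use (the field name and sample text are
hypothetical, and the Analyzer.TokenStream(field, TextReader) overload is
assumed from the core port):

    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_48);
    // "こんにちは" is emitted as the bigrams こん, んに, にち, ちは
    TokenStream stream = analyzer.TokenStream("body", new StringReader("こんにちは"));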

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
new file mode 100644
index 0000000..4ad6f5f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
@@ -0,0 +1,420 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+
+	/// <summary>
+	/// Forms bigrams of CJK terms that are generated from StandardTokenizer
+	/// or ICUTokenizer.
+	/// <para>
+	/// CJK types are set by these tokenizers, but you can also use 
+	/// <seealso cref="#CJKBigramFilter(TokenStream, int)"/> to explicitly control which
+	/// of the CJK scripts are turned into bigrams.
+	/// </para>
+	/// <para>
+	/// By default, when a CJK character has no adjacent characters to form
+	/// a bigram, it is output in unigram form. If you want to always output
+	/// both unigrams and bigrams, set the <code>outputUnigrams</code>
+	/// flag in <seealso cref="CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)"/>.
+	/// This can be used for a combined unigram+bigram approach.
+	/// </para>
+	/// <para>
+	/// In all cases, all non-CJK input is passed thru unmodified.
+	/// </para>
+	/// </summary>
+	public sealed class CJKBigramFilter : TokenFilter
+	{
+	  // configuration
+	  /// <summary>
+	  /// bigram flag for Han Ideographs </summary>
+	  public const int HAN = 1;
+	  /// <summary>
+	  /// bigram flag for Hiragana </summary>
+	  public const int HIRAGANA = 2;
+	  /// <summary>
+	  /// bigram flag for Katakana </summary>
+	  public const int KATAKANA = 4;
+	  /// <summary>
+	  /// bigram flag for Hangul </summary>
+	  public const int HANGUL = 8;
+
+	  /// <summary>
+	  /// when we emit a bigram, it's then marked as this type </summary>
+	  public const string DOUBLE_TYPE = "<DOUBLE>";
+	  /// <summary>
+	  /// when we emit a unigram, it's then marked as this type </summary>
+	  public const string SINGLE_TYPE = "<SINGLE>";
+
+	  // the types from standardtokenizer
+	  private static readonly string HAN_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
+	  private static readonly string HIRAGANA_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
+	  private static readonly string KATAKANA_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
+	  private static readonly string HANGUL_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
+
+	  // sentinel value for ignoring a script 
+	  private static readonly object NO = new object();
+
+	  // these are set to either their type or NO if we want to pass them thru
+	  private readonly object doHan;
+	  private readonly object doHiragana;
+	  private readonly object doKatakana;
+	  private readonly object doHangul;
+
+	  // true if we should output unigram tokens always
+	  private readonly bool outputUnigrams;
+	  private bool ngramState; // false = output unigram, true = output bigram
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly PositionLengthAttribute posLengthAtt = addAttribute(typeof(PositionLengthAttribute));
+
+	  // buffers containing codepoint and offsets in parallel
+	  internal int[] buffer = new int[8];
+	  internal int[] startOffset = new int[8];
+	  internal int[] endOffset = new int[8];
+	  // length of valid buffer
+	  internal int bufferLen;
+	  // current buffer index
+	  internal int index;
+
+	  // the last end offset, to determine if we should bigram across tokens
+	  internal int lastEndOffset;
+
+	  private bool exhausted;
+
+	  /// <summary>
+	  /// Calls <seealso cref="#CJKBigramFilter(TokenStream, int)"/> as
+	  ///       <code>CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)</code>
+	  /// </summary>
+	  public CJKBigramFilter(TokenStream @in) : this(@in, HAN | HIRAGANA | KATAKANA | HANGUL)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Calls <seealso cref="#CJKBigramFilter(TokenStream, int, boolean)"/> as
+	  ///       <code>CJKBigramFilter(in, flags, false)</code>
+	  /// </summary>
+	  public CJKBigramFilter(TokenStream @in, int flags) : this(@in, flags, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+	  /// and whether or not unigrams should also be output. </summary>
+	  /// <param name="flags"> OR'ed set from <seealso cref="CJKBigramFilter#HAN"/>, <seealso cref="CJKBigramFilter#HIRAGANA"/>, 
+	  ///        <seealso cref="CJKBigramFilter#KATAKANA"/>, <seealso cref="CJKBigramFilter#HANGUL"/> </param>
+	  /// <param name="outputUnigrams"> true if unigrams for the selected writing systems should also be output.
+	  ///        when this is false, this is only done when there are no adjacent characters to form
+	  ///        a bigram. </param>
+	  public CJKBigramFilter(TokenStream @in, int flags, bool outputUnigrams) : base(@in)
+	  {
+		doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
+		doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
+		doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
+		doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
+		this.outputUnigrams = outputUnigrams;
+	  }
+
+	  /*
+	   * much of this complexity revolves around handling the special case of a 
+	   * "lone cjk character" where cjktokenizer would output a unigram. this 
+	   * is also the only time we ever have to captureState.
+	   */
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (hasBufferedBigram())
+		  {
+
+			// case 1: we have multiple remaining codepoints buffered,
+			// so we can emit a bigram here.
+
+			if (outputUnigrams)
+			{
+
+			  // when also outputting unigrams, we output the unigram first,
+			  // then rewind back to revisit the bigram.
+			  // so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
+			  // the logic in hasBufferedUnigram ensures we output the C, 
+			  // even though it did actually have adjacent CJK characters.
+
+			  if (ngramState)
+			  {
+				flushBigram();
+			  }
+			  else
+			  {
+				flushUnigram();
+				index--;
+			  }
+			  ngramState = !ngramState;
+			}
+			else
+			{
+			  flushBigram();
+			}
+			return true;
+		  }
+		  else if (doNext())
+		  {
+
+			// case 2: look at the token type. should we form any n-grams?
+
+			string type = typeAtt.type();
+			if (type == doHan || type == doHiragana || type == doKatakana || type == doHangul)
+			{
+
+			  // acceptable CJK type: we form n-grams from these.
+			  // as long as the offsets are aligned, we just add these to our current buffer.
+			  // otherwise, we clear the buffer and start over.
+
+			  if (offsetAtt.startOffset() != lastEndOffset) // unaligned, clear queue
+			  {
+				if (hasBufferedUnigram())
+				{
+
+				  // we have a buffered unigram, and we peeked ahead to see if we could form
+				  // a bigram, but we can't, because the offsets are unaligned. capture the state 
+				  // of this peeked data to be revisited next time thru the loop, and dump our unigram.
+
+				  loneState = captureState();
+				  flushUnigram();
+				  return true;
+				}
+				index = 0;
+				bufferLen = 0;
+			  }
+			  refill();
+			}
+			else
+			{
+
+			  // not a CJK type: we just return these as-is.
+
+			  if (hasBufferedUnigram())
+			  {
+
+				// we have a buffered unigram, and we peeked ahead to see if we could form
+				// a bigram, but we can't, because its not a CJK type. capture the state 
+				// of this peeked data to be revisited next time thru the loop, and dump our unigram.
+
+				loneState = captureState();
+				flushUnigram();
+				return true;
+			  }
+			  return true;
+			}
+		  }
+		  else
+		  {
+
+			// case 3: we have only zero or 1 codepoints buffered, 
+			// so not enough to form a bigram. But, we also have no
+			// more input. So if we have a buffered codepoint, emit
+			// a unigram, otherwise, its end of stream.
+
+			if (hasBufferedUnigram())
+			{
+			  flushUnigram(); // flush our remaining unigram
+			  return true;
+			}
+			return false;
+		  }
+		}
+	  }
+
+	  private State loneState; // rarely used: only for "lone cjk characters", where we emit unigrams
+
+	  /// <summary>
+	  /// looks at next input token, returning false if none is available
+	  /// </summary>
+	  private bool doNext()
+	  {
+		if (loneState != null)
+		{
+		  restoreState(loneState);
+		  loneState = null;
+		  return true;
+		}
+		else
+		{
+		  if (exhausted)
+		  {
+			return false;
+		  }
+		  else if (input.incrementToken())
+		  {
+			return true;
+		  }
+		  else
+		  {
+			exhausted = true;
+			return false;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// refills buffers with new data from the current token.
+	  /// </summary>
+	  private void refill()
+	  {
+		// compact buffers to keep them smallish if they become large
+		// just a safety check, but technically we only need the last codepoint
+		if (bufferLen > 64)
+		{
+		  int last = bufferLen - 1;
+		  buffer[0] = buffer[last];
+		  startOffset[0] = startOffset[last];
+		  endOffset[0] = endOffset[last];
+		  bufferLen = 1;
+		  index -= last;
+		}
+
+		char[] termBuffer = termAtt.buffer();
+		int len = termAtt.length();
+		int start = offsetAtt.startOffset();
+		int end = offsetAtt.endOffset();
+
+		int newSize = bufferLen + len;
+		buffer = ArrayUtil.grow(buffer, newSize);
+		startOffset = ArrayUtil.grow(startOffset, newSize);
+		endOffset = ArrayUtil.grow(endOffset, newSize);
+		lastEndOffset = end;
+
+		if (end - start != len)
+		{
+		  // crazy offsets (modified by synonym or charfilter): just preserve
+		  for (int i = 0, cp = 0, cpLen = 0; i < len; i += cpLen)
+		  {
+			// Character.codePointAt equivalent: combine a surrogate pair into one code point
+			cp = buffer[bufferLen] = (char.IsHighSurrogate(termBuffer[i]) && i + 1 < len && char.IsLowSurrogate(termBuffer[i + 1])) ? char.ConvertToUtf32(termBuffer[i], termBuffer[i + 1]) : termBuffer[i];
+			cpLen = cp >= 0x10000 ? 2 : 1; // Character.charCount equivalent
+			startOffset[bufferLen] = start;
+			endOffset[bufferLen] = end;
+			bufferLen++;
+		  }
+		}
+		else
+		{
+		  // normal offsets
+		  for (int i = 0, cp = 0, cpLen = 0; i < len; i += cpLen)
+		  {
+			cp = buffer[bufferLen] = (char.IsHighSurrogate(termBuffer[i]) && i + 1 < len && char.IsLowSurrogate(termBuffer[i + 1])) ? char.ConvertToUtf32(termBuffer[i], termBuffer[i + 1]) : termBuffer[i];
+			cpLen = cp >= 0x10000 ? 2 : 1;
+			startOffset[bufferLen] = start;
+			start = endOffset[bufferLen] = start + cpLen;
+			bufferLen++;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Flushes a bigram token to output from our buffer 
+	  /// This is the normal case, e.g. ABC -> AB BC
+	  /// </summary>
+	  private void flushBigram()
+	  {
+		clearAttributes();
+		char[] termBuffer = termAtt.resizeBuffer(4); // maximum bigram length in code units (2 supplementaries)
+		// Character.toChars equivalent: expand each code point into UTF-16 code units
+		string first = char.ConvertFromUtf32(buffer[index]);
+		first.CopyTo(0, termBuffer, 0, first.Length);
+		string second = char.ConvertFromUtf32(buffer[index + 1]);
+		second.CopyTo(0, termBuffer, first.Length, second.Length);
+		termAtt.Length = first.Length + second.Length;
+		offsetAtt.setOffset(startOffset[index], endOffset[index + 1]);
+		typeAtt.Type = DOUBLE_TYPE;
+		// when outputting unigrams, all bigrams are synonyms that span two unigrams
+		if (outputUnigrams)
+		{
+		  posIncAtt.PositionIncrement = 0;
+		  posLengthAtt.PositionLength = 2;
+		}
+		index++;
+	  }
+
+	  /// <summary>
+	  /// Flushes a unigram token to output from our buffer.
+	  /// This happens when we encounter isolated CJK characters: either the whole
+	  /// CJK string is a single character, or we encounter a CJK character surrounded 
+	  /// by space, punctuation, English, etc., but not beside any other CJK.
+	  /// </summary>
+	  private void flushUnigram()
+	  {
+		clearAttributes();
+		char[] termBuffer = termAtt.resizeBuffer(2); // maximum unigram length (2 surrogates)
+		// Character.toChars equivalent: expand the code point into UTF-16 code units
+		string chars = char.ConvertFromUtf32(buffer[index]);
+		chars.CopyTo(0, termBuffer, 0, chars.Length);
+		termAtt.Length = chars.Length;
+		offsetAtt.setOffset(startOffset[index], endOffset[index]);
+		typeAtt.Type = SINGLE_TYPE;
+		index++;
+	  }
+
+	  /// <summary>
+	  /// True if we have multiple codepoints sitting in our buffer
+	  /// </summary>
+	  private bool hasBufferedBigram()
+	  {
+		return bufferLen - index > 1;
+	  }
+
+	  /// <summary>
+	  /// True if we have a single codepoint sitting in our buffer, where its future
+	  /// (whether it is emitted as unigram or forms a bigram) depends upon not-yet-seen
+	  /// inputs.
+	  /// </summary>
+	  private bool hasBufferedUnigram()
+	  {
+		if (outputUnigrams)
+		{
+		  // when outputting unigrams always
+		  return bufferLen - index == 1;
+		}
+		else
+		{
+		  // otherwise it's only when we have a lone CJK character
+		  return bufferLen == 1 && index == 0;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		bufferLen = 0;
+		index = 0;
+		lastEndOffset = 0;
+		loneState = null;
+		exhausted = false;
+		ngramState = false;
+	  }
+	}
+
+}
\ No newline at end of file
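
To make the flag handling concrete, a sketch of wiring the filter by hand
(reader is any TextReader; the constants are the ones defined above):

    Tokenizer source = new StandardTokenizer(Version.LUCENE_48, reader);
    // bigram only Han and Hiragana; Katakana and Hangul tokens pass through unchanged
    TokenStream bigrams = new CJKBigramFilter(source, CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA);
    // or, for the combined unigram+bigram approach described in the class comment:
    TokenStream combined = new CJKBigramFilter(source, CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA | CJKBigramFilter.KATAKANA | CJKBigramFilter.HANGUL, true);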

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
new file mode 100644
index 0000000..9783238
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
@@ -0,0 +1,79 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="CJKBigramFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CJKWidthFilterFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.CJKBigramFilterFactory" 
+	///       han="true" hiragana="true" 
+	///       katakana="true" hangul="true" outputUnigrams="false" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CJKBigramFilterFactory : TokenFilterFactory
+	{
+	  internal readonly int flags;
+	  internal readonly bool outputUnigrams;
+
+	  /// <summary>
+	  /// Creates a new CJKBigramFilterFactory </summary>
+	  public CJKBigramFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		int flags = 0;
+		if (getBoolean(args, "han", true))
+		{
+		  flags |= CJKBigramFilter.HAN;
+		}
+		if (getBoolean(args, "hiragana", true))
+		{
+		  flags |= CJKBigramFilter.HIRAGANA;
+		}
+		if (getBoolean(args, "katakana", true))
+		{
+		  flags |= CJKBigramFilter.KATAKANA;
+		}
+		if (getBoolean(args, "hangul", true))
+		{
+		  flags |= CJKBigramFilter.HANGUL;
+		}
+		this.flags = flags;
+		this.outputUnigrams = getBoolean(args, "outputUnigrams", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new CJKBigramFilter(input, flags, outputUnigrams);
+	  }
+	}
+
+}
\ No newline at end of file
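
The factory accepts the same arguments as the Solr snippet in its doc comment;
in code they can be passed as a dictionary (a sketch; the constructor consumes
the keys it recognizes and throws on leftovers):

    var args = new Dictionary<string, string>
    {
        { "han", "true" }, { "hiragana", "true" },
        { "katakana", "false" }, { "hangul", "false" },
        { "outputUnigrams", "true" }
    };
    var factory = new CJKBigramFilterFactory(args);
    TokenStream filtered = factory.create(input);   // input: any upstream TokenStream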


[23/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
new file mode 100644
index 0000000..91d84ee
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -0,0 +1,2044 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+This file was partially derived from the
+original CIIR University of Massachusetts Amherst version of KStemmer.java (license for
+the original shown below)
+ */
+
+/*
+ Copyright © 2003,
+ Center for Intelligent Information Retrieval,
+ University of Massachusetts, Amherst.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. The names "Center for Intelligent Information Retrieval" and
+ "University of Massachusetts" must not be used to endorse or promote products
+ derived from this software without prior written permission. To obtain
+ permission, contact info@ciir.cs.umass.edu.
+
+ THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	using org.apache.lucene.analysis.util;
+	using OpenStringBuilder = org.apache.lucene.analysis.util.OpenStringBuilder;
+	/// <summary>
+	/// <para>Title: Kstemmer</para>
+	/// <para>Description: This is a C# port (via the Java version) of Bob Krovetz' kstem stemmer</para>
+	/// <para>Copyright: Copyright 2008, Lucid Imagination, Inc. </para>
+	/// <para>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu) </para>
+	/// </summary>
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// This class implements the Kstem algorithm
+	/// </summary>
+	public class KStemmer
+	{
+	  private const int MaxWordLen = 50;
+
+	  private static readonly string[] exceptionWords = new string[] {"aide", "bathe", "caste", "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage", "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane", "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune", "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite", "swinge", "tare", "tine", "tope", "tripe", "twine"};
+
+	  private static readonly string[][] directConflations = new string[][]
+	  {
+		  new string[] {"aging", "age"},
+		  new string[] {"going", "go"},
+		  new string[] {"goes", "go"},
+		  new string[] {"lying", "lie"},
+		  new string[] {"using", "use"},
+		  new string[] {"owing", "owe"},
+		  new string[] {"suing", "sue"},
+		  new string[] {"dying", "die"},
+		  new string[] {"tying", "tie"},
+		  new string[] {"vying", "vie"},
+		  new string[] {"aged", "age"},
+		  new string[] {"used", "use"},
+		  new string[] {"vied", "vie"},
+		  new string[] {"cued", "cue"},
+		  new string[] {"died", "die"},
+		  new string[] {"eyed", "eye"},
+		  new string[] {"hued", "hue"},
+		  new string[] {"iced", "ice"},
+		  new string[] {"lied", "lie"},
+		  new string[] {"owed", "owe"},
+		  new string[] {"sued", "sue"},
+		  new string[] {"toed", "toe"},
+		  new string[] {"tied", "tie"},
+		  new string[] {"does", "do"},
+		  new string[] {"doing", "do"},
+		  new string[] {"aeronautical", "aeronautics"},
+		  new string[] {"mathematical", "mathematics"},
+		  new string[] {"political", "politics"},
+		  new string[] {"metaphysical", "metaphysics"},
+		  new string[] {"cylindrical", "cylinder"},
+		  new string[] {"nazism", "nazi"},
+		  new string[] {"ambiguity", "ambiguous"},
+		  new string[] {"barbarity", "barbarous"},
+		  new string[] {"credulity", "credulous"},
+		  new string[] {"generosity", "generous"},
+		  new string[] {"spontaneity", "spontaneous"},
+		  new string[] {"unanimity", "unanimous"},
+		  new string[] {"voracity", "voracious"},
+		  new string[] {"fled", "flee"},
+		  new string[] {"miscarriage", "miscarry"}
+	  };
+
+	  private static readonly string[][] countryNationality = new string[][]
+	  {
+		  new string[] {"afghan", "afghanistan"},
+		  new string[] {"african", "africa"},
+		  new string[] {"albanian", "albania"},
+		  new string[] {"algerian", "algeria"},
+		  new string[] {"american", "america"},
+		  new string[] {"andorran", "andorra"},
+		  new string[] {"angolan", "angola"},
+		  new string[] {"arabian", "arabia"},
+		  new string[] {"argentine", "argentina"},
+		  new string[] {"armenian", "armenia"},
+		  new string[] {"asian", "asia"},
+		  new string[] {"australian", "australia"},
+		  new string[] {"austrian", "austria"},
+		  new string[] {"azerbaijani", "azerbaijan"},
+		  new string[] {"azeri", "azerbaijan"},
+		  new string[] {"bangladeshi", "bangladesh"},
+		  new string[] {"belgian", "belgium"},
+		  new string[] {"bermudan", "bermuda"},
+		  new string[] {"bolivian", "bolivia"},
+		  new string[] {"bosnian", "bosnia"},
+		  new string[] {"botswanan", "botswana"},
+		  new string[] {"brazilian", "brazil"},
+		  new string[] {"british", "britain"},
+		  new string[] {"bulgarian", "bulgaria"},
+		  new string[] {"burmese", "burma"},
+		  new string[] {"californian", "california"},
+		  new string[] {"cambodian", "cambodia"},
+		  new string[] {"canadian", "canada"},
+		  new string[] {"chadian", "chad"},
+		  new string[] {"chilean", "chile"},
+		  new string[] {"chinese", "china"},
+		  new string[] {"colombian", "colombia"},
+		  new string[] {"croat", "croatia"},
+		  new string[] {"croatian", "croatia"},
+		  new string[] {"cuban", "cuba"},
+		  new string[] {"cypriot", "cyprus"},
+		  new string[] {"czechoslovakian", "czechoslovakia"},
+		  new string[] {"danish", "denmark"},
+		  new string[] {"egyptian", "egypt"},
+		  new string[] {"equadorian", "equador"},
+		  new string[] {"eritrean", "eritrea"},
+		  new string[] {"estonian", "estonia"},
+		  new string[] {"ethiopian", "ethiopia"},
+		  new string[] {"european", "europe"},
+		  new string[] {"fijian", "fiji"},
+		  new string[] {"filipino", "philippines"},
+		  new string[] {"finnish", "finland"},
+		  new string[] {"french", "france"},
+		  new string[] {"gambian", "gambia"},
+		  new string[] {"georgian", "georgia"},
+		  new string[] {"german", "germany"},
+		  new string[] {"ghanian", "ghana"},
+		  new string[] {"greek", "greece"},
+		  new string[] {"grenadan", "grenada"},
+		  new string[] {"guamian", "guam"},
+		  new string[] {"guatemalan", "guatemala"},
+		  new string[] {"guinean", "guinea"},
+		  new string[] {"guyanan", "guyana"},
+		  new string[] {"haitian", "haiti"},
+		  new string[] {"hawaiian", "hawaii"},
+		  new string[] {"holland", "dutch"},
+		  new string[] {"honduran", "honduras"},
+		  new string[] {"hungarian", "hungary"},
+		  new string[] {"icelandic", "iceland"},
+		  new string[] {"indonesian", "indonesia"},
+		  new string[] {"iranian", "iran"},
+		  new string[] {"iraqi", "iraq"},
+		  new string[] {"iraqui", "iraq"},
+		  new string[] {"irish", "ireland"},
+		  new string[] {"israeli", "israel"},
+		  new string[] {"italian", "italy"},
+		  new string[] {"jamaican", "jamaica"},
+		  new string[] {"japanese", "japan"},
+		  new string[] {"jordanian", "jordan"},
+		  new string[] {"kampuchean", "cambodia"},
+		  new string[] {"kenyan", "kenya"},
+		  new string[] {"korean", "korea"},
+		  new string[] {"kuwaiti", "kuwait"},
+		  new string[] {"lankan", "lanka"},
+		  new string[] {"laotian", "laos"},
+		  new string[] {"latvian", "latvia"},
+		  new string[] {"lebanese", "lebanon"},
+		  new string[] {"liberian", "liberia"},
+		  new string[] {"libyan", "libya"},
+		  new string[] {"lithuanian", "lithuania"},
+		  new string[] {"macedonian", "macedonia"},
+		  new string[] {"madagascan", "madagascar"},
+		  new string[] {"malaysian", "malaysia"},
+		  new string[] {"maltese", "malta"},
+		  new string[] {"mauritanian", "mauritania"},
+		  new string[] {"mexican", "mexico"},
+		  new string[] {"micronesian", "micronesia"},
+		  new string[] {"moldovan", "moldova"},
+		  new string[] {"monacan", "monaco"},
+		  new string[] {"mongolian", "mongolia"},
+		  new string[] {"montenegran", "montenegro"},
+		  new string[] {"moroccan", "morocco"},
+		  new string[] {"myanmar", "burma"},
+		  new string[] {"namibian", "namibia"},
+		  new string[] {"nepalese", "nepal"},
+		  new string[] {"nicaraguan", "nicaragua"},
+		  new string[] {"nigerian", "nigeria"},
+		  new string[] {"norwegian", "norway"},
+		  new string[] {"omani", "oman"},
+		  new string[] {"pakistani", "pakistan"},
+		  new string[] {"panamanian", "panama"},
+		  new string[] {"papuan", "papua"},
+		  new string[] {"paraguayan", "paraguay"},
+		  new string[] {"peruvian", "peru"},
+		  new string[] {"portuguese", "portugal"},
+		  new string[] {"romanian", "romania"},
+		  new string[] {"rumania", "romania"},
+		  new string[] {"rumanian", "romania"},
+		  new string[] {"russian", "russia"},
+		  new string[] {"rwandan", "rwanda"},
+		  new string[] {"samoan", "samoa"},
+		  new string[] {"scottish", "scotland"},
+		  new string[] {"serb", "serbia"},
+		  new string[] {"serbian", "serbia"},
+		  new string[] {"siam", "thailand"},
+		  new string[] {"siamese", "thailand"},
+		  new string[] {"slovakia", "slovak"},
+		  new string[] {"slovakian", "slovak"},
+		  new string[] {"slovenian", "slovenia"},
+		  new string[] {"somali", "somalia"},
+		  new string[] {"somalian", "somalia"},
+		  new string[] {"spanish", "spain"},
+		  new string[] {"swedish", "sweden"},
+		  new string[] {"swiss", "switzerland"},
+		  new string[] {"syrian", "syria"},
+		  new string[] {"taiwanese", "taiwan"},
+		  new string[] {"tanzanian", "tanzania"},
+		  new string[] {"texan", "texas"},
+		  new string[] {"thai", "thailand"},
+		  new string[] {"tunisian", "tunisia"},
+		  new string[] {"turkish", "turkey"},
+		  new string[] {"ugandan", "uganda"},
+		  new string[] {"ukrainian", "ukraine"},
+		  new string[] {"uruguayan", "uruguay"},
+		  new string[] {"uzbek", "uzbekistan"},
+		  new string[] {"venezuelan", "venezuela"},
+		  new string[] {"vietnamese", "viet"},
+		  new string[] {"virginian", "virginia"},
+		  new string[] {"yemeni", "yemen"},
+		  new string[] {"yugoslav", "yugoslavia"},
+		  new string[] {"yugoslavian", "yugoslavia"},
+		  new string[] {"zambian", "zambia"},
+		  new string[] {"zealander", "zealand"},
+		  new string[] {"zimbabwean", "zimbabwe"}
+	  };
+
+	  private static readonly string[] supplementDict = new string[] {"aids", "applicator", "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere", "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph", "toroid", "superconduct", "backscatter", "connectionism"};
+
+	  private static readonly string[] properNouns = new string[] {"abrams", "achilles", "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred", "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles", "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher", "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor", "barbados", "barger", "bering", "brahms", "brandeis", "brussels", "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker", "charles", "cheops", "ching", "christmas", "cocos", "collins", "columbus", "confucius", "conners", "connolly", "copernicus", "cramer", "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies", "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris", "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries", "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans", "everglades", "fairbanks", "federales", "fisher", "fitzsimmons", "fleming",
+		  "forbes", "fowler", "france", "francis", "goering", "goodling", "goths", "grenadines", "guiness", "hades", "harding", "harris", "hastings", "hawkes", "hawking", "hayes", "heights", "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras", "hopkins", "hughes", "humphreys", "illinois", "indianapolis", "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james", "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius", "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds", "levis", "leviticus", "lewis", "louis", "maccabees", "madras", "maimonides", "maldive", "massachusetts", "matthews", "mauritius", "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed", "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing", "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus", "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker", "pauling", "peking", "pershing", "peter", "peters", "philippines",
+		  "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais", "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins", "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates", "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius", "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers", "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares", "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers", "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing", "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius", "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester", "connally", "conner", "coors", "cummings", "curtis", "daedalus", "dionysus", "dobbs", "dolores", "edmonds"};
+
+	  internal class DictEntry
+	  {
+		internal bool exception;
+		internal string root;
+
+		internal DictEntry(string root, bool isException)
+		{
+		  this.root = root;
+		  this.exception = isException;
+		}
+	  }
+
+	  private static readonly CharArrayMap<DictEntry> dict_ht = initializeDictHash();
+
+	  /*
+	   * caching off: private int maxCacheSize; private CharArrayMap<String> cache =
+	   * null; private static final String SAME = "SAME"; // use if stemmed form is
+	   * the same
+	   */
+
+	  private readonly OpenStringBuilder word = new OpenStringBuilder();
+	  private int j; // index of final letter in stem (within word)
+	  private int k; /*
+	                  * INDEX of final letter in word. You must add 1 to k to get
+	                  * the current length of word. When you want the length of
+	                  * word, use the method wordLength, which returns (k+1).
+	                  */
+
+	  /*
+	   * private void initializeStemHash() { if (maxCacheSize > 0) cache = new
+	   * CharArrayMap<String>(maxCacheSize,false); }
+	   */
+
+	  private char finalChar()
+	  {
+		return word.charAt(k);
+	  }
+
+	  private char penultChar()
+	  {
+		return word.charAt(k - 1);
+	  }
+
+	  private bool isVowel(int index)
+	  {
+		return !isCons(index);
+	  }
+
+	  private bool isCons(int index)
+	  {
+		char ch;
+
+		ch = word.charAt(index);
+
+		if ((ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'))
+		{
+			return false;
+		}
+		if ((ch != 'y') || (index == 0))
+		{
+			return true;
+		}
+		else
+		{
+			return (!isCons(index - 1));
+		}
+	  }
+
+	  private static CharArrayMap<DictEntry> initializeDictHash()
+	  {
+		DictEntry defaultEntry;
+		DictEntry entry;
+
+		CharArrayMap<DictEntry> d = new CharArrayMap<DictEntry>(Version.LUCENE_CURRENT, 1000, false);
+		for (int i = 0; i < exceptionWords.Length; i++)
+		{
+		  if (!d.containsKey(exceptionWords[i]))
+		  {
+			entry = new DictEntry(exceptionWords[i], true);
+			d.put(exceptionWords[i], entry);
+		  }
+		  else
+		  {
+			throw new Exception("Warning: Entry [" + exceptionWords[i] + "] already in dictionary 1");
+		  }
+		}
+
+		for (int i = 0; i < directConflations.Length; i++)
+		{
+		  if (!d.containsKey(directConflations[i][0]))
+		  {
+			entry = new DictEntry(directConflations[i][1], false);
+			d.put(directConflations[i][0], entry);
+		  }
+		  else
+		  {
+			throw new Exception("Warning: Entry [" + directConflations[i][0] + "] already in dictionary 2");
+		  }
+		}
+
+		for (int i = 0; i < countryNationality.Length; i++)
+		{
+		  if (!d.containsKey(countryNationality[i][0]))
+		  {
+			entry = new DictEntry(countryNationality[i][1], false);
+			d.put(countryNationality[i][0], entry);
+		  }
+		  else
+		  {
+			throw new Exception("Warning: Entry [" + countryNationality[i][0] + "] already in dictionary 3");
+		  }
+		}
+
+		defaultEntry = new DictEntry(null, false);
+
+		foreach (string[] array in new string[][] {KStemData1.data, KStemData2.data, KStemData3.data, KStemData4.data, KStemData5.data, KStemData6.data, KStemData7.data, KStemData8.data})
+		{
+		  for (int i = 0; i < array.Length; i++)
+		  {
+			if (!d.containsKey(array[i]))
+			{
+			  d.put(array[i], defaultEntry);
+			}
+			else
+			{
+			  throw new Exception("Warning: Entry [" + array[i] + "] already in dictionary 4");
+			}
+		  }
+		}
+
+		for (int i = 0; i < supplementDict.Length; i++)
+		{
+		  if (!d.containsKey(supplementDict[i]))
+		  {
+			d.put(supplementDict[i], defaultEntry);
+		  }
+		  else
+		  {
+			throw new Exception("Warning: Entry [" + supplementDict[i] + "] already in dictionary 5");
+		  }
+		}
+
+		for (int i = 0; i < properNouns.Length; i++)
+		{
+		  if (!d.containsKey(properNouns[i]))
+		  {
+			d.put(properNouns[i], defaultEntry);
+		  }
+		  else
+		  {
+			throw new Exception("Warning: Entry [" + properNouns[i] + "] already in dictionary 6");
+		  }
+		}
+
+		return d;
+	  }
+
+	  private bool isAlpha(char ch)
+	  {
+		return ch >= 'a' && ch <= 'z'; // terms must be lowercased already
+	  }
+
+	  /* length of stem within word */
+	  private int stemLength()
+	  {
+		return j + 1;
+	  }
+
+	  private bool endsIn(char[] s)
+	  {
+		if (s.Length > k)
+		{
+			return false;
+		}
+
+		int r = word.length() - s.Length; // length of word before this suffix
+		j = k;
+		for (int r1 = r, i = 0; i < s.Length; i++, r1++)
+		{
+		  if (s[i] != word.charAt(r1))
+		  {
+			  return false;
+		  }
+		}
+		j = r - 1; // index of the character BEFORE the suffix
+		return true;
+	  }
+
+	  private bool endsIn(char a, char b)
+	  {
+		if (2 > k)
+		{
+			return false;
+		}
+		// check left to right since the endings have often already matched
+		if (word.charAt(k - 1) == a && word.charAt(k) == b)
+		{
+		  j = k - 2;
+		  return true;
+		}
+		return false;
+	  }
+
+	  private bool endsIn(char a, char b, char c)
+	  {
+		if (3 > k)
+		{
+			return false;
+		}
+		if (word.charAt(k - 2) == a && word.charAt(k - 1) == b && word.charAt(k) == c)
+		{
+		  j = k - 3;
+		  return true;
+		}
+		return false;
+	  }
+
+	  private bool endsIn(char a, char b, char c, char d)
+	  {
+		if (4 > k)
+		{
+			return false;
+		}
+		if (word.charAt(k - 3) == a && word.charAt(k - 2) == b && word.charAt(k - 1) == c && word.charAt(k) == d)
+		{
+		  j = k - 4;
+		  return true;
+		}
+		return false;
+	  }
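+
+	  /* Example: for "goodness" (k = 7), endsIn('n','e','s','s') matches and sets
+	   * j = 3, the index of the 'd' just before the suffix, so the stem the -ness
+	   * rules operate on is "good". */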
+
+	  private DictEntry wordInDict()
+	  {
+		/*
+		 * if (matchedEntry != null) { if (dict_ht.get(word.getArray(), 0,
+		 * word.size()) != matchedEntry) {
+		 * System.out.println("Uh oh... cached entry doesn't match"); } return
+		 * matchedEntry; }
+		 */
+		if (matchedEntry != null)
+		{
+			return matchedEntry;
+		}
+		DictEntry e = dict_ht.get(word.Array, 0, word.length());
+		if (e != null && !e.exception)
+		{
+		  matchedEntry = e; // only cache if it's not an exception.
+		}
+		// lookups.add(word.toString());
+		return e;
+	  }
+
+	  /* Convert plurals to singular form, and '-ies' to 'y' */
+	  private void plural()
+	  {
+		if (word.charAt(k) == 's')
+		{
+		  if (endsIn('i', 'e', 's'))
+		  {
+			word.Length = j + 3;
+			k--;
+			if (lookup()) // ensure calories -> calorie
+			{
+			  return;
+			}
+			k++;
+			word.unsafeWrite('s');
+			Suffix = "y";
+			lookup();
+		  }
+		  else if (endsIn('e', 's'))
+		  {
+			/* try just removing the "s" */
+			word.Length = j + 2;
+			k--;
+
+			/*
+			 * note: don't check for exceptions here. So, `aides' -> `aide', but
+			 * `aided' -> `aid'. The exception for double s is used to prevent
+			 * crosses -> crosse. This is actually correct if crosses is a plural
+			 * noun (a type of racket used in lacrosse), but the verb is much more
+			 * common
+			 */
+
+			/*
+			 * YCS: this was the one place where lookup was not followed by return.
+			 * So restructure it. if ((j>0)&&(lookup(word.toString())) &&
+			 * !((word.charAt(j) == 's') && (word.charAt(j-1) == 's'))) return;
+			 */
+			bool tryE = j > 0 && !((word.charAt(j) == 's') && (word.charAt(j - 1) == 's'));
+			if (tryE && lookup())
+			{
+				return;
+			}
+
+			/* try removing the "es" */
+
+			word.Length = j + 1;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+
+			/* the default is to retain the "e" */
+			word.unsafeWrite('e');
+			k++;
+
+			if (!tryE) // if we didn't try the "e" ending before
+			{
+				lookup();
+			}
+			return;
+		  }
+		  else
+		  {
+			if (word.length() > 3 && penultChar() != 's' && !endsIn('o', 'u', 's'))
+			{
+			  /* unless the word ends in "ous" or a double "s", remove the final "s" */
+
+			  word.Length = k;
+			  k--;
+			  lookup();
+			}
+		  }
+		}
+	  }
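+
+	  /* Worked example for plural(): "calories" first truncates to "calorie",
+	   * which is found in the dictionary and returned as-is; "abilities" fails
+	   * the "abilitie" lookup, so the -ies suffix is rewritten to -y, giving
+	   * "ability". */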
+
+	  private string Suffix
+	  {
+		  set
+		  {
+			setSuff(value, value.Length);
+		  }
+	  }
+
+	  /* replace old suffix with s */
+	  private void setSuff(string s, int len)
+	  {
+		word.Length = j + 1;
+		for (int l = 0; l < len; l++)
+		{
+		  word.unsafeWrite(s[l]);
+		}
+		k = j + len;
+	  }
+
+	  /* Returns true if the word is found in the dictionary */
+	  // almost all uses of lookup() return immediately and are
+	  // followed by another lookup in the dict. Store the match
+	  // to avoid this double lookup.
+	  internal DictEntry matchedEntry = null;
+
+	  private bool lookup()
+	  {
+		/*
+		 * debugging code: String thisLookup = word.toString(); boolean added =
+		 * lookups.add(thisLookup); if (!added) {
+		 * System.out.println("######extra lookup:" + thisLookup); // occasional
+		 * extra lookups aren't necessarily errors... could happen by diff
+		 * manipulations // throw new RuntimeException("######extra lookup:" +
+		 * thisLookup); } else { // System.out.println("new lookup:" + thisLookup); }
+		 */
+
+		matchedEntry = dict_ht.get(word.Array, 0, word.size());
+		return matchedEntry != null;
+	  }
+
+	  // Set<String> lookups = new HashSet<>();
+
+	  /* convert past tense (-ed) to present, and `-ied' to `y' */
+	  private void pastTense()
+	  {
+		/*
+		 * Handle words less than 5 letters with a direct mapping This prevents
+		 * (fled -> fl).
+		 */
+		if (word.length() <= 4)
+		{
+			return;
+		}
+
+		if (endsIn('i', 'e', 'd'))
+		{
+		  word.Length = j + 3;
+		  k--;
+		  if (lookup()) // we almost always want to convert -ied to -y, but
+		  {
+			return; // this isn't true for short words (died->die)
+		  }
+		  k++; // I don't know any long words that this applies to,
+		  word.unsafeWrite('d'); // but just in case...
+		  Suffix = "y";
+		  lookup();
+		  return;
+		}
+
+		/* the vowelInStem() is necessary so we don't stem acronyms */
+		if (endsIn('e', 'd') && vowelInStem())
+		{
+		  /* see if the root ends in `e' */
+		  word.Length = j + 2;
+		  k = j + 1;
+
+		  DictEntry entry = wordInDict();
+		  if (entry != null)
+		  {
+			if (!entry.exception) // if it's in the dictionary and not an exception
+			{
+			  return;
+			}
+		  }
+
+		  /* try removing the "ed" */
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /*
+		   * try removing a doubled consonant. if the root isn't found in the
+		   * dictionary, the default is to leave it doubled. This will correctly
+		   * capture `backfilled' -> `backfill' instead of `backfill' ->
+		   * `backfille', and seems correct most of the time
+		   */
+
+		  if (doubleC(k))
+		  {
+			word.Length = k;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(k));
+			k++;
+			lookup();
+			return;
+		  }
+
+		  /* if we have a `un-' prefix, then leave the word alone */
+		  /* (this will sometimes screw up with `under-', but we */
+		  /* will take care of that later) */
+
+		  if ((word.charAt(0) == 'u') && (word.charAt(1) == 'n'))
+		  {
+			word.unsafeWrite('e');
+			word.unsafeWrite('d');
+			k = k + 2;
+			// nolookup()
+			return;
+		  }
+
+		  /*
+		   * it wasn't found by just removing the `d' or the `ed', so prefer to end
+		   * with an `e' (e.g., `microcoded' -> `microcode').
+		   */
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // nolookup() - we already tried the "e" ending
+		  return;
+		}
+	  }
+
+	  /* return TRUE if word ends with a double consonant */
+	  private bool doubleC(int i)
+	  {
+		if (i < 1)
+		{
+			return false;
+		}
+
+		if (word.charAt(i) != word.charAt(i - 1))
+		{
+			return false;
+		}
+		return (isCons(i));
+	  }
+
+	  private bool vowelInStem()
+	  {
+		for (int i = 0; i < stemLength(); i++)
+		{
+		  if (isVowel(i))
+		  {
+			  return true;
+		  }
+		}
+		return false;
+	  }
+
+	  /* handle `-ing' endings */
+	  private void aspect()
+	  {
+		/*
+		 * handle short words (aging -> age) via a direct mapping. This prevents
+		 * (thing -> the) in the version of this routine that ignores inflectional
+		 * variants that are mentioned in the dictionary (when the root is also
+		 * present)
+		 */
+
+		if (word.length() <= 5)
+		{
+			return;
+		}
+
+		/* the vowelinstem() is necessary so we don't stem acronyms */
+		if (endsIn('i', 'n', 'g') && vowelInStem())
+		{
+
+		  /* try adding an `e' to the stem and check against the dictionary */
+		  word.setCharAt(j + 1, 'e');
+		  word.Length = j + 2;
+		  k = j + 1;
+
+		  DictEntry entry = wordInDict();
+		  if (entry != null)
+		  {
+			if (!entry.exception) // if it's in the dictionary and not an exception
+			{
+			  return;
+			}
+		  }
+
+		  /* adding on the `e' didn't work, so remove it */
+		  word.Length = k;
+		  k--; // note that `ing' has also been removed
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* if I can remove a doubled consonant and get a word, then do so */
+		  if (doubleC(k))
+		  {
+			k--;
+			word.Length = k + 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(k)); // restore the doubled consonant
+
+			/* the default is to leave the consonant doubled */
+			/* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
+			/* `bookselling' -> `booksell' and `mislabelling' -> `mislabell'). */
+			/* Without making the algorithm significantly more complicated, this */
+			/* is the best I can do */
+			k++;
+			lookup();
+			return;
+		  }
+
+		  /*
+		   * the word wasn't in the dictionary after removing the stem, and then
+		   * checking with and without a final `e'. The default is to add an `e'
+		   * unless the word ends in two consonants, so `microcoding' ->
+		   * `microcode'. The two consonants restriction wouldn't normally be
+		   * necessary, but is needed because we don't try to deal with prefixes and
+		   * compounds, and most of the time it is correct (e.g., footstamping ->
+		   * footstamp, not footstampe; however, decoupled -> decoupl). We can
+		   * prevent almost all of the incorrect stems if we try to do some prefix
+		   * analysis first
+		   */
+
+		  if ((j > 0) && isCons(j) && isCons(j - 1))
+		  {
+			k = j;
+			word.Length = k + 1;
+			// nolookup() because we already did according to the comment
+			return;
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // nolookup(); we already tried an 'e' ending
+		  return;
+		}
+	  }
+
+	  /*
+	   * this routine deals with -ity endings. It accepts -ability, -ibility, and
+	   * -ality, even without checking the dictionary because they are so
+	   * productive. The first two are mapped to -ble, and the -ity is remove for
+	   * the latter
+	   */
+	  private void ityEndings()
+	  {
+		int old_k = k;
+
+		if (endsIn('i', 't', 'y'))
+		{
+		  word.Length = j + 1; // try just removing -ity
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite('e'); // try removing -ity and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.setCharAt(j + 1, 'i');
+		  word.append("ty");
+		  k = old_k;
+		  /*
+		   * the -ability and -ibility endings are highly productive, so just accept
+		   * them
+		   */
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'l'))
+		  {
+			word.Length = j - 1;
+			word.append("le"); // convert to -ble
+			k = j;
+			lookup();
+			return;
+		  }
+
+		  /* ditto for -ivity */
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'v'))
+		  {
+			word.Length = j + 1;
+			word.unsafeWrite('e'); // convert to -ive
+			k = j + 1;
+			lookup();
+			return;
+		  }
+		  /* ditto for -ality */
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
+		  {
+			word.Length = j + 1;
+			k = j;
+			lookup();
+			return;
+		  }
+
+		  /*
+		   * if the root isn't in the dictionary, and the variant *is* there, then
+		   * use the variant. This allows `immunity'->`immune', but prevents
+		   * `capacity'->`capac'. If neither the variant nor the root form are in
+		   * the dictionary, then remove the ending as a default
+		   */
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* the default is to remove -ity altogether */
+		  word.Length = j + 1;
+		  k = j;
+		  // nolookup(), we already did it.
+		  return;
+		}
+	  }
+
+	  /* handle -ence and -ance */
+	  private void nceEndings()
+	  {
+		int old_k = k;
+		char word_char;
+
+		if (endsIn('n', 'c', 'e'))
+		{
+		  word_char = word.charAt(j);
+		  if (!((word_char == 'e') || (word_char == 'a')))
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.unsafeWrite('e'); // try converting -e/ance to -e (adherance/adhere)
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j; // try removing -e/ance altogether (disappearance/disappear)
+		  k = j - 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite(word_char); // restore the original ending
+		  word.append("nce");
+		  k = old_k;
+		  // nolookup() because we restored the original ending
+		}
+		return;
+	  }
+
+	  /* handle -ness */
+	  private void nessEndings()
+	  {
+		if (endsIn('n', 'e', 's', 's'))
+		{
+		  /* this is a very productive ending, so just accept it */
+		  word.Length = j + 1;
+		  k = j;
+		  if (word.charAt(j) == 'i')
+		  {
+			  word.setCharAt(j, 'y');
+		  }
+		  lookup();
+		}
+		return;
+	  }
+
+	  /* handle -ism */
+	  private void ismEndings()
+	  {
+		if (endsIn('i', 's', 'm'))
+		{
+		  /* this is a very productive ending, so just accept it */
+		  word.Length = j + 1;
+		  k = j;
+		  lookup();
+		}
+		return;
+	  }
+
+	  /* this routine deals with -ment endings. */
+	  private void mentEndings()
+	  {
+		int old_k = k;
+
+		if (endsIn('m', 'e', 'n', 't'))
+		{
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.append("ment");
+		  k = old_k;
+		  // nolookup
+		}
+		return;
+	  }
+
+	  /* this routine deals with -ize endings. */
+	  private void izeEndings()
+	  {
+		int old_k = k;
+
+		if (endsIn('i', 'z', 'e'))
+		{
+		  word.Length = j + 1; // try removing -ize entirely
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite('i');
+
+		  if (doubleC(j)) // allow for a doubled consonant
+		  {
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(j - 1));
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e'); // try removing -ize and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1;
+		  word.append("ize");
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  /* handle -ency and -ancy */
+	  private void ncyEndings()
+	  {
+		if (endsIn('n', 'c', 'y'))
+		{
+		  if (!((word.charAt(j) == 'e') || (word.charAt(j) == 'a')))
+		  {
+			  return;
+		  }
+		  word.setCharAt(j + 2, 't'); // try converting -ncy to -nt
+		  word.Length = j + 3;
+		  k = j + 2;
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 2, 'c'); // the default is to convert it to -nce
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  lookup();
+		}
+		return;
+	  }
+
+	  /* handle -able and -ible */
+	  private void bleEndings()
+	  {
+		int old_k = k;
+		char word_char;
+
+		if (endsIn('b', 'l', 'e'))
+		{
+		  if (!((word.charAt(j) == 'a') || (word.charAt(j) == 'i')))
+		  {
+			  return;
+		  }
+		  word_char = word.charAt(j);
+		  word.Length = j; // try just removing the ending
+		  k = j - 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  if (doubleC(k)) // allow for a doubled consonant
+		  {
+			word.Length = k;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+			k++;
+			word.unsafeWrite(word.charAt(k - 1));
+		  }
+		  word.Length = j;
+		  word.unsafeWrite('e'); // try removing -a/ible and adding -e
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.append("ate"); // try removing -able and adding -ate
+		  /* (e.g., compensable/compensate) */
+		  k = j + 2;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.unsafeWrite(word_char); // restore the original values
+		  word.append("ble");
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  /*
+	   * handle -ic endings. This is fairly straightforward, but this is also the
+	   * only place we try *expanding* an ending, -ic -> -ical. This is to handle
+	   * cases like `canonic' -> `canonical'
+	   */
+	  private void icEndings()
+	  {
+		if (endsIn('i', 'c'))
+		{
+		  word.Length = j + 3;
+		  word.append("al"); // try converting -ic to -ical
+		  k = j + 4;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 1, 'y'); // try converting -ic to -y
+		  word.Length = j + 2;
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 1, 'e'); // try converting -ic to -e
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1; // try removing -ic altogether
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.append("ic"); // restore the original ending
+		  k = j + 2;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  private static char[] ization = "ization".ToCharArray();
+	  private static char[] ition = "ition".ToCharArray();
+	  private static char[] ation = "ation".ToCharArray();
+	  private static char[] ication = "ication".ToCharArray();
+
+	  /* handle some derivational endings */
+	  /*
+	   * this routine deals with -ion, -ition, -ation, -ization, and -ication. The
+	   * -ization ending is always converted to -ize
+	   */
+	  private void ionEndings()
+	  {
+		int old_k = k;
+		if (!endsIn('i', 'o', 'n'))
+		{
+		  return;
+		}
+
+		if (endsIn(ization))
+		{
+		  /* the -ize ending is very productive, so simply accept it as the root */
+		  word.Length = j + 3;
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  lookup();
+		  return;
+		}
+
+		if (endsIn(ition))
+		{
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // remove -ition and add `e', and check against the dictionary
+		  if (lookup())
+		  {
+			  return; // (e.g., definition->define, opposition->oppose)
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ition");
+		  k = old_k;
+		  // nolookup()
+		}
+		else if (endsIn(ation))
+		{
+		  word.Length = j + 3;
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  if (lookup()) // remove -ion and add `e', and check against the dictionary
+		  {
+			  return; // (elimination -> eliminate)
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e'); /*
+	                              * remove -ation and add `e', and check against the
+	                              * dictionary
+	                              */
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1; /*
+	                             * just remove -ation (resignation->resign) and
+	                             * check dictionary
+	                             */
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ation");
+		  k = old_k;
+		  // nolookup()
+
+		}
+
+		/*
+		 * test -ication after -ation is attempted (e.g., `complication->complicate'
+		 * rather than `complication->comply')
+		 */
+
+		if (endsIn(ication))
+		{
+		  word.Length = j + 1;
+		  word.unsafeWrite('y');
+		  k = j + 1;
+		  // remove -ication and add `y', and check against the dictionary
+		  if (lookup())
+		  {
+			  return; // (e.g., amplification -> amplify)
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ication");
+		  k = old_k;
+		  // nolookup()
+		}
+
+		// if (endsIn(ion)) {
+		if (true) // we checked for this earlier... just need to set "j"
+		{
+		  j = k - 3; // YCS
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  if (lookup()) // remove -ion and add `e', and check against the dictionary
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup()) // remove -ion, and if it's found, treat that as the root
+		  {
+			  return;
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ion");
+		  k = old_k;
+		  // nolookup()
+		}
+
+		// nolookup(); all of the other paths restored original values
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -er, -or, -ier, and -eer. The -izer ending is
+	   * always converted to -ize
+	   */
+	  private void erAndOrEndings()
+	  {
+		int old_k = k;
+
+		if (word.charAt(k) != 'r') // YCS
+		{
+			return;
+		}
+
+		char word_char; // so we can remember if it was -er or -or
+
+		if (endsIn('i', 'z', 'e', 'r'))
+		{
+		  /* -ize is very productive, so accept it as the root */
+		  word.Length = j + 4;
+		  k = j + 3;
+		  lookup();
+		  return;
+		}
+
+		if (endsIn('e', 'r') || endsIn('o', 'r'))
+		{
+		  word_char = word.charAt(j + 1);
+		  if (doubleC(j))
+		  {
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(j - 1)); // restore the doubled consonant
+		  }
+
+		  if (word.charAt(j) == 'i') // do we have a -ier ending?
+		  {
+			word.setCharAt(j, 'y');
+			word.Length = j + 1;
+			k = j;
+			if (lookup()) // yes, so check against the dictionary
+			{
+				return;
+			}
+			word.setCharAt(j, 'i'); // restore the endings
+			word.unsafeWrite('e');
+		  }
+
+		  if (word.charAt(j) == 'e') // handle -eer
+		  {
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite('e');
+		  }
+
+		  word.Length = j + 2; // remove the -r ending
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1; // try removing -er/-or
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite('e'); // try removing -or and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1;
+		  word.unsafeWrite(word_char);
+		  word.unsafeWrite('r'); // restore the word to the way it was
+		  k = old_k;
+		  // nolookup()
+		}
+
+	  }
+
+	  /*
+	   * this routine deals with -ly endings. The -ally ending is always converted
+	   * to -al Sometimes this will temporarily leave us with a non-word (e.g.,
+	   * to -al. Sometimes this will temporarily leave us with a non-word (e.g.,
+	   * step.
+	   */
+	  private void lyEndings()
+	  {
+		int old_k = k;
+
+		if (endsIn('l', 'y'))
+		{
+
+		  word.setCharAt(j + 2, 'e'); // try converting -ly to -le
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.setCharAt(j + 2, 'y');
+
+		  word.Length = j + 1; // try just removing the -ly
+		  k = j;
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  // always convert -ally to -al
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
+		  {
+			  return;
+		  }
+		  word.append("ly");
+		  k = old_k;
+
+		  // always convert -ably to -able
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'b'))
+		  {
+			word.setCharAt(j + 2, 'e');
+			k = j + 2;
+			return;
+		  }
+
+		  if (word.charAt(j) == 'i') // e.g., militarily -> military
+		  {
+			word.Length = j;
+			word.unsafeWrite('y');
+			k = j;
+			if (lookup())
+			{
+				return;
+			}
+			word.Length = j;
+			word.append("ily");
+			k = old_k;
+		  }
+
+		  word.Length = j + 1; // the default is to remove -ly
+
+		  k = j;
+		  // nolookup()... we already tried removing the "ly" variant
+		}
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -al endings. Some of the endings from the previous
+	   * routine are finished up here.
+	   */
+	  private void alEndings()
+	  {
+		int old_k = k;
+
+		if (word.length() < 4)
+		{
+			return;
+		}
+		if (endsIn('a', 'l'))
+		{
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup()) // try just removing the -al
+		  {
+			  return;
+		  }
+
+		  if (doubleC(j)) // allow for a doubled consonant
+		  {
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(j - 1));
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e'); // try removing the -al and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1;
+		  word.append("um"); // try converting -al to -um
+		  /* (e.g., optimal - > optimum ) */
+		  k = j + 2;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1;
+		  word.append("al"); // restore the ending to the way it was
+		  k = old_k;
+
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'c'))
+		  {
+			word.Length = j - 1; // try removing -ical
+			k = j - 2;
+			if (lookup())
+			{
+				return;
+			}
+
+			word.Length = j - 1;
+			word.unsafeWrite('y'); // try turning -ical to -y (e.g., bibliographical)
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+
+			word.Length = j - 1;
+			word.append("ic"); // the default is to convert -ical to -ic
+			k = j;
+			// nolookup() ... converting ical to ic means removing "al" which we
+			// already tried
+			// ERROR
+			lookup();
+			return;
+		  }
+
+		  if (word.charAt(j) == 'i') // sometimes -ial endings should be removed
+		  {
+			word.Length = j; // (sometimes it gets turned into -y, but we
+			k = j - 1; // aren't dealing with that case for now)
+			if (lookup())
+			{
+				return;
+			}
+			word.append("ial");
+			k = old_k;
+			lookup();
+		  }
+
+		}
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -ive endings. It normalizes some of the -ative
+	   * endings directly, and also maps some -ive endings to -ion.
+	   */
+	  private void iveEndings()
+	  {
+		int old_k = k;
+
+		if (endsIn('i', 'v', 'e'))
+		{
+		  word.Length = j + 1; // try removing -ive entirely
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.unsafeWrite('e'); // try removing -ive and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1;
+		  word.append("ive");
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 't'))
+		  {
+			word.setCharAt(j - 1, 'e'); // try removing -ative and adding -e
+			word.Length = j; // (e.g., determinative -> determine)
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.Length = j - 1; // try just removing -ative
+			if (lookup())
+			{
+				return;
+			}
+
+			word.append("ative");
+			k = old_k;
+		  }
+
+		  /* try mapping -ive to -ion (e.g., injunctive/injunction) */
+		  word.setCharAt(j + 2, 'o');
+		  word.setCharAt(j + 3, 'n');
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 2, 'v'); // restore the original values
+		  word.setCharAt(j + 3, 'e');
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  internal KStemmer()
+	  {
+	  }
+
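+	  /// <summary>
+	  /// Stems a single term, returning the stemmed form, or the original
+	  /// string unchanged when no stemming applied. A minimal usage sketch
+	  /// (the example word is hypothetical and assumes it is covered by the
+	  /// KStem dictionary):
+	  /// <code>string root = new KStemmer().stem("elephants"); // "elephant"</code>
+	  /// </summary>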
+	  internal virtual string stem(string term)
+	  {
+		bool changed = stem(term.ToCharArray(), term.Length);
+		if (!changed)
+		{
+			return term;
+		}
+		return asString();
+	  }
+
+	  /// <summary>
+	  /// Returns the result of the stem (assuming the word was changed) as a String.
+	  /// </summary>
+	  internal virtual string asString()
+	  {
+		string s = String; // the cached result, if any
+		if (s != null)
+		{
+			return s;
+		}
+		return word.ToString();
+	  }
+
+	  internal virtual CharSequence asCharSequence()
+	  {
+		return result != null ? result : word;
+	  }
+
+	  internal virtual string String
+	  {
+		  get
+		  {
+			return result;
+		  }
+	  }
+
+	  internal virtual char[] Chars
+	  {
+		  get
+		  {
+			return word.Array;
+		  }
+	  }
+
+	  internal virtual int Length
+	  {
+		  get
+		  {
+			return word.length();
+		  }
+	  }
+
+	  internal string result;
+
+	  private bool matched()
+	  {
+		/*
+		 * if (!lookups.contains(word.toString())) { throw new
+		 * RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
+		 * }
+		 */
+		// lookup();
+		return matchedEntry != null;
+	  }
+
+	  /// <summary>
+	  /// Stems the text in the token. Returns true if changed.
+	  /// </summary>
+	  internal virtual bool stem(char[] term, int len)
+	  {
+
+		result = null;
+
+		k = len - 1;
+		if ((k <= 1) || (k >= MaxWordLen - 1))
+		{
+		  return false; // don't stem
+		}
+
+		// first check the stemmer dictionaries, and avoid using the
+		// cache if it's in there.
+		DictEntry entry = dict_ht.get(term, 0, len);
+		if (entry != null)
+		{
+		  if (entry.root != null)
+		  {
+			result = entry.root;
+			return true;
+		  }
+		  return false;
+		}
+
+		/*
+		 * caching off is normally faster if (cache == null) initializeStemHash();
+		 *
+		 * // now check the cache, before we copy chars to "word" if (cache != null)
+		 * { String val = cache.get(term, 0, len); if (val != null) { if (val !=
+		 * SAME) { result = val; return true; } return false; } }
+		 */
+
+		word.reset();
+		// allocate enough space so that an expansion is never needed
+		word.reserve(len + 10);
+		for (int i = 0; i < len; i++)
+		{
+		  char ch = term[i];
+		  if (!isAlpha(ch)) // don't stem
+		  {
+			  return false;
+		  }
+		  // don't lowercase... it's a requirement that lowercase filter be
+		  // used before this stemmer.
+		  word.unsafeWrite(ch);
+		}
+
+		matchedEntry = null;
+		/*
+		 * lookups.clear(); lookups.add(word.toString());
+		 */
+
+		/*
+		 * This while loop will never be executed more than one time; it is here
+		 * only to allow the break statement to be used to escape as soon as a word
+		 * is recognized
+		 */
+		while (true)
+		{
+		  // YCS: extra lookup()s were inserted so we don't need to
+		  // do an extra wordInDict() here.
+		  plural();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  pastTense();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  aspect();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ityEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  nessEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ionEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  erAndOrEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  lyEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  alEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  entry = wordInDict();
+		  iveEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  izeEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  mentEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  bleEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ismEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  icEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ncyEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  nceEndings();
+		  matched();
+		  break;
+		}
+
+		/*
+		 * try for a direct mapping (allows for cases like `Italian'->`Italy' and
+		 * `Italians'->`Italy')
+		 */
+		entry = matchedEntry;
+		if (entry != null)
+		{
+		  result = entry.root; // may be null, which means that "word" is the stem
+		}
+
+		/*
+		 * caching off is normally faster if (cache != null && cache.size() <
+		 * maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
+		 * key, 0, len); if (result != null) { cache.put(key, result); } else {
+		 * cache.put(key, word.toString()); } }
+		 */
+
+		/*
+		 * if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
+		 * { System.out.println("CASE:" + word.toString() + "," + new
+		 * String(term,0,len));
+		 *
+		 * } }
+		 */
+
+		// no entry matched means result is "word"
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
new file mode 100644
index 0000000..7933292
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
@@ -0,0 +1,81 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Transforms the token stream as per the Porter stemming algorithm.
+	///    Note: the input to the stemming filter must already be in lower case,
+	///    so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+	///    down the Tokenizer chain in order for this to work properly!
+	///    <P>
+	///    To use this filter with other analyzers, you'll want to write an
+	///    Analyzer class that sets up the TokenStream chain as you want it.
+	///    To use this with LowerCaseTokenizer, for example, you'd write an
+	///    analyzer like this:
+	///    <P>
+	///    <PRE class="prettyprint">
+	///    class MyAnalyzer extends Analyzer {
+	///      {@literal @Override}
+	///      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+	///        Tokenizer source = new LowerCaseTokenizer(version, reader);
+	///        return new TokenStreamComponents(source, new PorterStemFilter(source));
+	///      }
+	///    }
+	///    </PRE>
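+	///    <P>
+	///    In this port the same chain would look roughly like the following
+	///    C# sketch (names assume the ported Analyzer API mirrors the Java
+	///    one, so treat it as illustrative rather than definitive):
+	///    <PRE class="prettyprint">
+	///    class MyAnalyzer : Analyzer {
+	///      protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) {
+	///        Tokenizer source = new LowerCaseTokenizer(version, reader);
+	///        return new TokenStreamComponents(source, new PorterStemFilter(source));
+	///      }
+	///    }
+	///    </PRE>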
+	///    <para>
+	///    Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	///    certain terms from being passed to the stemmer
+	///    <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	///    in a previous <seealso cref="TokenStream"/>.
+	/// 
+	///    Note: For including the original term as well as the stemmed version, see
+	///   <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	///    </para>
+	/// </summary>
+	public sealed class PorterStemFilter : TokenFilter
+	{
+	  private readonly PorterStemmer stemmer = new PorterStemmer();
+	  // C# field initializers cannot reference instance members, so the
+	  // attributes are resolved in the constructor instead.
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  public PorterStemFilter(TokenStream @in) : base(@in)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		if ((!keywordAttr.Keyword) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
+		{
+		  termAtt.copyBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
+		}
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
new file mode 100644
index 0000000..588b559
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PorterStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PorterStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
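+	/// <para>
+	/// A hypothetical programmatic sketch (the args dictionary is empty
+	/// because this factory accepts no parameters; tokenStream stands for
+	/// any existing TokenStream):
+	/// <pre class="prettyprint">
+	/// var factory = new PorterStemFilterFactory(new Dictionary&lt;string, string&gt;());
+	/// TokenStream stemmed = factory.create(tokenStream);
+	/// </pre>
+	/// </para>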
+	/// </summary>
+	public class PorterStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PorterStemFilterFactory </summary>
+	  public PorterStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override PorterStemFilter create(TokenStream input)
+	  {
+		return new PorterStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
new file mode 100644
index 0000000..16d01e1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
@@ -0,0 +1,871 @@
+using System;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/*
+	
+	   Porter stemmer in Java. The original paper is in
+	
+	       Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+	       no. 3, pp 130-137,
+	
+	   See also http://www.tartarus.org/~martin/PorterStemmer/index.html
+	
+	   Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+	   The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+	   is then outside the bounds of b.
+	
+	   Similarly,
+	
+	   Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+	   'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+	   b[j] is then outside the bounds of b.
+	
+	   Release 3.
+	
+	   [ This version is derived from Release 3, modified by Brian Goetz to
+	     optimize for fewer object creations.  ]
+	
+	*/
+
+
+
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+	/// 
+	/// <summary>
+	/// Stemmer, implementing the Porter Stemming Algorithm
+	/// 
+	/// The Stemmer class transforms a word into its root form.  The input
+	/// word can be provided a character at a time (by calling add()), or at once
+	/// by calling one of the various stem(something) methods.
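+	/// <para>
+	/// A minimal all-at-once sketch (illustrative; "running" stems to "run"
+	/// under the Porter rules):
+	/// <code>
+	/// var stemmer = new PorterStemmer();
+	/// string root = stemmer.stem("running");
+	/// </code>
+	/// </para>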
+	/// </summary>
+
+	internal class PorterStemmer
+	{
+	  private char[] b;
+	  private int i, j, k, k0; // offset into b
+	  private bool dirty = false;
+	  private const int INITIAL_SIZE = 50;
+
+	  public PorterStemmer()
+	  {
+		b = new char[INITIAL_SIZE];
+		i = 0;
+	  }
+
+	  /// <summary>
+	  /// reset() resets the stemmer so it can stem another word.  If you invoke
+	  /// the stemmer by calling add(char) and then stem(), you must call reset()
+	  /// before starting another word.
+	  /// </summary>
+	  public virtual void reset()
+	  {
+		  i = 0;
+		  dirty = false;
+	  }
+
+	  /// <summary>
+	  /// Add a character to the word being stemmed.  When you are finished
+	  /// adding characters, you can call stem(void) to process the word.
+	  /// </summary>
+	  public virtual void add(char ch)
+	  {
+		if (b.Length <= i)
+		{
+		  b = ArrayUtil.grow(b, i + 1);
+		}
+		b[i++] = ch;
+	  }
+
+	  /// <summary>
+	  /// After a word has been stemmed, it can be retrieved by toString(),
+	  /// or a reference to the internal buffer can be retrieved by getResultBuffer
+	  /// and getResultLength (which is generally more efficient.)
+	  /// </summary>
+	  public override string ToString()
+	  {
+		  return new string(b,0,i);
+	  }
+
+	  /// <summary>
+	  /// Returns the length of the word resulting from the stemming process.
+	  /// </summary>
+	  public virtual int ResultLength
+	  {
+		  get
+		  {
+			  return i;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Returns a reference to a character buffer containing the results of
+	  /// the stemming process.  You also need to consult getResultLength()
+	  /// to determine the length of the result.
+	  /// </summary>
+	  public virtual char[] ResultBuffer
+	  {
+		  get
+		  {
+			  return b;
+		  }
+	  }
+
+	  /* cons(i) is true <=> b[i] is a consonant. */
+
+	  private bool cons(int i)
+	  {
+		switch (b[i])
+		{
+		case 'a':
+		case 'e':
+		case 'i':
+		case 'o':
+		case 'u':
+		  return false;
+		case 'y':
+		  return (i == k0) ? true : !cons(i - 1);
+		default:
+		  return true;
+		}
+	  }
+
+	  /* m() measures the number of consonant sequences between k0 and j. if c is
+	     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+	     presence,
+	
+	          <c><v>       gives 0
+	          <c>vc<v>     gives 1
+	          <c>vcvc<v>   gives 2
+	          <c>vcvcvc<v> gives 3
+	          ....
+	  */
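+	  /* Worked examples from Porter's paper: "tr", "ee", "tree" and "by"
+	     give m() = 0; "trouble", "oats" and "trees" give 1; "troubles",
+	     "private" and "oaten" give 2. */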
+
+	  private int m()
+	  {
+		int n = 0;
+		int i = k0;
+		while (true)
+		{
+		  if (i > j)
+		  {
+			return n;
+		  }
+		  if (!cons(i))
+		  {
+			break;
+		  }
+		  i++;
+		}
+		i++;
+		while (true)
+		{
+		  while (true)
+		  {
+			if (i > j)
+			{
+			  return n;
+			}
+			if (cons(i))
+			{
+			  break;
+			}
+			i++;
+		  }
+		  i++;
+		  n++;
+		  while (true)
+		  {
+			if (i > j)
+			{
+			  return n;
+			}
+			if (!cons(i))
+			{
+			  break;
+			}
+			i++;
+		  }
+		  i++;
+		}
+	  }
+
+	  /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+	  private bool vowelinstem()
+	  {
+		int i;
+		for (i = k0; i <= j; i++)
+		{
+		  if (!cons(i))
+		  {
+			return true;
+		  }
+		}
+		return false;
+	  }
+
+	  /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+	  private bool doublec(int j)
+	  {
+		if (j < k0 + 1)
+		{
+		  return false;
+		}
+		if (b[j] != b[j - 1])
+		{
+		  return false;
+		}
+		return cons(j);
+	  }
+
+	  /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+	     and also if the second c is not w,x or y. this is used when trying to
+	     restore an e at the end of a short word. e.g.
+	
+	          cav(e), lov(e), hop(e), crim(e), but
+	          snow, box, tray.
+	
+	  */
+
+	  private bool cvc(int i)
+	  {
+		if (i < k0 + 2 || !cons(i) || cons(i - 1) || !cons(i - 2))
+		{
+		  return false;
+		}
+		else
+		{
+		  int ch = b[i];
+		  if (ch == 'w' || ch == 'x' || ch == 'y')
+		  {
+			  return false;
+		  }
+		}
+		return true;
+	  }
+
+	  private bool ends(string s)
+	  {
+		int l = s.Length;
+		int o = k - l + 1;
+		if (o < k0)
+		{
+		  return false;
+		}
+		for (int i = 0; i < l; i++)
+		{
+		  if (b[o + i] != s[i])
+		  {
+			return false;
+		  }
+		}
+		j = k - l;
+		return true;
+	  }
+
+	  /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+	     k. */
+
+	  internal virtual void setto(string s)
+	  {
+		int l = s.Length;
+		int o = j + 1;
+		for (int i = 0; i < l; i++)
+		{
+		  b[o + i] = s[i];
+		}
+		k = j + l;
+		dirty = true;
+	  }
+
+	  /* r(s) is used further down. */
+
+	  internal virtual void r(string s)
+	  {
+		  if (m() > 0)
+		  {
+			  setto(s);
+		  }
+	  }
+
+	  /* step1() gets rid of plurals and -ed or -ing. e.g.
+	
+	           caresses  ->  caress
+	           ponies    ->  poni
+	           ties      ->  ti
+	           caress    ->  caress
+	           cats      ->  cat
+	
+	           feed      ->  feed
+	           agreed    ->  agree
+	           disabled  ->  disable
+	
+	           matting   ->  mat
+	           mating    ->  mate
+	           meeting   ->  meet
+	           milling   ->  mill
+	           messing   ->  mess
+	
+	           meetings  ->  meet
+	
+	  */
+
+	  private void step1()
+	  {
+		if (b[k] == 's')
+		{
+		  if (ends("sses"))
+		  {
+			  k -= 2;
+		  }
+		  else if (ends("ies"))
+		  {
+			  setto("i");
+		  }
+		  else if (b[k - 1] != 's')
+		  {
+			  k--;
+		  }
+		}
+		if (ends("eed"))
+		{
+		  if (m() > 0)
+		  {
+			k--;
+		  }
+		}
+		else if ((ends("ed") || ends("ing")) && vowelinstem())
+		{
+		  k = j;
+		  if (ends("at"))
+		  {
+			  setto("ate");
+		  }
+		  else if (ends("bl"))
+		  {
+			  setto("ble");
+		  }
+		  else if (ends("iz"))
+		  {
+			  setto("ize");
+		  }
+		  else if (doublec(k))
+		  {
+			int ch = b[k--];
+			if (ch == 'l' || ch == 's' || ch == 'z')
+			{
+			  k++;
+			}
+		  }
+		  else if (m() == 1 && cvc(k))
+		  {
+			setto("e");
+		  }
+		}
+	  }
+
+	  /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+	  private void step2()
+	  {
+		if (ends("y") && vowelinstem())
+		{
+		  b[k] = 'i';
+		  dirty = true;
+		}
+	  }
+
+	  /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+	     -ation) maps to -ize etc. note that the string before the suffix must give
+	     m() > 0. */
+
+	  private void step3()
+	  {
+		if (k == k0) // For Bug 1
+		{
+			return;
+		}
+		switch (b[k - 1])
+		{
+		case 'a':
+		  if (ends("ational"))
+		  {
+			  r("ate");
+			  break;
+		  }
+		  if (ends("tional"))
+		  {
+			  r("tion");
+			  break;
+		  }
+		  break;
+		case 'c':
+		  if (ends("enci"))
+		  {
+			  r("ence");
+			  break;
+		  }
+		  if (ends("anci"))
+		  {
+			  r("ance");
+			  break;
+		  }
+		  break;
+		case 'e':
+		  if (ends("izer"))
+		  {
+			  r("ize");
+			  break;
+		  }
+		  break;
+		case 'l':
+		  if (ends("bli"))
+		  {
+			  r("ble");
+			  break;
+		  }
+		  if (ends("alli"))
+		  {
+			  r("al");
+			  break;
+		  }
+		  if (ends("entli"))
+		  {
+			  r("ent");
+			  break;
+		  }
+		  if (ends("eli"))
+		  {
+			  r("e");
+			  break;
+		  }
+		  if (ends("ousli"))
+		  {
+			  r("ous");
+			  break;
+		  }
+		  break;
+		case 'o':
+		  if (ends("ization"))
+		  {
+			  r("ize");
+			  break;
+		  }
+		  if (ends("ation"))
+		  {
+			  r("ate");
+			  break;
+		  }
+		  if (ends("ator"))
+		  {
+			  r("ate");
+			  break;
+		  }
+		  break;
+		case 's':
+		  if (ends("alism"))
+		  {
+			  r("al");
+			  break;
+		  }
+		  if (ends("iveness"))
+		  {
+			  r("ive");
+			  break;
+		  }
+		  if (ends("fulness"))
+		  {
+			  r("ful");
+			  break;
+		  }
+		  if (ends("ousness"))
+		  {
+			  r("ous");
+			  break;
+		  }
+		  break;
+		case 't':
+		  if (ends("aliti"))
+		  {
+			  r("al");
+			  break;
+		  }
+		  if (ends("iviti"))
+		  {
+			  r("ive");
+			  break;
+		  }
+		  if (ends("biliti"))
+		  {
+			  r("ble");
+			  break;
+		  }
+		  break;
+		case 'g':
+		  if (ends("logi"))
+		  {
+			  r("log");
+			  break;
+		  }
+		}
+	  }
+
+	  /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+	  private void step4()
+	  {
+		switch (b[k])
+		{
+		case 'e':
+		  if (ends("icate"))
+		  {
+			  r("ic");
+			  break;
+		  }
+		  if (ends("ative"))
+		  {
+			  r("");
+			  break;
+		  }
+		  if (ends("alize"))
+		  {
+			  r("al");
+			  break;
+		  }
+		  break;
+		case 'i':
+		  if (ends("iciti"))
+		  {
+			  r("ic");
+			  break;
+		  }
+		  break;
+		case 'l':
+		  if (ends("ical"))
+		  {
+			  r("ic");
+			  break;
+		  }
+		  if (ends("ful"))
+		  {
+			  r("");
+			  break;
+		  }
+		  break;
+		case 's':
+		  if (ends("ness"))
+		  {
+			  r("");
+			  break;
+		  }
+		  break;
+		}
+	  }
+
+	  /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+	  private void step5()
+	  {
+		if (k == k0) // for Bug 1
+		{
+			return;
+		}
+		switch (b[k - 1])
+		{
+		case 'a':
+		  if (ends("al"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'c':
+		  if (ends("ance"))
+		  {
+			  break;
+		  }
+		  if (ends("ence"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'e':
+		  if (ends("er"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'i':
+		  if (ends("ic"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'l':
+		  if (ends("able"))
+		  {
+			  break;
+		  }
+		  if (ends("ible"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'n':
+		  if (ends("ant"))
+		  {
+			  break;
+		  }
+		  if (ends("ement"))
+		  {
+			  break;
+		  }
+		  if (ends("ment"))
+		  {
+			  break;
+		  }
+		  /* element etc. not stripped before the m */
+		  if (ends("ent"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'o':
+		  if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+		  {
+			  break;
+		  }
+		  /* j >= 0 fixes Bug 2 */
+		  if (ends("ou"))
+		  {
+			  break;
+		  }
+		  return;
+		  /* takes care of -ous */
+		case 's':
+		  if (ends("ism"))
+		  {
+			  break;
+		  }
+		  return;
+		case 't':
+		  if (ends("ate"))
+		  {
+			  break;
+		  }
+		  if (ends("iti"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'u':
+		  if (ends("ous"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'v':
+		  if (ends("ive"))
+		  {
+			  break;
+		  }
+		  return;
+		case 'z':
+		  if (ends("ize"))
+		  {
+			  break;
+		  }
+		  return;
+		default:
+		  return;
+		}
+		if (m() > 1)
+		{
+		  k = j;
+		}
+	  }
+
+	  /* step6() removes a final -e if m() > 1. */
+
+	  private void step6()
+	  {
+		j = k;
+		if (b[k] == 'e')
+		{
+		  int a = m();
+		  if (a > 1 || a == 1 && !cvc(k - 1))
+		  {
+			k--;
+		  }
+		}
+		if (b[k] == 'l' && doublec(k) && m() > 1)
+		{
+		  k--;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Stem a word provided as a String.  Returns the result as a String.
+	  /// </summary>
+	  public virtual string stem(string s)
+	  {
+		if (stem(s.ToCharArray(), s.Length))
+		{
+		  return ToString();
+		}
+		else
+		{
+		  return s;
+		}
+	  }
+
+	  /// <summary>
+	  /// Stem a word contained in a char[].  Returns true if the stemming process
+	  /// resulted in a word different from the input.  You can retrieve the
+	  /// result with getResultLength()/getResultBuffer() or toString().
+	  /// </summary>
+	  public virtual bool stem(char[] word)
+	  {
+		return stem(word, word.Length);
+	  }
+
+	  /// <summary>
+	  /// Stem a word contained in a portion of a char[] array.  Returns
+	  /// true if the stemming process resulted in a word different from
+	  /// the input.  You can retrieve the result with
+	  /// getResultLength()/getResultBuffer() or toString().
+	  /// </summary>
+	  public virtual bool stem(char[] wordBuffer, int offset, int wordLen)
+	  {
+		reset();
+		if (b.Length < wordLen)
+		{
+		  b = new char[ArrayUtil.oversize(wordLen, RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+		Array.Copy(wordBuffer, offset, b, 0, wordLen);
+		i = wordLen;
+		return stem(0);
+	  }
+
+	  /// <summary>
+	  /// Stem a word contained in a leading portion of a char[] array.
+	  /// Returns true if the stemming process resulted in a word different
+	  /// from the input.  You can retrieve the result with
+	  /// getResultLength()/getResultBuffer() or toString().
+	  /// </summary>
+	  public virtual bool stem(char[] word, int wordLen)
+	  {
+		return stem(word, 0, wordLen);
+	  }
+
+	  /// <summary>
+	  /// Stem the word placed into the Stemmer buffer through calls to add().
+	  /// Returns true if the stemming process resulted in a word different
+	  /// from the input.  You can retrieve the result with
+	  /// getResultLength()/getResultBuffer() or toString().
+	  /// </summary>
+	  public virtual bool stem()
+	  {
+		return stem(0);
+	  }
+
+	  public virtual bool stem(int i0)
+	  {
+		k = i - 1;
+		k0 = i0;
+		if (k > k0 + 1)
+		{
+		  step1();
+		  step2();
+		  step3();
+		  step4();
+		  step5();
+		  step6();
+		}
+		// Also, a word is considered dirty if we lopped off letters
+		// Thanks to Ifigenia Vairelles for pointing this out.
+		if (i != k + 1)
+		{
+		  dirty = true;
+		}
+		i = k + 1;
+		return dirty;
+	  }
+
+	  /* Test program for demonstrating the Stemmer.  It reads a file and
+	   * stems each word, writing the result to standard out.
+	   * Usage: Stemmer file-name
+	  public static void main(String[] args) {
+	    PorterStemmer s = new PorterStemmer();
+	
+	    for (int i = 0; i < args.length; i++) {
+	      try {
+	        InputStream in = new FileInputStream(args[i]);
+	        byte[] buffer = new byte[1024];
+	        int bufferLen, offset, ch;
+	
+	        bufferLen = in.read(buffer);
+	        offset = 0;
+	        s.reset();
+	
+	        while(true) {
+	          if (offset < bufferLen)
+	            ch = buffer[offset++];
+	          else {
+	            bufferLen = in.read(buffer);
+	            offset = 0;
+	            if (bufferLen < 0)
+	              ch = -1;
+	            else
+	              ch = buffer[offset++];
+	          }
+	
+	          if (Character.isLetter((char) ch)) {
+	            s.add(Character.toLowerCase((char) ch));
+	          }
+	          else {
+	             s.stem();
+	             System.out.print(s.toString());
+	             s.reset();
+	             if (ch < 0)
+	               break;
+	             else {
+	               System.out.print((char) ch);
+	             }
+	           }
+	        }
+	
+	        in.close();
+	      }
+	      catch (IOException e) {
+	        System.out.println("error reading " + args[i]);
+	      }
+	    }
+	  }*/
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
new file mode 100644
index 0000000..e6d4f76
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
@@ -0,0 +1,155 @@
+using System;
+
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using SpanishStemmer = org.tartarus.snowball.ext.SpanishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Spanish.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating SpanishAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, SpanishLightStemFilter is used for less aggressive stemming.
+	/// </ul>
+	/// </para>
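+	/// <para>
+	/// A minimal usage sketch (the Version constant is illustrative; pick
+	/// the compatibility version your index requires):
+	/// <pre class="prettyprint">
+	/// Analyzer analyzer = new SpanishAnalyzer(Version.LUCENE_CURRENT);
+	/// </pre>
+	/// </para>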
+	/// </summary>
+	public sealed class SpanishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Spanish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "spanish_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public SpanishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SpanishLightStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new SpanishLightStemFilter(result);
+		}
+		else
+		{
+		  result = new SnowballFilter(result, new SpanishStemmer());
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
new file mode 100644
index 0000000..73834e1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SpanishLightStemmer"/> to stem Spanish
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
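+	/// <para>
+	/// A sketch of guarding terms from stemming (protectedTerms is a
+	/// hypothetical CharArraySet of terms to leave untouched):
+	/// <pre class="prettyprint">
+	/// TokenStream ts = new SetKeywordMarkerFilter(input, protectedTerms);
+	/// ts = new SpanishLightStemFilter(ts);
+	/// </pre>
+	/// </para>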
+	/// </summary>
+	public sealed class SpanishLightStemFilter : TokenFilter
+	{
+	  private readonly SpanishLightStemmer stemmer = new SpanishLightStemmer();
+	  // C# field initializers cannot reference instance members, so the
+	  // attributes are resolved in the constructor instead.
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  public SpanishLightStemFilter(TokenStream input) : base(input)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file


[21/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemmer.cs
new file mode 100644
index 0000000..2ca0cce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemmer.cs
@@ -0,0 +1,106 @@
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Light Stemmer for French.
+	/// <para>
+	/// This stemmer implements the following algorithm:
+	/// <i>A Stemming procedure and stopword list for general French corpora.</i>
+	/// Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class FrenchMinimalStemmer
+	{
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 6)
+		{
+		  return len;
+		}
+
+		if (s[len - 1] == 'x')
+		{
+		  if (s[len - 3] == 'a' && s[len - 2] == 'u')
+		  {
+			s[len - 2] = 'l';
+		  }
+		  return len - 1;
+		}
+
+		if (s[len - 1] == 's')
+		{
+			len--;
+		}
+		if (s[len - 1] == 'r')
+		{
+			len--;
+		}
+		if (s[len - 1] == 'e')
+		{
+			len--;
+		}
+		if (s[len - 1] == 'é')
+		{
+			len--;
+		}
+		if (s[len - 1] == s[len - 2])
+		{
+			len--;
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
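
To make the suffix rules above concrete, here is a small hypothetical driver
(not part of this commit) tracing one input through the stemmer: the final
'x' is dropped and the preceding "au" becomes "al".

    using System;
    using org.apache.lucene.analysis.fr;

    class FrenchMinimalStemmerDemo
    {
        static void Main()
        {
            var stemmer = new FrenchMinimalStemmer();
            char[] term = "chevaux".ToCharArray();
            int newLen = stemmer.stem(term, term.Length);   // len 7 -> 6, s[5] 'u' -> 'l'
            Console.WriteLine(new string(term, 0, newLen)); // prints "cheval"
        }
    }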

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs
new file mode 100644
index 0000000..8258dfb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs
@@ -0,0 +1,102 @@
+using System;
+
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that stems French words. 
+	/// <para>
+	/// The used stemmer can be changed at runtime after the
+	/// filter object is created (as long as it is a <seealso cref="FrenchStemmer"/>).
+	/// </para>
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="KeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref="KeywordMarkerFilter"/>
+	/// @deprecated (3.1) Use <seealso cref="SnowballFilter"/> with 
+	/// <seealso cref="org.tartarus.snowball.ext.FrenchStemmer"/> instead, which has the
+	/// same functionality. This filter will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use <seealso cref="SnowballFilter"/> with")]
+	public sealed class FrenchStemFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// The actual token in the input stream.
+	  /// </summary>
+	  private FrenchStemmer stemmer = new FrenchStemmer();
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public FrenchStemFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  /// <returns>  Returns true for the next token in the stream, or false at EOS </returns>
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  string term = termAtt.ToString();
+
+		  // Check the exclusion table
+		  if (!keywordAttr.Keyword)
+		  {
+			string s = stemmer.stem(term);
+			// If not stemmed, don't waste time adjusting the token.
+			if ((s != null) && !s.Equals(term))
+			{
+			  termAtt.setEmpty().append(s);
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	  /// <summary>
+	  /// Set an alternative/custom <seealso cref="FrenchStemmer"/> for this filter.
+	  /// </summary>
+	  public FrenchStemmer Stemmer
+	  {
+		  set
+		  {
+			if (value != null)
+			{
+			  this.stemmer = value;
+			}
+		  }
+	  }
+	}
+
+
+
+}
\ No newline at end of file
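
The class comment above suggests marking protected terms before this filter;
a hedged sketch (identifiers come from the raw port, "protectedTerms" is a
hypothetical CharArraySet, and "reader" is assumed):

    TokenStream ts = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    ts = new SetKeywordMarkerFilter(ts, protectedTerms); // sets KeywordAttribute on matches
    ts = new FrenchStemFilter(ts);                       // marked terms bypass stemming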

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
new file mode 100644
index 0000000..4657e29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
@@ -0,0 +1,785 @@
+using System;
+using System.Globalization;
+using System.Text;
+
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// A stemmer for French words. 
+	/// <para>
+	/// The algorithm is based on the work of
+	/// Dr Martin Porter on his snowball project<br>
+	/// refer to http://snowball.sourceforge.net/french/stemmer.html<br>
+	/// (French stemming algorithm) for details
+	/// </para> </summary>
+	/// @deprecated Use <seealso cref="org.tartarus.snowball.ext.FrenchStemmer"/> instead, 
+	/// which has the same functionality. This filter will be removed in Lucene 4.0 
+	[Obsolete("Use <seealso cref="org.tartarus.snowball.ext.FrenchStemmer"/> instead,")]
+	public class FrenchStemmer
+	{
+	  private static readonly CultureInfo locale = new CultureInfo("fr-FR");
+
+
+	  /// <summary>
+	  /// Buffer for the terms while stemming them.
+	  /// </summary>
+	  private StringBuilder sb = new StringBuilder();
+
+	  /// <summary>
+	  /// A temporary buffer, used to reconstruct R2
+	  /// </summary>
+	   private StringBuilder tb = new StringBuilder();
+
+	  /// <summary>
+	  /// Region R0 is equal to the whole buffer
+	  /// </summary>
+	  private string R0;
+
+	  /// <summary>
+	  /// Region RV
+	  /// "If the word begins with two vowels, RV is the region after the third letter,
+	  /// otherwise the region after the first vowel not at the beginning of the word,
+	  /// or the end of the word if these positions cannot be found."
+	  /// </summary>
+		private string RV;
+
+	  /// <summary>
+	  /// Region R1
+	  /// "R1 is the region after the first non-vowel following a vowel
+	  /// or is the null region at the end of the word if there is no such non-vowel"
+	  /// </summary>
+		private string R1;
+
+	  /// <summary>
+	  /// Region R2
+	  /// "R2 is the region after the first non-vowel in R1 following a vowel
+	  /// or is the null region at the end of the word if there is no such non-vowel"
+	  /// </summary>
+		private string R2;
+
+
+	  /// <summary>
+	  /// Set to true if we need to perform step 2
+	  /// </summary>
+		private bool suite;
+
+	  /// <summary>
+	  /// Set to true if the buffer was modified
+	  /// </summary>
+		private bool modified;
+
+
+		/// <summary>
+		/// Stems the given term to a unique <tt>discriminator</tt>.
+		/// </summary>
+		/// <param name="term">  java.langString The term that should be stemmed </param>
+		/// <returns> java.lang.String  Discriminator for <tt>term</tt> </returns>
+		protected internal virtual string stem(string term)
+		{
+		if (!isStemmable(term))
+		{
+		  return term;
+		}
+
+		// Use lowercase for medium stemming.
+		term = term.ToLower(locale);
+
+		// Reset the StringBuilder.
+		sb.Remove(0, sb.Length);
+		sb.Insert(0, term);
+
+		// reset the booleans
+		modified = false;
+		suite = false;
+
+		sb = treatVowels(sb);
+
+		setStrings();
+
+		step1();
+
+		if (!modified || suite)
+		{
+		  if (RV != null)
+		  {
+			suite = step2a();
+			if (!suite)
+			{
+			  step2b();
+			}
+		  }
+		}
+
+		if (modified || suite)
+		{
+		  step3();
+		}
+		else
+		{
+		  step4();
+		}
+
+		step5();
+
+		step6();
+
+		return sb.ToString();
+		}
+
+	  /// <summary>
+	  /// Sets the search region Strings<br>
+	  /// it needs to be done each time the buffer was modified
+	  /// </summary>
+	  private void setStrings()
+	  {
+		// set the strings
+		R0 = sb.ToString();
+		RV = retrieveRV(sb);
+		R1 = retrieveR(sb);
+		if (R1 != null)
+		{
+		  tb.Remove(0, tb.Length);
+		  tb.Insert(0, R1);
+		  R2 = retrieveR(tb);
+		}
+		else
+		{
+		  R2 = null;
+		}
+	  }
+
+	  /// <summary>
+	  /// First step of the Porter Algorithm<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step1()
+	  {
+		string[] suffix = new string[] {"ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste"};
+		deleteFrom(R2, suffix);
+
+		replaceFrom(R2, new string[] {"logies", "logie"}, "log");
+		replaceFrom(R2, new string[] {"usions", "utions", "usion", "ution"}, "u");
+		replaceFrom(R2, new string[] {"ences", "ence"}, "ent");
+
+		string[] search = new string[] {"atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
+		deleteButSuffixFromElseReplace(R2, search, "ic", true, R0, "iqU");
+
+		deleteButSuffixFromElseReplace(R2, new string[] {"ements", "ement"}, "eus", false, R0, "eux");
+		deleteButSuffixFrom(R2, new string[] {"ements", "ement"}, "ativ", false);
+		deleteButSuffixFrom(R2, new string[] {"ements", "ement"}, "iv", false);
+		deleteButSuffixFrom(R2, new string[] {"ements", "ement"}, "abl", false);
+		deleteButSuffixFrom(R2, new string[] {"ements", "ement"}, "iqU", false);
+
+		deleteFromIfTestVowelBeforeIn(R1, new string[] {"issements", "issement"}, false, R0);
+		deleteFrom(RV, new string[] {"ements", "ement"});
+
+		deleteButSuffixFromElseReplace(R2, new string[] {"ités", "ité"}, "abil", false, R0, "abl");
+		deleteButSuffixFromElseReplace(R2, new string[] {"ités", "ité"}, "ic", false, R0, "iqU");
+		deleteButSuffixFrom(R2, new string[] {"ités", "ité"}, "iv", true);
+
+		string[] autre = new string[] {"ifs", "ives", "if", "ive"};
+		deleteButSuffixFromElseReplace(R2, autre, "icat", false, R0, "iqU");
+		deleteButSuffixFromElseReplace(R2, autre, "at", true, R2, "iqU");
+
+		replaceFrom(R0, new string[] {"eaux"}, "eau");
+
+		replaceFrom(R1, new string[] {"aux"}, "al");
+
+		deleteButSuffixFromElseReplace(R2, new string[] {"euses", "euse"}, "", true, R1, "eux");
+
+		deleteFrom(R2, new string[] {"eux"});
+
+		// if one of the next steps is performed, we will need to perform step2a
+		bool temp = false;
+		temp = replaceFrom(RV, new string[] {"amment"}, "ant");
+		if (temp == true)
+		{
+		  suite = true;
+		}
+		temp = replaceFrom(RV, new string[] {"emment"}, "ent");
+		if (temp == true)
+		{
+		  suite = true;
+		}
+		temp = deleteFromIfTestVowelBeforeIn(RV, new string[] {"ments", "ment"}, true, RV);
+		if (temp == true)
+		{
+		  suite = true;
+		}
+
+	  }
+
+	  /// <summary>
+	  /// Second step (A) of the Porter Algorithm<br>
+	  /// Will be performed if nothing changed from the first step
+	  /// or changes were made to the amment, emment, ments or ment suffixes<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  /// <returns> boolean - true if something changed in the StringBuilder </returns>
+	  private bool step2a()
+	  {
+		string[] search = new string[] {"îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira", "irent", "iriez", "irez", "irions", "irons", "iront", "issaIent", "issais", "issantes", "issante", "issants", "issant", "issait", "issais", "issions", "issons", "issiez", "issez", "issent", "isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i"};
+		return deleteFromIfTestVowelBeforeIn(RV, search, false, RV);
+	  }
+
+	  /// <summary>
+	  /// Second step (B) of the Porter Algorithm<br>
+	  /// Will be performed if step 2A was performed unsuccessfully<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step2b()
+	  {
+		string[] suffix = new string[] {"eraIent", "erais", "erait", "erai", "eras", "erions", "eriez", "erons", "eront","erez", "èrent", "era", "ées", "iez", "ée", "és", "er", "ez", "é"};
+		deleteFrom(RV, suffix);
+
+		string[] search = new string[] {"assions", "assiez", "assent", "asses", "asse", "aIent", "antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant", "ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a"};
+		deleteButSuffixFrom(RV, search, "e", true);
+
+		deleteFrom(R2, new string[] {"ions"});
+	  }
+
+	  /// <summary>
+	  /// Third step of the Porter Algorithm<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step3()
+	  {
+		if (sb.Length > 0)
+		{
+		  char ch = sb[sb.Length - 1];
+		  if (ch == 'Y')
+		  {
+			sb[sb.Length - 1] = 'i';
+			setStrings();
+		  }
+		  else if (ch == 'ç')
+		  {
+			sb[sb.Length - 1] = 'c';
+			setStrings();
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Fourth step of the Porter Algorithm<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step4()
+	  {
+		if (sb.Length > 1)
+		{
+		  char ch = sb[sb.Length - 1];
+		  if (ch == 's')
+		  {
+			char b = sb[sb.Length - 2];
+			if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
+			{
+			  sb.Remove(sb.Length - 1, 1);
+			  setStrings();
+			}
+		  }
+		}
+		bool found = deleteFromIfPrecededIn(R2, new string[] {"ion"}, RV, "s");
+		if (!found)
+		{
+		found = deleteFromIfPrecededIn(R2, new string[] {"ion"}, RV, "t");
+		}
+
+		replaceFrom(RV, new string[] {"Ière", "ière", "Ier", "ier"}, "i");
+		deleteFrom(RV, new string[] {"e"});
+		deleteFromIfPrecededIn(RV, new string[] {"ë"}, R0, "gu");
+	  }
+
+	  /// <summary>
+	  /// Fifth step of the Porter Algorithm<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step5()
+	  {
+		if (R0 != null)
+		{
+		  if (R0.EndsWith("enn", StringComparison.Ordinal) || R0.EndsWith("onn", StringComparison.Ordinal) || R0.EndsWith("ett", StringComparison.Ordinal) || R0.EndsWith("ell", StringComparison.Ordinal) || R0.EndsWith("eill", StringComparison.Ordinal))
+		  {
+			sb.Remove(sb.Length - 1, 1);
+			setStrings();
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Sixth (and last!) step of the Porter Algorithm<br>
+	  /// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	  /// </summary>
+	  private void step6()
+	  {
+		if (R0 != null && R0.Length > 0)
+		{
+		  bool seenVowel = false;
+		  bool seenConson = false;
+		  int pos = -1;
+		  for (int i = R0.Length - 1; i > -1; i--)
+		  {
+			char ch = R0[i];
+			if (isVowel(ch))
+			{
+			  if (!seenVowel)
+			  {
+				if (ch == 'é' || ch == 'è')
+				{
+				  pos = i;
+				  break;
+				}
+			  }
+			  seenVowel = true;
+			}
+			else
+			{
+			  if (seenVowel)
+			  {
+				break;
+			  }
+			  else
+			  {
+				seenConson = true;
+			  }
+			}
+		  }
+		  if (pos > -1 && seenConson && !seenVowel)
+		  {
+			sb[pos] = 'e';
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
+	  /// </summary>
+	  /// <param name="source"> java.lang.String - the primary source zone for search </param>
+	  /// <param name="search"> java.lang.String[] - the strings to search for suppression </param>
+	  /// <param name="from"> java.lang.String - the secondary source zone for search </param>
+	  /// <param name="prefix"> java.lang.String - the prefix to add to the search string to test </param>
+	  /// <returns> boolean - true if modified </returns>
+	  private bool deleteFromIfPrecededIn(string source, string[] search, string from, string prefix)
+	  {
+		bool found = false;
+		if (source != null)
+		{
+		  for (int i = 0; i < search.Length; i++)
+		  {
+			if (source.EndsWith(search[i], StringComparison.Ordinal))
+			{
+			  if (from != null && from.EndsWith(prefix + search[i], StringComparison.Ordinal))
+			  {
+				sb.Remove(sb.Length - search[i].Length, search[i].Length);
+				found = true;
+				setStrings();
+				break;
+			  }
+			}
+		  }
+		}
+		return found;
+	  }
+
+	  /// <summary>
+	  /// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
+	  /// </summary>
+	  /// <param name="source"> java.lang.String - the primary source zone for search </param>
+	  /// <param name="search"> java.lang.String[] - the strings to search for suppression </param>
+	  /// <param name="vowel"> boolean - true if we need a vowel before the search string </param>
+	  /// <param name="from"> java.lang.String - the secondary source zone for search (where vowel could be) </param>
+	  /// <returns> boolean - true if modified </returns>
+	  private bool deleteFromIfTestVowelBeforeIn(string source, string[] search, bool vowel, string from)
+	  {
+		bool found = false;
+		if (source != null && from != null)
+		{
+		  for (int i = 0; i < search.Length; i++)
+		  {
+			if (source.EndsWith(search[i], StringComparison.Ordinal))
+			{
+			  if ((search[i].Length + 1) <= from.Length)
+			  {
+				bool test = isVowel(sb[sb.Length - (search[i].Length + 1)]);
+				if (test == vowel)
+				{
+				  sb.Remove(sb.Length - search[i].Length, search[i].Length);
+				  modified = true;
+				  found = true;
+				  setStrings();
+				  break;
+				}
+			  }
+			}
+		  }
+		}
+		return found;
+	  }
+
+	  /// <summary>
+	  /// Delete a suffix searched in zone "source" if preceded by the prefix
+	  /// </summary>
+	  /// <param name="source"> java.lang.String - the primary source zone for search </param>
+	  /// <param name="search"> java.lang.String[] - the strings to search for suppression </param>
+	  /// <param name="prefix"> java.lang.String - the prefix to add to the search string to test </param>
+	  /// <param name="without"> boolean - true if it will be deleted even without prefix found </param>
+	  private void deleteButSuffixFrom(string source, string[] search, string prefix, bool without)
+	  {
+		if (source != null)
+		{
+		  for (int i = 0; i < search.Length; i++)
+		  {
+			if (source.EndsWith(prefix + search[i], StringComparison.Ordinal))
+			{
+			  sb.Remove(sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+			else if (without && source.EndsWith(search[i], StringComparison.Ordinal))
+			{
+			  sb.Remove(sb.Length - search[i].Length, search[i].Length);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete a suffix searched in zone "source" if preceded by prefix<br>
+	  /// or replace it with the replace string if preceded by the prefix in the zone "from"<br>
+	  /// or delete the suffix if specified
+	  /// </summary>
+	  /// <param name="source"> java.lang.String - the primary source zone for search </param>
+	  /// <param name="search"> java.lang.String[] - the strings to search for suppression </param>
+	  /// <param name="prefix"> java.lang.String - the prefix to add to the search string to test </param>
+	  /// <param name="without"> boolean - true if it will be deleted even without prefix found </param>
+	  private void deleteButSuffixFromElseReplace(string source, string[] search, string prefix, bool without, string from, string replace)
+	  {
+		if (source != null)
+		{
+		  for (int i = 0; i < search.Length; i++)
+		  {
+			if (source.EndsWith(prefix + search[i], StringComparison.Ordinal))
+			{
+			  sb.Remove(sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+			else if (from != null && from.EndsWith(prefix + search[i], StringComparison.Ordinal))
+			{
+			  int pos = sb.Length - (prefix.Length + search[i].Length);
+			  sb.Remove(pos, prefix.Length + search[i].Length).Insert(pos, replace);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+			else if (without && source.EndsWith(search[i], StringComparison.Ordinal))
+			{
+			  sb.Remove(sb.Length - search[i].Length, search[i].Length);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Replace a search string with another within the source zone
+	  /// </summary>
+	  /// <param name="source"> java.lang.String - the source zone for search </param>
+	  /// <param name="search"> java.lang.String[] - the strings to search for replacement </param>
+	  /// <param name="replace"> java.lang.String - the replacement string </param>
+	  private bool replaceFrom(string source, string[] search, string replace)
+	  {
+		bool found = false;
+		if (source != null)
+		{
+		  for (int i = 0; i < search.Length; i++)
+		  {
+			if (source.EndsWith(search[i], StringComparison.Ordinal))
+			{
+			  int pos = sb.Length - search[i].Length;
+			  sb.Remove(pos, search[i].Length).Insert(pos, replace);
+			  modified = true;
+			  found = true;
+			  setStrings();
+			  break;
+			}
+		  }
+		}
+		return found;
+	  }
+
+	  /// <summary>
+	  /// Delete a search string within the source zone
+	  /// </summary>
+	  /// <param name="source"> the source zone for search </param>
+	  /// <param name="suffix"> the strings to search for suppression </param>
+	  private void deleteFrom(string source, string[] suffix)
+	  {
+		if (source != null)
+		{
+		  for (int i = 0; i < suffix.Length; i++)
+		  {
+			if (source.EndsWith(suffix[i], StringComparison.Ordinal))
+			{
+			  sb.Remove(sb.Length - suffix[i].Length, suffix[i].Length);
+			  modified = true;
+			  setStrings();
+			  break;
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Test if a char is a french vowel, including accentuated ones
+	  /// </summary>
+	  /// <param name="ch"> the char to test </param>
+	  /// <returns> boolean - true if the char is a vowel </returns>
+	  private bool isVowel(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+		  case 'y':
+		  case 'â':
+		  case 'à':
+		  case 'ë':
+		  case 'é':
+		  case 'ê':
+		  case 'è':
+		  case 'ï':
+		  case 'î':
+		  case 'ô':
+		  case 'ü':
+		  case 'ù':
+		  case 'û':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
+	  /// "R is the region after the first non-vowel following a vowel
+	  /// or is the null region at the end of the word if there is no such non-vowel"<br> </summary>
+	  /// <param name="buffer"> java.lang.StringBuilder - the in buffer </param>
+	  /// <returns> java.lang.String - the resulting string </returns>
+	  private string retrieveR(StringBuilder buffer)
+	  {
+		int len = buffer.Length;
+		int pos = -1;
+		for (int c = 0; c < len; c++)
+		{
+		  if (isVowel(buffer[c]))
+		  {
+			pos = c;
+			break;
+		  }
+		}
+		if (pos > -1)
+		{
+		  int consonne = -1;
+		  for (int c = pos; c < len; c++)
+		  {
+			if (!isVowel(buffer[c]))
+			{
+			  consonne = c;
+			  break;
+			}
+		  }
+		  if (consonne > -1 && (consonne+1) < len)
+		  {
+			return StringHelperClass.SubstringSpecial(buffer, consonne+1, len);
+		  }
+		  else
+		  {
+			return null;
+		  }
+		}
+		else
+		{
+		  return null;
+		}
+	  }
+
+	  /// <summary>
+	  /// Retrieve the "RV zone" from a buffer an return the corresponding string<br>
+	  /// "If the word begins with two vowels, RV is the region after the third letter,
+	  /// otherwise the region after the first vowel not at the beginning of the word,
+	  /// or the end of the word if these positions cannot be found."<br> </summary>
+	  /// <param name="buffer"> java.lang.StringBuilder - the in buffer </param>
+	  /// <returns> java.lang.String - the resulting string </returns>
+	  private string retrieveRV(StringBuilder buffer)
+	  {
+		int len = buffer.Length;
+		if (buffer.Length > 3)
+		{
+		  if (isVowel(buffer[0]) && isVowel(buffer[1]))
+		  {
+			return buffer.Substring(3, len - 3);
+		  }
+		  else
+		  {
+			int pos = 0;
+			for (int c = 1; c < len; c++)
+			{
+			  if (isVowel(buffer[c]))
+			  {
+				pos = c;
+				break;
+			  }
+			}
+			if (pos + 1 < len)
+			{
+			  return StringHelperClass.SubstringSpecial(buffer, pos + 1, len);
+			}
+			else
+			{
+			  return null;
+			}
+		  }
+		}
+		else
+		{
+		  return null;
+		}
+	  }
+
+
+
+		/// <summary>
+		/// Turns u and i preceded AND followed by a vowel to UpperCase<br>
+		/// Turns y preceded OR followed by a vowel to UpperCase<br>
+		/// Turns u preceded by q to UpperCase<br>
+		/// </summary>
+		/// <param name="buffer"> java.util.StringBuilder - the buffer to treat </param>
+		/// <returns> java.util.StringBuilder - the treated buffer </returns>
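+		// e.g., per the rules above: "quoi" -> "qUoi" (u after q), "payer" -> "paYer" (y next to a vowel)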
+		private StringBuilder treatVowels(StringBuilder buffer)
+		{
+		for (int c = 0; c < buffer.Length; c++)
+		{
+		  char ch = buffer[c];
+
+		  if (c == 0) // first char
+		  {
+			if (buffer.Length > 1)
+			{
+			  if (ch == 'y' && isVowel(buffer[c + 1]))
+			  {
+				buffer[c] = 'Y';
+			  }
+			}
+		  }
+		  else if (c == buffer.Length - 1) // last char
+		  {
+			if (ch == 'u' && buffer[c - 1] == 'q')
+			{
+			  buffer[c] = 'U';
+			}
+			if (ch == 'y' && isVowel(buffer[c - 1]))
+			{
+			  buffer[c] = 'Y';
+			}
+		  }
+		  else // other cases
+		  {
+			if (ch == 'u')
+			{
+			  if (buffer[c - 1] == 'q')
+			  {
+				buffer[c] = 'U';
+			  }
+			  else if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1]))
+			  {
+				buffer[c] = 'U';
+			  }
+			}
+			if (ch == 'i')
+			{
+			  if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1]))
+			  {
+				buffer[c] = 'I';
+			  }
+			}
+			if (ch == 'y')
+			{
+			  if (isVowel(buffer[c - 1]) || isVowel(buffer[c + 1]))
+			  {
+				buffer[c] = 'Y';
+			  }
+			}
+		  }
+		}
+
+		return buffer;
+		}
+
+		/// <summary>
+		/// Checks whether a term can be processed correctly.
+		/// </summary>
+		/// <returns> boolean - true if, and only if, the given term consists of letters. </returns>
+		private bool isStemmable(string term)
+		{
+		bool upper = false;
+		int first = -1;
+		for (int c = 0; c < term.Length; c++)
+		{
+		  // Discard terms that contain non-letter characters.
+		  if (!char.IsLetter(term[c]))
+		  {
+			return false;
+		  }
+		  // Discard terms that contain multiple uppercase letters.
+		  if (char.IsUpper(term[c]))
+		  {
+			if (upper)
+			{
+			  return false;
+			}
+		  // First encountered uppercase letter, set flag and save
+		  // position.
+			else
+			{
+			  first = c;
+			  upper = true;
+			}
+		  }
+		}
+		// Discard the term if it contains a single uppercase letter that
+		// is not starting the term.
+		if (first > 0)
+		{
+		  return false;
+		}
+		return true;
+		}
+	}
+
+}
\ No newline at end of file
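
To make the R1/R2/RV definitions above concrete, consider the standard
Snowball example "fameusement":

    f a m e u s e m e n t
    RV = "meusement"  (no two-vowel start, so the region after the first
                       vowel not at the beginning of the word, the 'a')
    R1 = "eusement"   (after the first non-vowel following a vowel: f-a-m)
    R2 = "ement"      (the same rule applied inside R1: e-u-s)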

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishAnalyzer.cs
new file mode 100644
index 0000000..1222998
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishAnalyzer.cs
@@ -0,0 +1,153 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.ga
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ElisionFilter = org.apache.lucene.analysis.util.ElisionFilter;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using IrishStemmer = org.tartarus.snowball.ext.IrishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Irish.
+	/// </summary>
+	public sealed class IrishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Irish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  private static readonly CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("d", "m", "b"), true));
+
+	  /// <summary>
+	  /// When StandardTokenizer splits t-athair into {t, athair}, we don't
+	  /// want to cause a position increment, otherwise there will be problems
+	  /// with phrase queries versus tAthair (which would not have a gap).
+	  /// </summary>
+	  private static readonly CharArraySet HYPHENATIONS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("h", "n", "t"), true));
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(IrishAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public IrishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="IrishLowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		StopFilter s = new StopFilter(matchVersion, result, HYPHENATIONS);
+		if (!matchVersion.onOrAfter(Version.LUCENE_44))
+		{
+		  s.EnablePositionIncrements = false;
+		}
+		result = s;
+		result = new ElisionFilter(result, DEFAULT_ARTICLES);
+		result = new IrishLowerCaseFilter(result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new IrishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
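
A hedged construction sketch (not part of this commit; "stopSet" and
"stemExclusions" are hypothetical CharArraySet instances):

    // default stopwords from stopwords.txt
    var analyzer = new IrishAnalyzer(Version.LUCENE_CURRENT);
    // custom stopwords plus terms that must never be stemmed
    var custom = new IrishAnalyzer(Version.LUCENE_CURRENT, stopSet, stemExclusions);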

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilter.cs
new file mode 100644
index 0000000..52b342b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilter.cs
@@ -0,0 +1,95 @@
+namespace org.apache.lucene.analysis.ga
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Normalises token text to lower case, handling t-prothesis
+	/// and n-eclipsis (i.e., that 'nAthair' should become 'n-athair')
+	/// </summary>
+	public sealed class IrishLowerCaseFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Create an IrishLowerCaseFilter that normalises Irish token text.
+	  /// </summary>
+	  public IrishLowerCaseFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  char[] chArray = termAtt.buffer();
+		  int chLen = termAtt.length();
+		  int idx = 0;
+
+		  if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1]))
+		  {
+			chArray = termAtt.resizeBuffer(chLen + 1);
+			for (int i = chLen; i > 1; i--)
+			{
+			  chArray[i] = chArray[i - 1];
+			}
+			chArray[1] = '-';
+			termAtt.Length = chLen + 1;
+			idx = 2;
+			chLen = chLen + 1;
+		  }
+
+		  for (int i = idx; i < chLen;)
+		  {
+			chArray[i] = char.ToLower(chArray[i]); // lower-case one UTF-16 unit at a time (no char.toChars in .NET)
+			i++;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  private bool isUpperVowel(int v)
+	  {
+		switch (v)
+		{
+		  case 'A':
+		  case 'E':
+		  case 'I':
+		  case 'O':
+		  case 'U':
+		  // vowels with acute accent (fada)
+		  case '\u00c1':
+		  case '\u00c9':
+		  case '\u00cd':
+		  case '\u00d3':
+		  case '\u00da':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
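
Tracing the prefix branch above (these traces follow directly from the code):

    "nAthair" -> grow buffer, shift tail, '-' at index 1 -> "n-Athair"
              -> lower-case from index 2 -> "n-athair"
    "Athair"  -> no n-/t- prefix handling -> lower-case from index 0 -> "athair"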

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..83183ce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ga/IrishLowerCaseFilterFactory.cs
@@ -0,0 +1,65 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ga
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="IrishLowerCaseFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.IrishLowerCaseFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class IrishLowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new IrishLowerCaseFilterFactory </summary>
+	  public IrishLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new IrishLowerCaseFilter(input);
+	  }
+
+	  // this will 'mostly work', except for special cases, just like most other filters
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianAnalyzer.cs
new file mode 100644
index 0000000..5b1f540
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianAnalyzer.cs
@@ -0,0 +1,137 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Galician.
+	/// </summary>
+	public sealed class GalicianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Galician stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(GalicianAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException ex)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set", ex);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public GalicianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public GalicianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="GalicianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new GalicianStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilter.cs
new file mode 100644
index 0000000..77dc426
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="GalicianMinimalStemmer"/> to stem 
+	/// Galician words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class GalicianMinimalStemFilter : TokenFilter
+	{
+	  private readonly GalicianMinimalStemmer stemmer = new GalicianMinimalStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public GalicianMinimalStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..3bf12e7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GalicianMinimalStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_glplural" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GalicianMinimalStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GalicianMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GalicianMinimalStemFilterFactory </summary>
+	  public GalicianMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GalicianMinimalStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemmer.cs
new file mode 100644
index 0000000..dff283b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianMinimalStemmer.cs
@@ -0,0 +1,43 @@
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using RSLPStemmerBase = org.apache.lucene.analysis.pt.RSLPStemmerBase;
+
+	/// <summary>
+	/// Minimal Stemmer for Galician
+	/// <para>
+	/// This follows the "RSLP-S" algorithm, but modified for Galician.
+	/// Hence this stemmer only applies the plural reduction step of:
+	/// "Regras do lematizador para o galego"
+	/// </para>
+	/// </summary>
+	/// <seealso cref="RSLPStemmerBase"/>
+	public class GalicianMinimalStemmer : RSLPStemmerBase
+	{
+
+	  private static readonly Step pluralStep = parse(typeof(GalicianMinimalStemmer), "galician.rslp")["Plural"];
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		return pluralStep.apply(s, len);
+	  }
+	}
+
+}
\ No newline at end of file
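
A small hypothetical driver (not part of this commit). The exact reduction
depends on the "Plural" step parsed from galician.rslp; the output shown
assumes the usual RSLP-S plural stripping:

    using System;
    using org.apache.lucene.analysis.gl;

    class GalicianMinimalStemmerDemo
    {
        static void Main()
        {
            var stemmer = new GalicianMinimalStemmer();
            char[] term = "amigos".ToCharArray();
            int len = stemmer.stem(term, term.Length);
            Console.WriteLine(new string(term, 0, len)); // expected (assumption): "amigo"
        }
    }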

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilter.cs
new file mode 100644
index 0000000..9cff9c6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilter.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="GalicianStemmer"/> to stem 
+	/// Galician words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class GalicianStemFilter : TokenFilter
+	{
+	  private readonly GalicianStemmer stemmer = new GalicianStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public GalicianStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			// this stemmer increases word length by 1: worst case '*çom' -> '*ción'
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int len = termAtt.length();
+			int len = termAtt.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
+			int newlen = stemmer.stem(termAtt.resizeBuffer(len + 1), len);
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
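
Note the resizeBuffer(len + 1) call in incrementToken above: Galician stemming can
lengthen a term by one character (the '*çom' -> '*ción' case), so the term buffer must
be grown before the stemmer writes into it. The invariant, isolated as a sketch against
the raw-port attribute API (not a drop-in addition to the filter):

    int len = termAtt.length();
    char[] buf = termAtt.resizeBuffer(len + 1);  // guarantee one spare slot
    termAtt.Length = stemmer.stem(buf, len);     // stemmer returns the new valid length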

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilterFactory.cs
new file mode 100644
index 0000000..3e3c393
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="GalicianStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.GalicianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class GalicianStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new GalicianStemFilterFactory </summary>
+	  public GalicianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new GalicianStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemmer.cs
new file mode 100644
index 0000000..f435318
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Gl/GalicianStemmer.cs
@@ -0,0 +1,102 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.gl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using RSLPStemmerBase = org.apache.lucene.analysis.pt.RSLPStemmerBase;
+
+	/// <summary>
+	/// Galician stemmer implementing "Regras do lematizador para o galego".
+	/// </summary>
+	/// <seealso cref= RSLPStemmerBase </seealso>
+	/// <seealso cref= <a href="http://bvg.udc.es/recursos_lingua/stemming.jsp">Description of rules</a> </seealso>
+	public class GalicianStemmer : RSLPStemmerBase
+	{
+	  private static readonly Step plural, unification, adverb, augmentative, noun, verb, vowel;
+
+	  static GalicianStemmer()
+	  {
+		IDictionary<string, Step> steps = parse(typeof(GalicianStemmer), "galician.rslp");
+		plural = steps["Plural"];
+		unification = steps["Unification"];
+		adverb = steps["Adverb"];
+		augmentative = steps["Augmentative"];
+		noun = steps["Noun"];
+		verb = steps["Verb"];
+		vowel = steps["Vowel"];
+	  }
+
+	  /// <param name="s"> buffer, oversized to at least <code>len+1</code> </param>
+	  /// <param name="len"> initial valid length of buffer </param>
+	  /// <returns> new valid length, stemmed </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		Debug.Assert(s.Length >= len + 1, "this stemmer requires a buffer oversized by at least 1 character");
+
+		len = plural.apply(s, len);
+		len = unification.apply(s, len);
+		len = adverb.apply(s, len);
+
+		int oldlen;
+		do
+		{
+		  oldlen = len;
+		  len = augmentative.apply(s, len);
+		} while (len != oldlen);
+
+		oldlen = len;
+		len = noun.apply(s, len);
+		if (len == oldlen) // suffix not removed
+		{
+		  len = verb.apply(s, len);
+		}
+
+		len = vowel.apply(s, len);
+
+		// RSLG accent removal
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'á':
+				s[i] = 'a';
+				break;
+			case 'é':
+			case 'ê':
+				s[i] = 'e';
+				break;
+			case 'í':
+				s[i] = 'i';
+				break;
+			case 'ó':
+				s[i] = 'o';
+				break;
+			case 'ú':
+				s[i] = 'u';
+				break;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
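
Because stem asserts s.Length >= len + 1, a caller that starts from a string has to
copy it into an oversized buffer first. A hedged sketch (hypothetical word; raw-port
API as committed):

    string word = "palabras";                // hypothetical input
    char[] buf = new char[word.Length + 1];  // oversize by 1 per the parameter contract
    word.CopyTo(0, buf, 0, word.Length);
    var stemmer = new GalicianStemmer();
    int newLen = stemmer.stem(buf, word.Length);
    string stem = new string(buf, 0, newLen);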

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiAnalyzer.cs
new file mode 100644
index 0000000..c5f2b97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiAnalyzer.cs
@@ -0,0 +1,158 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using IndicNormalizationFilter = org.apache.lucene.analysis.@in.IndicNormalizationFilter;
+	using IndicTokenizer = org.apache.lucene.analysis.@in.IndicTokenizer;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Analyzer for Hindi.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating HindiAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, StandardTokenizer is used for tokenization
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class HindiAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Hindi stopwords.
+	  /// 
+	  /// The default stopword list is taken from http://members.unine.ch/jacques.savoy/clef/index.html
+	  /// and is BSD-licensed.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+	  private const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(HindiAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="version"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a stemming exclusion set </param>
+	  public HindiAnalyzer(Version version, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(version, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words 
+	  /// </summary>
+	  /// <param name="version"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public HindiAnalyzer(Version version, CharArraySet stopwords) : this(version, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words:
+	  /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public HindiAnalyzer(Version version) : this(version, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="IndicNormalizationFilter"/>,
+	  ///         <seealso cref="HindiNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided, <seealso cref="HindiStemFilter"/>, and
+	  ///         Hindi stop words. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source;
+		Tokenizer source;
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  source = new StandardTokenizer(matchVersion, reader);
+		}
+		else
+		{
+		  source = new IndicTokenizer(matchVersion, reader);
+		}
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new IndicNormalizationFilter(result);
+		result = new HindiNormalizationFilter(result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		result = new HindiStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
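
The createComponents override above gates the tokenizer on matchVersion: 3.6 and later
use StandardTokenizer, while older versions keep IndicTokenizer so existing indexes
still match. A sketch of constructing the analyzer with a stem exclusion set (raw-port
names as they appear in this commit; the CharArraySet constructor shape is assumed from
the Java original, so treat this as illustrative rather than final API):

    CharArraySet exclusions = new CharArraySet(Version.LUCENE_CURRENT, 1, false);
    exclusions.add("\u0939\u093F\u0902\u0926\u0940");  // hypothetical term to protect from stemming
    var analyzer = new HindiAnalyzer(Version.LUCENE_CURRENT,
                                     HindiAnalyzer.DefaultStopSet, exclusions);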

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilter.cs
new file mode 100644
index 0000000..2d31f1d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilter.cs
@@ -0,0 +1,62 @@
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="HindiNormalizer"/> to normalize the
+	/// orthography.
+	/// <para>
+	/// In some cases the normalization may cause unrelated terms to conflate, so
+	/// to prevent terms from being normalized use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= HindiNormalizer </seealso>
+	public sealed class HindiNormalizationFilter : TokenFilter
+	{
+
+	  private readonly HindiNormalizer normalizer = new HindiNormalizer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+
+	  public HindiNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAtt.Keyword)
+		  {
+			termAtt.Length = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  }
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file
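
As the summary warns, normalization can conflate unrelated terms; the escape hatch is
to mark terms as keywords upstream so the keywordAtt.Keyword test above skips them.
A sketch of such a chain (raw-port casing; illustrative only):

    TokenStream ts = new StandardTokenizer(matchVersion, reader);
    ts = new SetKeywordMarkerFilter(ts, protectedTerms);  // protectedTerms: a CharArraySet to leave untouched
    ts = new HindiNormalizationFilter(ts);                // skips tokens whose KeywordAttribute is set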

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilterFactory.cs
new file mode 100644
index 0000000..f18c199
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HindiNormalizationFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HindiNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HindiNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new HindiNormalizationFilterFactory </summary>
+	  public HindiNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new HindiNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizer.cs
new file mode 100644
index 0000000..ebd674b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiNormalizer.cs
@@ -0,0 +1,193 @@
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Normalizer for Hindi.
+	/// <para>
+	/// Normalizes text to remove some differences in spelling variations.
+	/// </para>
+	/// <para>
+	/// Implements the Hindi-language specific algorithm specified in:
+	/// <i>Word normalization in Indian languages</i>
+	/// Prasad Pingali and Vasudeva Varma.
+	/// http://web2py.iiit.ac.in/publications/default/download/inproceedings.pdf.3fe5b38c-02ee-41ce-9a8f-3e745670be32.pdf
+	/// </para>
+	/// <para>
+	/// with the following additions from <i>Hindi CLIR in Thirty Days</i>
+	/// Leah S. Larkey, Margaret E. Connell, and Nasreen AbdulJaleel.
+	/// http://maroo.cs.umass.edu/pub/web/getpdf.php?id=454:
+	/// <ul>
+	///  <li>Internal Zero-width joiner and Zero-width non-joiners are removed
+	///  <li>In addition to chandrabindu, NA+halant is normalized to anusvara
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class HindiNormalizer
+	{
+	  /// <summary>
+	  /// Normalize an input buffer of Hindi text
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int normalize(char[] s, int len)
+	  {
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			// dead n -> bindu
+		  case '\u0928':
+			if (i + 1 < len && s[i + 1] == '\u094D')
+			{
+			  s[i] = '\u0902';
+			  len = StemmerUtil.delete(s, i + 1, len);
+			}
+			break;
+		  // candrabindu -> bindu
+		  case '\u0901':
+			s[i] = '\u0902';
+			break;
+		  // nukta deletions
+		  case '\u093C':
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+		  case '\u0929':
+			s[i] = '\u0928';
+			break;
+		  case '\u0931':
+			s[i] = '\u0930';
+			break;
+		  case '\u0934':
+			s[i] = '\u0933';
+			break;
+		  case '\u0958':
+			s[i] = '\u0915';
+			break;
+		  case '\u0959':
+			s[i] = '\u0916';
+			break;
+		  case '\u095A':
+			s[i] = '\u0917';
+			break;
+		  case '\u095B':
+			s[i] = '\u091C';
+			break;
+		  case '\u095C':
+			s[i] = '\u0921';
+			break;
+		  case '\u095D':
+			s[i] = '\u0922';
+			break;
+		  case '\u095E':
+			s[i] = '\u092B';
+			break;
+		  case '\u095F':
+			s[i] = '\u092F';
+			break;
+			// zwj/zwnj -> delete
+		  case '\u200D':
+		  case '\u200C':
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+			// virama -> delete
+		  case '\u094D':
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+			// chandra/short -> replace
+		  case '\u0945':
+		  case '\u0946':
+			s[i] = '\u0947';
+			break;
+		  case '\u0949':
+		  case '\u094A':
+			s[i] = '\u094B';
+			break;
+		  case '\u090D':
+		  case '\u090E':
+			s[i] = '\u090F';
+			break;
+		  case '\u0911':
+		  case '\u0912':
+			s[i] = '\u0913';
+			break;
+		  case '\u0972':
+			s[i] = '\u0905';
+			break;
+			// long -> short ind. vowels
+		  case '\u0906':
+			s[i] = '\u0905';
+			break;
+		  case '\u0908':
+			s[i] = '\u0907';
+			break;
+		  case '\u090A':
+			s[i] = '\u0909';
+			break;
+		  case '\u0960':
+			s[i] = '\u090B';
+			break;
+		  case '\u0961':
+			s[i] = '\u090C';
+			break;
+		  case '\u0910':
+			s[i] = '\u090F';
+			break;
+		  case '\u0914':
+			s[i] = '\u0913';
+			break;
+			// long -> short dep. vowels
+		  case '\u0940':
+			s[i] = '\u093F';
+			break;
+		  case '\u0942':
+			s[i] = '\u0941';
+			break;
+		  case '\u0944':
+			s[i] = '\u0943';
+			break;
+		  case '\u0963':
+			s[i] = '\u0962';
+			break;
+		  case '\u0948':
+			s[i] = '\u0947';
+			break;
+		  case '\u094C':
+			s[i] = '\u094B';
+			break;
+		  default:
+			break;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
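
normalize rewrites characters in place and may shrink the buffer: the deletion rules
(nukta, ZWJ/ZWNJ, virama) call StemmerUtil.delete and step i back so the shifted
character is re-examined. Tracing the first rule above: a dead न (U+0928) followed by
virama (U+094D) becomes anusvara (U+0902) and the pair collapses to one character.
A call sketch:

    var normalizer = new HindiNormalizer();
    char[] buf = "\u0928\u094D\u0915".ToCharArray();     // dead-n + virama + ka
    int newLen = normalizer.normalize(buf, buf.Length);  // 2: '\u0902' then '\u0915'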

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilter.cs
new file mode 100644
index 0000000..9098b66
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilter.cs
@@ -0,0 +1,56 @@
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="HindiStemmer"/> to stem Hindi words.
+	/// </summary>
+	public sealed class HindiStemFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly HindiStemmer stemmer = new HindiStemmer();
+
+	  public HindiStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAtt.Keyword)
+		  {
+			termAtt.Length = stemmer.stem(termAtt.buffer(), termAtt.length());
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file


[20/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
new file mode 100644
index 0000000..4f068b3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HindiStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HindiStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HindiStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new HindiStemFilterFactory </summary>
+	  public HindiStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new HindiStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
new file mode 100644
index 0000000..b177a31
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
@@ -0,0 +1,71 @@
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Hindi.
+	/// <para>
+	/// Implements the algorithm specified in:
+	/// <i>A Lightweight Stemmer for Hindi</i>
+	/// Ananthakrishnan Ramanathan and Durgesh D Rao.
+	/// http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf
+	/// </para>
+	/// </summary>
+	public class HindiStemmer
+	{
+	  public virtual int stem(char[] buffer, int len)
+	  {
+		// 5
+		if ((len > 6) &&
+			(StemmerUtil.EndsWith(buffer, len, "ाएंगी") || StemmerUtil.EndsWith(buffer, len, "ाएंगे") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाऊंगी") || StemmerUtil.EndsWith(buffer, len, "ाऊंगा") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाइयाँ") || StemmerUtil.EndsWith(buffer, len, "ाइयों") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाइयां")))
+		{
+		  return len - 5;
+		}
+
+		// 4
+		if ((len > 5) &&
+			(StemmerUtil.EndsWith(buffer, len, "ाएगी") || StemmerUtil.EndsWith(buffer, len, "ाएगा") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाओगी") || StemmerUtil.EndsWith(buffer, len, "ाओगे") ||
+			 StemmerUtil.EndsWith(buffer, len, "एंगी") || StemmerUtil.EndsWith(buffer, len, "ेंगी") ||
+			 StemmerUtil.EndsWith(buffer, len, "एंगे") || StemmerUtil.EndsWith(buffer, len, "ेंगे") ||
+			 StemmerUtil.EndsWith(buffer, len, "ूंगी") || StemmerUtil.EndsWith(buffer, len, "ूंगा") ||
+			 StemmerUtil.EndsWith(buffer, len, "ातीं") || StemmerUtil.EndsWith(buffer, len, "नाओं") ||
+			 StemmerUtil.EndsWith(buffer, len, "नाएं") || StemmerUtil.EndsWith(buffer, len, "ताओं") ||
+			 StemmerUtil.EndsWith(buffer, len, "ताएं") || StemmerUtil.EndsWith(buffer, len, "ियाँ") ||
+			 StemmerUtil.EndsWith(buffer, len, "ियों") || StemmerUtil.EndsWith(buffer, len, "ियां")))
+		{
+		  return len - 4;
+		}
+
+		// 3
+		if ((len > 4) &&
+			(StemmerUtil.EndsWith(buffer, len, "ाकर") || StemmerUtil.EndsWith(buffer, len, "ाइए") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाईं") || StemmerUtil.EndsWith(buffer, len, "ाया") ||
+			 StemmerUtil.EndsWith(buffer, len, "ेगी") || StemmerUtil.EndsWith(buffer, len, "ेगा") ||
+			 StemmerUtil.EndsWith(buffer, len, "ोगी") || StemmerUtil.EndsWith(buffer, len, "ोगे") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाने") || StemmerUtil.EndsWith(buffer, len, "ाना") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाते") || StemmerUtil.EndsWith(buffer, len, "ाती") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाता") || StemmerUtil.EndsWith(buffer, len, "तीं") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाओं") || StemmerUtil.EndsWith(buffer, len, "ाएं") ||
+			 StemmerUtil.EndsWith(buffer, len, "ुओं") || StemmerUtil.EndsWith(buffer, len, "ुएं") ||
+			 StemmerUtil.EndsWith(buffer, len, "ुआं")))
+		{
+		  return len - 3;
+		}
+
+		// 2
+		if ((len > 3) &&
+			(StemmerUtil.EndsWith(buffer, len, "कर") || StemmerUtil.EndsWith(buffer, len, "ाओ") ||
+			 StemmerUtil.EndsWith(buffer, len, "िए") || StemmerUtil.EndsWith(buffer, len, "ाई") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाए") || StemmerUtil.EndsWith(buffer, len, "ने") ||
+			 StemmerUtil.EndsWith(buffer, len, "नी") || StemmerUtil.EndsWith(buffer, len, "ना") ||
+			 StemmerUtil.EndsWith(buffer, len, "ते") || StemmerUtil.EndsWith(buffer, len, "ीं") ||
+			 StemmerUtil.EndsWith(buffer, len, "ती") || StemmerUtil.EndsWith(buffer, len, "ता") ||
+			 StemmerUtil.EndsWith(buffer, len, "ाँ") || StemmerUtil.EndsWith(buffer, len, "ां") ||
+			 StemmerUtil.EndsWith(buffer, len, "ों") || StemmerUtil.EndsWith(buffer, len, "ें")))
+		{
+		  return len - 2;
+		}
+
+		// 1
+		if ((len > 2) &&
+			(StemmerUtil.EndsWith(buffer, len, "ो") || StemmerUtil.EndsWith(buffer, len, "े") ||
+			 StemmerUtil.EndsWith(buffer, len, "ू") || StemmerUtil.EndsWith(buffer, len, "ु") ||
+			 StemmerUtil.EndsWith(buffer, len, "ी") || StemmerUtil.EndsWith(buffer, len, "ि") ||
+			 StemmerUtil.EndsWith(buffer, len, "ा")))
+		{
+		  return len - 1;
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
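
The stemmer tries suffixes longest-first, and each length class keeps at least two
characters of stem (the len > suffixLength + 1 guards). Worked example from the rules
above: "करता" is four chars (क र त ा), so only the two-char class applies; it ends with
"ता", and stem returns len - 2, leaving "कर". As a call sketch:

    var stemmer = new HindiStemmer();
    char[] buf = "करता".ToCharArray();
    int newLen = stemmer.stem(buf, buf.Length);  // 2, i.e. "कर"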

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
new file mode 100644
index 0000000..a47d86a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using HungarianStemmer = org.tartarus.snowball.ext.HungarianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Hungarian.
+	/// </summary>
+	public sealed class HungarianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Hungarian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public HungarianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided, and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new HungarianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
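
The chain built above (StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
StopFilter -> optional SetKeywordMarkerFilter -> SnowballFilter) is consumed through
the standard incrementToken loop. A sketch in the raw-port casing used throughout this
commit, assuming Analyzer keeps its Java tokenStream(field, reader) entry point
(illustrative; later passes will likely rename these members to .NET conventions):

    TokenStream stream = analyzer.tokenStream("body", new StringReader(text));
    var term = (CharTermAttribute) stream.addAttribute(typeof(CharTermAttribute));
    stream.reset();
    while (stream.incrementToken())
    {
        Console.WriteLine(term.ToString());  // one stemmed Hungarian token per call
    }
    stream.end();
    stream.close();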

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
new file mode 100644
index 0000000..c51040b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="HungarianLightStemmer"/> to stem
+	/// Hungarian words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class HungarianLightStemFilter : TokenFilter
+	{
+	  private readonly HungarianLightStemmer stemmer = new HungarianLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public HungarianLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
new file mode 100644
index 0000000..090c64b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HungarianLightStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HungarianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new HungarianLightStemFilterFactory </summary>
+	  public HungarianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new HungarianLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
new file mode 100644
index 0000000..eb29272
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
@@ -0,0 +1,292 @@
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Hungarian.
+	/// <para>
+	/// This stemmer implements the "UniNE" algorithm in:
+	/// <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class HungarianLightStemmer
+	{
+	  public virtual int stem(char[] s, int len)
+	  {
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'á':
+				s[i] = 'a';
+				break;
+			case 'ë':
+			case 'é':
+				s[i] = 'e';
+				break;
+			case 'í':
+				s[i] = 'i';
+				break;
+			case 'ó':
+			case 'ő':
+			case 'õ':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'ú':
+			case 'ű':
+			case 'ũ':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+		  }
+		}
+
+		len = removeCase(s, len);
+		len = removePossessive(s, len);
+		len = removePlural(s, len);
+		return normalize(s, len);
+	  }
+
+	  private int removeCase(char[] s, int len)
+	  {
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "kent"))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "nak") || StemmerUtil.EndsWith(s, len, "nek") || StemmerUtil.EndsWith(s, len, "val") || StemmerUtil.EndsWith(s, len, "vel") || StemmerUtil.EndsWith(s, len, "ert") || StemmerUtil.EndsWith(s, len, "rol") || StemmerUtil.EndsWith(s, len, "ban") || StemmerUtil.EndsWith(s, len, "ben") || StemmerUtil.EndsWith(s, len, "bol") || StemmerUtil.EndsWith(s, len, "nal") || StemmerUtil.EndsWith(s, len, "nel") || StemmerUtil.EndsWith(s, len, "hoz") || StemmerUtil.EndsWith(s, len, "hez") || StemmerUtil.EndsWith(s, len, "tol"))
+		  {
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "al") || StemmerUtil.EndsWith(s, len, "el"))
+		  {
+			if (!isVowel(s[len - 3]) && s[len - 3] == s[len - 4])
+			{
+			  return len - 3;
+			}
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "at") || StemmerUtil.EndsWith(s, len, "et") || StemmerUtil.EndsWith(s, len, "ot") || StemmerUtil.EndsWith(s, len, "va") || StemmerUtil.EndsWith(s, len, "ve") || StemmerUtil.EndsWith(s, len, "ra") || StemmerUtil.EndsWith(s, len, "re") || StemmerUtil.EndsWith(s, len, "ba") || StemmerUtil.EndsWith(s, len, "be") || StemmerUtil.EndsWith(s, len, "ul") || StemmerUtil.EndsWith(s, len, "ig"))
+		  {
+			return len - 2;
+		  }
+
+		  if ((StemmerUtil.EndsWith(s, len, "on") || StemmerUtil.EndsWith(s, len, "en")) && !isVowel(s[len - 3]))
+		  {
+			  return len - 2;
+		  }
+
+		  switch (s[len - 1])
+		  {
+			case 't':
+			case 'n':
+				return len - 1;
+			case 'a':
+			case 'e':
+				if (s[len - 2] == s[len - 3] && !isVowel(s[len - 2]))
+				{
+					return len - 2;
+				}
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int removePossessive(char[] s, int len)
+	  {
+		if (len > 6)
+		{
+		  if (!isVowel(s[len - 5]) && (StemmerUtil.EndsWith(s, len, "atok") || StemmerUtil.EndsWith(s, len, "otok") || StemmerUtil.EndsWith(s, len, "etek")))
+		  {
+			return len - 4;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "itek") || StemmerUtil.EndsWith(s, len, "itok"))
+		  {
+			return len - 4;
+		  }
+		}
+
+		if (len > 5)
+		{
+		  if (!isVowel(s[len - 4]) && (StemmerUtil.EndsWith(s, len, "unk") || StemmerUtil.EndsWith(s, len, "tok") || StemmerUtil.EndsWith(s, len, "tek")))
+		  {
+			return len - 3;
+		  }
+
+		  if (isVowel(s[len - 4]) && StemmerUtil.EndsWith(s, len, "juk"))
+		  {
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ink"))
+		  {
+			return len - 3;
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (!isVowel(s[len - 3]) && (StemmerUtil.EndsWith(s, len, "am") || StemmerUtil.EndsWith(s, len, "em") || StemmerUtil.EndsWith(s, len, "om") || StemmerUtil.EndsWith(s, len, "ad") || StemmerUtil.EndsWith(s, len, "ed") || StemmerUtil.EndsWith(s, len, "od") || StemmerUtil.EndsWith(s, len, "uk")))
+		  {
+			return len - 2;
+		  }
+
+		  if (isVowel(s[len - 3]) && (StemmerUtil.EndsWith(s, len, "nk") || StemmerUtil.EndsWith(s, len, "ja") || StemmerUtil.EndsWith(s, len, "je")))
+		  {
+			return len - 2;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "im") || StemmerUtil.EndsWith(s, len, "id") || StemmerUtil.EndsWith(s, len, "ik"))
+		  {
+			return len - 2;
+		  }
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a':
+			case 'e':
+				if (!isVowel(s[len - 2]))
+				{
+					return len - 1;
+				}
+				break;
+			case 'm':
+			case 'd':
+				if (isVowel(s[len - 2]))
+				{
+					return len - 1;
+				}
+				break;
+			case 'i':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("fallthrough") private int removePlural(char s[] , int len)
+	  private int removePlural(char[] s, int len)
+	  {
+		if (len > 3 && s[len - 1] == 'k')
+		{
+		  switch (s[len - 2])
+		  {
+			case 'a':
+			case 'o':
+			case 'e': // intentional fallthru
+				if (len > 4)
+				{
+					return len - 2;
+				}
+				goto default; // C# forbids implicit fallthrough; emulates the Java "intentional fallthru"
+			default:
+				return len - 1;
+		  }
+		}
+		return len;
+	  }
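+	  // Illustrative trace (not part of the original source): "hazak" ("houses")
+	  // ends in 'k' preceded by 'a' with len 5 > 4, so removePlural drops two
+	  // chars and returns "haz".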
+
+	  private int normalize(char[] s, int len)
+	  {
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+				return len - 1;
+		  }
+		}
+		return len;
+	  }
+
+	  private bool isVowel(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+		  case 'y':
+			  return true;
+		  default:
+			  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
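
For reference, the pipeline above (accent folding, then removeCase, removePossessive,
removePlural and normalize) is upstream Lucene's HungarianLightStemmer. A minimal
driver sketch (illustrative only; the stem(char[], int) signature is taken from the
upstream Java class this file ports, and the input is assumed lower-cased):

	var stemmer = new HungarianLightStemmer();
	char[] term = "varosokat".ToCharArray();      // "cities" (accusative), accents already folded
	int newLen = stemmer.stem(term, term.Length); // strips "at" (case), then "ok" (plural)
	string stem = new string(term, 0, newLen);    // "varos"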

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
new file mode 100644
index 0000000..ff6f4e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -0,0 +1,1235 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using ByteArrayDataOutput = org.apache.lucene.store.ByteArrayDataOutput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using BytesRefHash = org.apache.lucene.util.BytesRefHash;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using OfflineSorter = org.apache.lucene.util.OfflineSorter;
+	using ByteSequencesReader = org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
+	using ByteSequencesWriter = org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+	using RegExp = org.apache.lucene.util.automaton.RegExp;
+	using Builder = org.apache.lucene.util.fst.Builder;
+	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using IntSequenceOutputs = org.apache.lucene.util.fst.IntSequenceOutputs;
+	using Outputs = org.apache.lucene.util.fst.Outputs;
+	using Util = org.apache.lucene.util.fst.Util;
+
+
+	/// <summary>
+	/// In-memory structure for the dictionary (.dic) and affix (.aff)
+	/// data of a hunspell dictionary.
+	/// </summary>
+	public class Dictionary
+	{
+
+	  internal static readonly char[] NOFLAGS = new char[0];
+
+	  private const string ALIAS_KEY = "AF";
+	  private const string PREFIX_KEY = "PFX";
+	  private const string SUFFIX_KEY = "SFX";
+	  private const string FLAG_KEY = "FLAG";
+	  private const string COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
+	  private const string CIRCUMFIX_KEY = "CIRCUMFIX";
+	  private const string IGNORE_KEY = "IGNORE";
+	  private const string ICONV_KEY = "ICONV";
+	  private const string OCONV_KEY = "OCONV";
+
+	  private const string NUM_FLAG_TYPE = "num";
+	  private const string UTF8_FLAG_TYPE = "UTF-8";
+	  private const string LONG_FLAG_TYPE = "long";
+
+	  // TODO: really for suffixes we should reverse the automaton and run them backwards
+	  private const string PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
+	  private const string SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";
+
+	  internal FST<IntsRef> prefixes;
+	  internal FST<IntsRef> suffixes;
+
+	  // all condition checks used by prefixes and suffixes. these are typically re-used across
+	  // many affix stripping rules. so these are deduplicated, to save RAM.
+	  internal List<CharacterRunAutomaton> patterns = new List<CharacterRunAutomaton>();
+
+	  // the entries in the .dic file, mapping to their set of flags.
+	  // the fst output is the ordinal list for flagLookup
+	  internal FST<IntsRef> words;
+	  // the list of unique flagsets (wordforms). theoretically huge, but practically
+	  // small (e.g. for polish this is 756), otherwise humans wouldn't be able to deal with it either.
+	  internal BytesRefHash flagLookup = new BytesRefHash();
+
+	  // the list of unique strip affixes.
+	  internal char[] stripData;
+	  internal int[] stripOffsets;
+
+	  // 8 bytes per affix
+	  internal sbyte[] affixData = new sbyte[64];
+	  private int currentAffix = 0;
+
+	  private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
+
+	  private string[] aliases;
+	  private int aliasCount = 0;
+
+	  private readonly File tempDir = OfflineSorter.defaultTempDir(); // TODO: make this configurable?
+
+	  internal bool ignoreCase;
+	  internal bool complexPrefixes;
+	  internal bool twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
+
+	  internal int circumfix = -1; // circumfix flag, or -1 if one is not defined
+
+	  // ignored characters (dictionary, affix, inputs)
+	  private char[] ignore;
+
+	  // FSTs used for ICONV/OCONV, output ord pointing to replacement text
+	  internal FST<CharsRef> iconv;
+	  internal FST<CharsRef> oconv;
+
+	  internal bool needsInputCleaning;
+	  internal bool needsOutputCleaning;
+
+	  /// <summary>
+	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+	  /// and dictionary files.
+	  /// You have to close the provided InputStreams yourself.
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+	  /// <param name="dictionary"> InputStream for reading the hunspell dictionary file (won't be closed). </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.io.InputStream dictionary) throws java.io.IOException, java.text.ParseException
+	  public Dictionary(InputStream affix, InputStream dictionary) : this(affix, Collections.singletonList(dictionary), false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+	  /// and dictionary files.
+	  /// You have to close the provided InputStreams yourself.
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+	  /// <param name="dictionaries"> InputStream for reading the hunspell dictionary files (won't be closed). </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.util.List<java.io.InputStream> dictionaries, boolean ignoreCase) throws java.io.IOException, java.text.ParseException
+	  public Dictionary(InputStream affix, IList<InputStream> dictionaries, bool ignoreCase)
+	  {
+		this.ignoreCase = ignoreCase;
+		this.needsInputCleaning = ignoreCase;
+		this.needsOutputCleaning = false; // set if we have an OCONV
+		flagLookup.add(new BytesRef()); // no flags -> ord 0
+
+		File aff = File.createTempFile("affix", "aff", tempDir);
+		OutputStream @out = new BufferedOutputStream(new FileOutputStream(aff));
+		InputStream aff1 = null;
+		InputStream aff2 = null;
+		try
+		{
+		  // copy contents of affix stream to temp file
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final byte [] buffer = new byte [1024 * 8];
+		  sbyte[] buffer = new sbyte [1024 * 8];
+		  int len;
+		  while ((len = affix.read(buffer)) > 0)
+		  {
+			@out.write(buffer, 0, len);
+		  }
+		  @out.close();
+
+		  // pass 1: get encoding
+		  aff1 = new BufferedInputStream(new FileInputStream(aff));
+		  string encoding = getDictionaryEncoding(aff1);
+
+		  // pass 2: parse affixes
+		  CharsetDecoder decoder = getJavaEncoding(encoding);
+		  aff2 = new BufferedInputStream(new FileInputStream(aff));
+		  readAffixFile(aff2, decoder);
+
+		  // read dictionary entries
+		  IntSequenceOutputs o = IntSequenceOutputs.Singleton;
+		  Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
+		  readDictionaryFiles(dictionaries, decoder, b);
+		  words = b.finish();
+		  aliases = null; // no longer needed
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(@out, aff1, aff2);
+		  aff.delete();
+		}
+	  }
+
+	  /// <summary>
+	  /// Looks up Hunspell word forms from the dictionary
+	  /// </summary>
+	  internal virtual IntsRef lookupWord(char[] word, int offset, int length)
+	  {
+		return lookup(words, word, offset, length);
+	  }
+
+	  /// <summary>
+	  /// Looks up HunspellAffix prefixes that have an append that matches the String created from the given char array, offset and length
+	  /// </summary>
+	  /// <param name="word"> Char array to generate the String from </param>
+	  /// <param name="offset"> Offset in the char array that the String starts at </param>
+	  /// <param name="length"> Length from the offset that the String is </param>
+	  /// <returns> List of HunspellAffix prefixes with an append that matches the String, or {@code null} if none are found </returns>
+	  internal virtual IntsRef lookupPrefix(char[] word, int offset, int length)
+	  {
+		return lookup(prefixes, word, offset, length);
+	  }
+
+	  /// <summary>
+	  /// Looks up HunspellAffix suffixes that have an append that matches the String created from the given char array, offset and length
+	  /// </summary>
+	  /// <param name="word"> Char array to generate the String from </param>
+	  /// <param name="offset"> Offset in the char array that the String starts at </param>
+	  /// <param name="length"> Length from the offset that the String is </param>
+	  /// <returns> List of HunspellAffix suffixes with an append that matches the String, or {@code null} if none are found </returns>
+	  internal virtual IntsRef lookupSuffix(char[] word, int offset, int length)
+	  {
+		return lookup(suffixes, word, offset, length);
+	  }
+
+	  // TODO: this is pretty stupid, considering how the stemming algorithm works
+	  // we can speed it up to be significantly faster!
+	  internal virtual IntsRef lookup(FST<IntsRef> fst, char[] word, int offset, int length)
+	  {
+		if (fst == null)
+		{
+		  return null;
+		}
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader bytesReader = fst.getBytesReader();
+		FST.BytesReader bytesReader = fst.BytesReader;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef> arc = fst.getFirstArc(new org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef>());
+		FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
+		// Accumulate output as we go
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
+		IntsRef NO_OUTPUT = fst.outputs.NoOutput;
+		IntsRef output = NO_OUTPUT;
+
+		int l = offset + length;
+		try
+		{
+		  for (int i = offset, cp = 0; i < l; i += char.charCount(cp))
+		  {
+			cp = char.codePointAt(word, i, l);
+			if (fst.findTargetArc(cp, arc, arc, bytesReader) == null)
+			{
+			  return null;
+			}
+			else if (arc.output != NO_OUTPUT)
+			{
+			  output = fst.outputs.add(output, arc.output);
+			}
+		  }
+		  if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null)
+		  {
+			return null;
+		  }
+		  else if (arc.output != NO_OUTPUT)
+		  {
+			return fst.outputs.add(output, arc.output);
+		  }
+		  else
+		  {
+			return output;
+		  }
+		}
+		catch (IOException bogus)
+		{
+		  throw new Exception(bogus.Message, bogus); // .NET's Exception has no ctor taking only an inner exception
+		}
+	  }
+
+	  /// <summary>
+	  /// Reads the affix file through the provided InputStream, building up the prefix and suffix maps
+	  /// </summary>
+	  /// <param name="affixStream"> InputStream to read the content of the affix file from </param>
+	  /// <param name="decoder"> CharsetDecoder to decode the content of the file </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void readAffixFile(java.io.InputStream affixStream, java.nio.charset.CharsetDecoder decoder) throws java.io.IOException, java.text.ParseException
+	  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder)
+	  {
+		SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
+		SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
+		IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();
+
+		// zero condition -> 0 ord
+		seenPatterns[".*"] = 0;
+		patterns.Add(null);
+
+		// zero strip -> 0 ord
+		IDictionary<string, int?> seenStrips = new LinkedHashMap<string, int?>();
+		seenStrips[""] = 0;
+
+		LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
+		string line = null;
+		while ((line = reader.readLine()) != null)
+		{
+		  // ignore any BOM marker on first line
+		  if (reader.LineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
+		  {
+			line = line.Substring(1);
+		  }
+		  if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
+		  {
+			parseAlias(line);
+		  }
+		  else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
+		  {
+			parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+		  }
+		  else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
+		  {
+			parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+		  }
+		  else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
+		  {
+			// Assume that the FLAG line comes before any prefix or suffixes
+			// Store the strategy so it can be used when parsing the dic file
+			flagParsingStrategy = getFlagParsingStrategy(line);
+		  }
+		  else if (line.Equals(COMPLEXPREFIXES_KEY))
+		  {
+			complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
+		  }
+		  else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
+		  {
+			string[] parts = line.Split("\\s+", true);
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal CIRCUMFIX declaration", reader.LineNumber);
+			}
+			circumfix = flagParsingStrategy.parseFlag(parts[1]);
+		  }
+		  else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
+		  {
+			string[] parts = line.Split("\\s+", true);
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal IGNORE declaration", reader.LineNumber);
+			}
+			ignore = parts[1].ToCharArray();
+			Arrays.sort(ignore);
+			needsInputCleaning = true;
+		  }
+		  else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
+		  {
+			string[] parts = line.Split("\\s+", true);
+			string type = parts[0];
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal " + type + " declaration", reader.LineNumber);
+			}
+			int num = int.Parse(parts[1]);
+			FST<CharsRef> res = parseConversions(reader, num);
+			if (type.Equals("ICONV"))
+			{
+			  iconv = res;
+			  needsInputCleaning |= iconv != null;
+			}
+			else
+			{
+			  oconv = res;
+			  needsOutputCleaning |= oconv != null;
+			}
+		  }
+		}
+
+		this.prefixes = affixFST(prefixes);
+		this.suffixes = affixFST(suffixes);
+
+		int totalChars = 0;
+		foreach (string strip in seenStrips.Keys)
+		{
+		  totalChars += strip.Length;
+		}
+		stripData = new char[totalChars];
+		stripOffsets = new int[seenStrips.Count + 1];
+		int currentOffset = 0;
+		int currentIndex = 0;
+		foreach (string strip in seenStrips.Keys)
+		{
+		  stripOffsets[currentIndex++] = currentOffset;
+		  strip.CopyTo(0, stripData, currentOffset, strip.Length);
+		  currentOffset += strip.Length;
+		}
+		Debug.Assert(currentIndex == seenStrips.Count);
+		stripOffsets[currentIndex] = currentOffset;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.IntsRef> affixFST(java.util.TreeMap<String,java.util.List<Character>> affixes) throws java.io.IOException
+	  private FST<IntsRef> affixFST(SortedDictionary<string, IList<char?>> affixes)
+	  {
+		IntSequenceOutputs outputs = IntSequenceOutputs.Singleton;
+		Builder<IntsRef> builder = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, outputs);
+
+		IntsRef scratch = new IntsRef();
+		foreach (KeyValuePair<string, IList<char?>> entry in affixes.SetOfKeyValuePairs())
+		{
+		  Util.toUTF32(entry.Key, scratch);
+		  IList<char?> entries = entry.Value;
+		  IntsRef output = new IntsRef(entries.Count);
+		  foreach (char? c in entries)
+		  {
+			output.ints[output.length++] = c;
+		  }
+		  builder.add(scratch, output);
+		}
+		return builder.finish();
+	  }
+
+	  /// <summary>
+	  /// Parses a specific affix rule putting the result into the provided affix map
+	  /// </summary>
+	  /// <param name="affixes"> Map where the result of the parsing will be put </param>
+	  /// <param name="header"> Header line of the affix rule </param>
+	  /// <param name="reader"> BufferedReader to read the content of the rule from </param>
+	  /// <param name="conditionPattern"> <seealso cref="String#format(String, Object...)"/> pattern to be used to generate the condition regex
+	  ///                         pattern </param>
+	  /// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
+	  /// <exception cref="IOException"> Can be thrown while reading the rule </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void parseAffix(java.util.TreeMap<String,java.util.List<Character>> affixes, String header, java.io.LineNumberReader reader, String conditionPattern, java.util.Map<String,Integer> seenPatterns, java.util.Map<String,Integer> seenStrips) throws java.io.IOException, java.text.ParseException
+	  private void parseAffix(SortedDictionary<string, IList<char?>> affixes, string header, LineNumberReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
+	  {
+
+		BytesRef scratch = new BytesRef();
+		StringBuilder sb = new StringBuilder();
+		string[] args = header.Split("\\s+", true);
+
+		bool crossProduct = args[2].Equals("Y");
+
+		int numLines = int.Parse(args[3]);
+		affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
+		ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
+
+		for (int i = 0; i < numLines; i++)
+		{
+		  Debug.Assert(affixWriter.Position == currentAffix << 3);
+		  string line = reader.readLine();
+		  string[] ruleArgs = line.Split("\\s+", true);
+
+		  // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
+		  // condition is optional
+		  if (ruleArgs.Length < 4)
+		  {
+			  throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.LineNumber);
+		  }
+
+		  char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
+		  string strip = ruleArgs[2].Equals("0") ? "" : ruleArgs[2];
+		  string affixArg = ruleArgs[3];
+		  char[] appendFlags = null;
+
+		  int flagSep = affixArg.LastIndexOf('/');
+		  if (flagSep != -1)
+		  {
+			string flagPart = affixArg.Substring(flagSep + 1);
+			affixArg = affixArg.Substring(0, flagSep);
+
+			if (aliasCount > 0)
+			{
+			  flagPart = getAliasValue(int.Parse(flagPart));
+			}
+
+			appendFlags = flagParsingStrategy.parseFlags(flagPart);
+			Arrays.sort(appendFlags);
+			twoStageAffix = true;
+		  }
+
+		  // TODO: add test and fix zero-affix handling!
+
+		  string condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
+		  // at least the gascon affix file has this issue
+		  if (condition.StartsWith("[", StringComparison.Ordinal) && !condition.EndsWith("]", StringComparison.Ordinal))
+		  {
+			condition = condition + "]";
+		  }
+		  // "dash hasn't got special meaning" (we must escape it)
+		  if (condition.IndexOf('-') >= 0)
+		  {
+			condition = condition.Replace("-", "\\-");
+		  }
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String regex;
+		  string regex;
+		  if (".".Equals(condition))
+		  {
+			regex = ".*"; // Zero condition is indicated by dot
+		  }
+		  else if (condition.Equals(strip))
+		  {
+			regex = ".*"; // TODO: optimize this better:
+						  // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
+						  // but this is complicated...
+		  }
+		  else
+		  {
+			regex = string.format(Locale.ROOT, conditionPattern, condition);
+		  }
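+		  // For example (illustrative): a SFX rule whose condition is "[^aeiou]y"
+		  // is expanded through SUFFIX_CONDITION_REGEX_PATTERN (".*%s") into the
+		  // regex ".*[^aeiou]y", i.e. the condition must match at the end of the word.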
+
+		  // deduplicate patterns
+		  int? patternIndex = seenPatterns[regex];
+		  if (patternIndex == null)
+		  {
+			patternIndex = patterns.Count;
+			if (patternIndex > short.MaxValue)
+			{
+			  throw new System.NotSupportedException("Too many patterns, please report this to dev@lucene.apache.org");
+			}
+			seenPatterns[regex] = patternIndex;
+			CharacterRunAutomaton pattern = new CharacterRunAutomaton((new RegExp(regex, RegExp.NONE)).toAutomaton());
+			patterns.Add(pattern);
+		  }
+
+		  int? stripOrd = seenStrips[strip];
+		  if (stripOrd == null)
+		  {
+			stripOrd = seenStrips.Count;
+			seenStrips[strip] = stripOrd;
+			if (stripOrd > Char.MaxValue)
+			{
+			  throw new System.NotSupportedException("Too many unique strips, please report this to dev@lucene.apache.org");
+			}
+		  }
+
+		  if (appendFlags == null)
+		  {
+			appendFlags = NOFLAGS;
+		  }
+
+		  encodeFlags(scratch, appendFlags);
+		  int appendFlagsOrd = flagLookup.add(scratch);
+		  if (appendFlagsOrd < 0)
+		  {
+			// already exists in our hash
+			appendFlagsOrd = (-appendFlagsOrd) - 1;
+		  }
+		  else if (appendFlagsOrd > short.MaxValue)
+		  {
+			// this limit is probably flexible, but it's a good sanity check too
+			throw new System.NotSupportedException("Too many unique append flags, please report this to dev@lucene.apache.org");
+		  }
+
+		  affixWriter.writeShort((short)flag);
+		  affixWriter.writeShort((int)(short)stripOrd);
+		  // encode crossProduct into patternIndex
+		  int patternOrd = (int)patternIndex << 1 | (crossProduct ? 1 : 0);
+		  affixWriter.writeShort((short)patternOrd);
+		  affixWriter.writeShort((short)appendFlagsOrd);
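+		  // Layout summary (illustrative): each affix occupies 4 shorts = 8 bytes
+		  // of affixData, in order: flag, stripOrd, patternOrd, appendFlagsOrd.
+		  // patternOrd packs the pattern index with the cross-product bit, e.g.
+		  // patternIndex 5 with crossProduct = true encodes as (5 << 1) | 1 == 11.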
+
+		  if (needsInputCleaning)
+		  {
+			CharSequence cleaned = cleanInput(affixArg, sb);
+			affixArg = cleaned.ToString();
+		  }
+
+		  IList<char?> list = affixes[affixArg];
+		  if (list == null)
+		  {
+			list = new List<char?>();
+			affixes[affixArg] = list;
+		  }
+
+		  list.Add((char)currentAffix);
+		  currentAffix++;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> parseConversions(java.io.LineNumberReader reader, int num) throws java.io.IOException, java.text.ParseException
+	  private FST<CharsRef> parseConversions(LineNumberReader reader, int num)
+	  {
+		IDictionary<string, string> mappings = new SortedDictionary<string, string>();
+
+		for (int i = 0; i < num; i++)
+		{
+		  string line = reader.readLine();
+		  string[] parts = line.Split("\\s+", true);
+		  if (parts.Length != 3)
+		  {
+			throw new ParseException("invalid syntax: " + line, reader.LineNumber);
+		  }
+		  if (mappings.put(parts[1], parts[2]) != null)
+		  {
+			throw new System.InvalidOperationException("duplicate mapping specified for: " + parts[1]);
+		  }
+		}
+
+		Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+		Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
+		IntsRef scratchInts = new IntsRef();
+		foreach (KeyValuePair<string, string> entry in mappings.SetOfKeyValuePairs())
+		{
+		  Util.toUTF16(entry.Key, scratchInts);
+		  builder.add(scratchInts, new CharsRef(entry.Value));
+		}
+
+		return builder.finish();
+	  }
+
+	  /// <summary>
+	  /// pattern accepts optional BOM + SET + any whitespace </summary>
+	  internal static readonly Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
+
+	  /// <summary>
+	  /// Parses the encoding specified in the affix file readable through the provided InputStream
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the affix file </param>
+	  /// <returns> Encoding specified in the affix file </returns>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+	  /// <exception cref="ParseException"> Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>} </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static String getDictionaryEncoding(java.io.InputStream affix) throws java.io.IOException, java.text.ParseException
+	  internal static string getDictionaryEncoding(InputStream affix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final StringBuilder encoding = new StringBuilder();
+		StringBuilder encoding = new StringBuilder();
+		for (;;)
+		{
+		  encoding.Length = 0;
+		  int ch;
+		  while ((ch = affix.read()) >= 0)
+		  {
+			if (ch == '\n')
+			{
+			  break;
+			}
+			if (ch != '\r')
+			{
+			  encoding.Append((char)ch);
+			}
+		  }
+		  if (encoding.Length == 0 || encoding[0] == '#' || encoding.ToString().Trim().Length == 0)
+		  {
+			  // this test only at the end as ineffective but would allow lines only containing spaces:
+			if (ch < 0)
+			{
+			  throw new ParseException("Unexpected end of affix file.", 0);
+			}
+			continue;
+		  }
+		  Matcher matcher = ENCODING_PATTERN.matcher(encoding);
+		  if (matcher.find())
+		  {
+			int last = matcher.end();
+			return encoding.Substring(last).Trim();
+		  }
+		}
+	  }
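+	  // For example (illustrative): an affix file whose first non-comment,
+	  // non-blank line is "SET ISO8859-2" makes the loop above skip the leading
+	  // lines and return "ISO8859-2" (the trimmed text after the matched "SET ").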
+
+	  internal static readonly IDictionary<string, string> CHARSET_ALIASES;
+	  static Dictionary()
+	  {
+		IDictionary<string, string> m = new Dictionary<string, string>();
+		m["microsoft-cp1251"] = "windows-1251";
+		m["TIS620-2533"] = "TIS-620";
+		CHARSET_ALIASES = Collections.unmodifiableMap(m);
+	  }
+
+	  /// <summary>
+	  /// Retrieves the CharsetDecoder for the given encoding. Note: this isn't perfect,
+	  /// as encodings such as ISCII-DEVANAGARI and MICROSOFT-CP1251 are presumably also allowed...
+	  /// </summary>
+	  /// <param name="encoding"> Encoding to retrieve the CharsetDecoder for </param>
+	  /// <returns> CharSetDecoder for the given encoding </returns>
+	  private CharsetDecoder getJavaEncoding(string encoding)
+	  {
+		if ("ISO8859-14".Equals(encoding))
+		{
+		  return new ISO8859_14Decoder();
+		}
+		string canon = CHARSET_ALIASES[encoding];
+		if (canon != null)
+		{
+		  encoding = canon;
+		}
+		Charset charset = Charset.forName(encoding);
+		return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE);
+	  }
+
+	  /// <summary>
+	  /// Determines the appropriate <seealso cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file
+	  /// </summary>
+	  /// <param name="flagLine"> Line containing the flag information </param>
+	  /// <returns> FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition </returns>
+	  internal static FlagParsingStrategy getFlagParsingStrategy(string flagLine)
+	  {
+		string[] parts = flagLine.Split("\\s+", true);
+		if (parts.Length != 2)
+		{
+		  throw new System.ArgumentException("Illegal FLAG specification: " + flagLine);
+		}
+		string flagType = parts[1];
+
+		if (NUM_FLAG_TYPE.Equals(flagType))
+		{
+		  return new NumFlagParsingStrategy();
+		}
+		else if (UTF8_FLAG_TYPE.Equals(flagType))
+		{
+		  return new SimpleFlagParsingStrategy();
+		}
+		else if (LONG_FLAG_TYPE.Equals(flagType))
+		{
+		  return new DoubleASCIIFlagParsingStrategy();
+		}
+
+		throw new System.ArgumentException("Unknown flag type: " + flagType);
+	  }
+
+	  internal readonly char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
+
+	  internal virtual string unescapeEntry(string entry)
+	  {
+		StringBuilder sb = new StringBuilder();
+		for (int i = 0; i < entry.Length; i++)
+		{
+		  char ch = entry[i];
+		  if (ch == '\\' && i + 1 < entry.Length)
+		  {
+			sb.Append(entry[i + 1]);
+			i++;
+		  }
+		  else if (ch == '/')
+		  {
+			sb.Append(FLAG_SEPARATOR);
+		  }
+		  else
+		  {
+			sb.Append(ch);
+		  }
+		}
+		return sb.ToString();
+	  }
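+	  // Illustrative example: the .dic entry "foo\/bar/AB" unescapes to
+	  // "foo/bar" + FLAG_SEPARATOR + "AB" -- the escaped slash is kept literally,
+	  // while the bare slash introducing the flag part becomes 0x1f.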
+
+	  /// <summary>
+	  /// Reads the dictionary file through the provided InputStreams, building up the words map
+	  /// </summary>
+	  /// <param name="dictionaries"> InputStreams to read the dictionary file through </param>
+	  /// <param name="decoder"> CharsetDecoder used to decode the contents of the file </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the file </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void readDictionaryFiles(java.util.List<java.io.InputStream> dictionaries, java.nio.charset.CharsetDecoder decoder, org.apache.lucene.util.fst.Builder<org.apache.lucene.util.IntsRef> words) throws java.io.IOException
+	  private void readDictionaryFiles(IList<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
+	  {
+		BytesRef flagsScratch = new BytesRef();
+		IntsRef scratchInts = new IntsRef();
+
+		StringBuilder sb = new StringBuilder();
+
+		File unsorted = File.createTempFile("unsorted", "dat", tempDir);
+		OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted);
+		bool success = false;
+		try
+		{
+		  foreach (InputStream dictionary in dictionaries)
+		  {
+			BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
+			string line = lines.readLine(); // first line is number of entries (approximately, sometimes)
+
+			while ((line = lines.readLine()) != null)
+			{
+			  line = unescapeEntry(line);
+			  if (needsInputCleaning)
+			  {
+				int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
+				if (flagSep == -1)
+				{
+				  CharSequence cleansed = cleanInput(line, sb);
+				  writer.write(cleansed.ToString().GetBytes(StandardCharsets.UTF_8));
+				}
+				else
+				{
+				  string text = line.Substring(0, flagSep);
+				  CharSequence cleansed = cleanInput(text, sb);
+				  if (cleansed != sb)
+				  {
+					sb.Length = 0;
+					sb.Append(cleansed);
+				  }
+				  sb.Append(line.Substring(flagSep));
+				  writer.write(sb.ToString().GetBytes(StandardCharsets.UTF_8));
+				}
+			  }
+			  else
+			  {
+				writer.write(line.GetBytes(StandardCharsets.UTF_8));
+			  }
+			}
+		  }
+		  success = true;
+		}
+		finally
+		{
+		  if (success)
+		  {
+			IOUtils.close(writer);
+		  }
+		  else
+		  {
+			IOUtils.closeWhileHandlingException(writer);
+		  }
+		}
+		File sorted = File.createTempFile("sorted", "dat", tempDir);
+
+		OfflineSorter sorter = new OfflineSorter(new ComparatorAnonymousInnerClassHelper(this));
+		sorter.sort(unsorted, sorted);
+		unsorted.delete();
+
+		OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
+		BytesRef scratchLine = new BytesRef();
+
+		// TODO: the flags themselves can be double-chars (long) or also numeric
+		// either way the trick is to encode them as char... but they must be parsed differently
+
+		string currentEntry = null;
+		IntsRef currentOrds = new IntsRef();
+
+		string line;
+		while (reader.read(scratchLine))
+		{
+		  line = scratchLine.utf8ToString();
+		  string entry;
+		  char[] wordForm;
+
+		  int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
+		  if (flagSep == -1)
+		  {
+			wordForm = NOFLAGS;
+			entry = line;
+		  }
+		  else
+		  {
+			// note, there can be comments (morph description) after a flag.
+			// we should really look for any whitespace: currently just tab and space
+			int end = line.IndexOf('\t', flagSep);
+			if (end == -1)
+			{
+			  end = line.Length;
+			}
+			int end2 = line.IndexOf(' ', flagSep);
+			if (end2 == -1)
+			{
+			  end2 = line.Length;
+			}
+			end = Math.Min(end, end2);
+
+			string flagPart = StringHelperClass.SubstringSpecial(line, flagSep + 1, end);
+			if (aliasCount > 0)
+			{
+			  flagPart = getAliasValue(int.Parse(flagPart));
+			}
+
+			wordForm = flagParsingStrategy.parseFlags(flagPart);
+			Arrays.sort(wordForm);
+			entry = line.Substring(0, flagSep);
+		  }
+
+		  int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
+		  if (cmp < 0)
+		  {
+			throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
+		  }
+		  else
+		  {
+			encodeFlags(flagsScratch, wordForm);
+			int ord = flagLookup.add(flagsScratch);
+			if (ord < 0)
+			{
+			  // already exists in our hash
+			  ord = (-ord) - 1;
+			}
+			// finalize current entry, and switch "current" if necessary
+			if (cmp > 0 && currentEntry != null)
+			{
+			  Util.toUTF32(currentEntry, scratchInts);
+			  words.add(scratchInts, currentOrds);
+			}
+			// swap current
+			if (cmp > 0 || currentEntry == null)
+			{
+			  currentEntry = entry;
+			  currentOrds = new IntsRef(); // must be this way
+			}
+			currentOrds.grow(currentOrds.length + 1);
+			currentOrds.ints[currentOrds.length++] = ord;
+		  }
+		}
+
+		// finalize last entry
+		Util.toUTF32(currentEntry, scratchInts);
+		words.add(scratchInts, currentOrds);
+
+		reader.close();
+		sorted.delete();
+	  }
+
+	  private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>
+	  {
+		  private readonly Dictionary outerInstance;
+
+		  public ComparatorAnonymousInnerClassHelper(Dictionary outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+			  scratch1 = new BytesRef();
+			  scratch2 = new BytesRef();
+		  }
+
+		  internal BytesRef scratch1;
+		  internal BytesRef scratch2;
+
+		  public virtual int Compare(BytesRef o1, BytesRef o2)
+		  {
+			scratch1.bytes = o1.bytes;
+			scratch1.offset = o1.offset;
+			scratch1.length = o1.length;
+
+			for (int i = scratch1.length - 1; i >= 0; i--)
+			{
+			  if (scratch1.bytes[scratch1.offset + i] == outerInstance.FLAG_SEPARATOR)
+			  {
+				scratch1.length = i;
+				break;
+			  }
+			}
+
+			scratch2.bytes = o2.bytes;
+			scratch2.offset = o2.offset;
+			scratch2.length = o2.length;
+
+			for (int i = scratch2.length - 1; i >= 0; i--)
+			{
+			  if (scratch2.bytes[scratch2.offset + i] == outerInstance.FLAG_SEPARATOR)
+			  {
+				scratch2.length = i;
+				break;
+			  }
+			}
+
+			int cmp = scratch1.compareTo(scratch2);
+			if (cmp == 0)
+			{
+			  // tie break on whole row
+			  return o1.compareTo(o2);
+			}
+			else
+			{
+			  return cmp;
+			}
+		  }
+	  }
+
+	  internal static char[] decodeFlags(BytesRef b)
+	  {
+		if (b.length == 0)
+		{
+		  return CharsRef.EMPTY_CHARS;
+		}
+		int len = (int)((uint)b.length >> 1);
+		char[] flags = new char[len];
+		int upto = 0;
+		int end = b.offset + b.length;
+		for (int i = b.offset; i < end; i += 2)
+		{
+		  flags[upto++] = (char)((b.bytes[i] << 8) | (b.bytes[i + 1] & 0xff));
+		}
+		return flags;
+	  }
+
+	  internal static void encodeFlags(BytesRef b, char[] flags)
+	  {
+		int len = flags.Length << 1;
+		b.grow(len);
+		b.length = len;
+		int upto = b.offset;
+		for (int i = 0; i < flags.Length; i++)
+		{
+		  int flag = flags[i];
+		  b.bytes[upto++] = unchecked((sbyte)((flag >> 8) & 0xff));
+		  b.bytes[upto++] = unchecked((sbyte)(flag & 0xff));
+		}
+	  }
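+	  // Round-trip note (illustrative): each flag char is stored big-endian in two
+	  // bytes, so encodeFlags turns { 'A' } (0x0041) into the bytes 0x00 0x41, and
+	  // decodeFlags pairs the bytes back into the same chars.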
+
+	  private void parseAlias(string line)
+	  {
+		string[] ruleArgs = line.Split("\\s+", true);
+		if (aliases == null)
+		{
+		  //first line should be the aliases count
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int count = Integer.parseInt(ruleArgs[1]);
+		  int count = int.Parse(ruleArgs[1]);
+		  aliases = new string[count];
+		}
+		else
+		{
+		  // an alias can map to no flags
+		  string aliasValue = ruleArgs.Length == 1 ? "" : ruleArgs[1];
+		  aliases[aliasCount++] = aliasValue;
+		}
+	  }
+
+	  private string getAliasValue(int id)
+	  {
+		try
+		{
+		  return aliases[id - 1];
+		}
+		catch (System.IndexOutOfRangeException ex)
+		{
+		  throw new System.ArgumentException("Bad flag alias number:" + id, ex);
+		}
+	  }
+
+	  /// <summary>
+	  /// Abstraction of the process of parsing flags taken from the affix and dic files
+	  /// </summary>
+	  internal abstract class FlagParsingStrategy
+	  {
+
+		/// <summary>
+		/// Parses the given String into a single flag
+		/// </summary>
+		/// <param name="rawFlag"> String to parse into a flag </param>
+		/// <returns> Parsed flag </returns>
+		internal virtual char parseFlag(string rawFlag)
+		{
+		  char[] flags = parseFlags(rawFlag);
+		  if (flags.Length != 1)
+		  {
+			throw new System.ArgumentException("expected only one flag, got: " + rawFlag);
+		  }
+		  return flags[0];
+		}
+
+		/// <summary>
+		/// Parses the given String into multiple flags
+		/// </summary>
+		/// <param name="rawFlags"> String to parse into flags </param>
+		/// <returns> Parsed flags </returns>
+		internal abstract char[] parseFlags(string rawFlags);
+	  }
+
+	  /// <summary>
+	  /// Simple implementation of <seealso cref="FlagParsingStrategy"/> that treats each char in a String as an individual flag.
+	  /// Can be used with both the ASCII and UTF-8 flag types.
+	  /// </summary>
+	  private class SimpleFlagParsingStrategy : FlagParsingStrategy
+	  {
+		public override char[] parseFlags(string rawFlags)
+		{
+		  return rawFlags.ToCharArray();
+		}
+	  }
+
+	  /// <summary>
+	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded in its numerical form.  In the case
+	  /// of multiple flags, each number is separated by a comma.
+	  /// </summary>
+	  private class NumFlagParsingStrategy : FlagParsingStrategy
+	  {
+		public override char[] parseFlags(string rawFlags)
+		{
+		  string[] rawFlagParts = rawFlags.Trim().Split(",", true);
+		  char[] flags = new char[rawFlagParts.Length];
+		  int upto = 0;
+
+		  for (int i = 0; i < rawFlagParts.Length; i++)
+		  {
+			// note, removing the trailing X/leading I for nepali... what is the rule here?! 
+			string replacement = rawFlagParts[i].replaceAll("[^0-9]", "");
+			// note, ignoring empty flags (this happens in danish, for example)
+			if (replacement.Length == 0)
+			{
+			  continue;
+			}
+			flags[upto++] = (char) int.Parse(replacement);
+		  }
+
+		  if (upto < flags.Length)
+		  {
+			flags = Arrays.copyOf(flags, upto);
+		  }
+		  return flags;
+		}
+	  }
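+	  // Illustrative example: NumFlagParsingStrategy turns "3,54" into the flags
+	  // (char)3 and (char)54; non-digits inside a part are stripped and empty
+	  // parts are skipped.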
+
+	  /// <summary>
+	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded as two ASCII characters whose codes
+	  /// must be combined into a single character.
+	  /// 
+	  /// TODO (rmuir) test
+	  /// </summary>
+	  private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy
+	  {
+
+		public override char[] parseFlags(string rawFlags)
+		{
+		  if (rawFlags.Length == 0)
+		  {
+			return new char[0];
+		  }
+
+		  StringBuilder builder = new StringBuilder();
+		  if (rawFlags.Length % 2 == 1)
+		  {
+			throw new System.ArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
+		  }
+		  for (int i = 0; i < rawFlags.Length; i += 2)
+		  {
+			char cookedFlag = (char)((int) rawFlags[i] + (int) rawFlags[i + 1]);
+			builder.Append(cookedFlag);
+		  }
+
+		  char[] flags = new char[builder.Length];
+		  builder.getChars(0, builder.Length, flags, 0);
+		  return flags;
+		}
+	  }
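+	  // Illustrative example: DoubleASCIIFlagParsingStrategy folds "AB" into the
+	  // single flag (char)('A' + 'B') == (char)131 by summing the two char codes;
+	  // odd-length flag strings are rejected.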
+
+	  internal static bool hasFlag(char[] flags, char flag)
+	  {
+		return Arrays.binarySearch(flags, flag) >= 0;
+	  }
+
+	  internal virtual CharSequence cleanInput(CharSequence input, StringBuilder reuse)
+	  {
+		reuse.Length = 0;
+
+		for (int i = 0; i < input.length(); i++)
+		{
+		  char ch = input.charAt(i);
+
+		  if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0)
+		  {
+			continue;
+		  }
+
+		  if (ignoreCase && iconv == null)
+		  {
+			// if we have no input conversion mappings, do this on-the-fly
+			ch = char.ToLower(ch);
+		  }
+
+		  reuse.Append(ch);
+		}
+
+		if (iconv != null)
+		{
+		  try
+		  {
+			applyMappings(iconv, reuse);
+		  }
+		  catch (IOException bogus)
+		  {
+			throw new Exception(bogus.Message, bogus); // .NET's Exception has no ctor taking only an inner exception
+		  }
+		  if (ignoreCase)
+		  {
+			for (int i = 0; i < reuse.Length; i++)
+			{
+			  reuse[i] = char.ToLower(reuse[i]);
+			}
+		  }
+		}
+
+		return reuse;
+	  }
+
+	  // TODO: this could be more efficient!
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static void applyMappings(org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> fst, StringBuilder sb) throws java.io.IOException
+	  internal static void applyMappings(FST<CharsRef> fst, StringBuilder sb)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader bytesReader = fst.getBytesReader();
+		FST.BytesReader bytesReader = fst.BytesReader;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef> firstArc = fst.getFirstArc(new org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef>());
+		FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
+		CharsRef NO_OUTPUT = fst.outputs.NoOutput;
+
+		// temporary stuff
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef> arc = new org.apache.lucene.util.fst.FST.Arc<>();
+		FST.Arc<CharsRef> arc = new FST.Arc<CharsRef>();
+		int longestMatch;
+		CharsRef longestOutput;
+
+		for (int i = 0; i < sb.Length; i++)
+		{
+		  arc.copyFrom(firstArc);
+		  CharsRef output = NO_OUTPUT;
+		  longestMatch = -1;
+		  longestOutput = null;
+
+		  for (int j = i; j < sb.Length; j++)
+		  {
+			char ch = sb[j];
+			if (fst.findTargetArc(ch, arc, arc, bytesReader) == null)
+			{
+			  break;
+			}
+			else
+			{
+			  output = fst.outputs.add(output, arc.output);
+			}
+			if (arc.Final)
+			{
+			  longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
+			  longestMatch = j;
+			}
+		  }
+
+		  if (longestMatch >= 0)
+		  {
+			sb.Remove(i, longestMatch + 1 - i);
+			sb.Insert(i, longestOutput);
+			i += (longestOutput.length - 1);
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
new file mode 100644
index 0000000..bf59a70
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -0,0 +1,171 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
+	/// stems, this filter can emit multiple tokens for each consumed token
+	/// 
+	/// <para>
+	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	/// certain terms from being passed to the stemmer
+	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	/// in a previous <seealso cref="TokenStream"/>.
+	/// 
+	/// Note: For including the original term as well as the stemmed version, see
+	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	/// </para>
+	/// 
+	/// @lucene.experimental
+	/// </summary>
+	public sealed class HunspellStemFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly Stemmer stemmer;
+
+	  private IList<CharsRef> buffer;
+	  private State savedState;
+
+	  private readonly bool dedup;
+	  private readonly bool longestOnly;
+
+	  /// <summary>
+	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary) : this(input, dictionary, true)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup) : this(input, dictionary, dedup, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
+	  /// Dictionary
+	  /// </summary>
+	  /// <param name="input"> TokenStream whose tokens will be stemmed </param>
+	  /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
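+	  /// <param name="dedup"> true if only unique stems should be emitted for each token </param>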
+	  /// <param name="longestOnly"> true if only the longest term should be output. </param>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) : base(input)
+	  {
+		this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
+		this.stemmer = new Stemmer(dictionary);
+		this.longestOnly = longestOnly;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (buffer != null && buffer.Count > 0)
+		{
+		  CharsRef nextStem = buffer[0];
+		  buffer.RemoveAt(0); // IList<T>.Remove(T) removes by value, not by index
+		  restoreState(savedState);
+		  posIncAtt.PositionIncrement = 0;
+		  termAtt.setEmpty().append(nextStem);
+		  return true;
+		}
+
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		if (keywordAtt.Keyword)
+		{
+		  return true;
+		}
+
+		buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
+
+		if (buffer.Count == 0) // we do not know this word, return it unchanged
+		{
+		  return true;
+		}
+
+		if (longestOnly && buffer.Count > 1)
+		{
+		  ((List<CharsRef>)buffer).Sort(lengthComparator); // IList<T> has no Sort; assumes the stemmer returns a List<CharsRef>
+		}
+
+		CharsRef stem = buffer[0];
+		buffer.RemoveAt(0);
+		termAtt.setEmpty().append(stem);
+
+		if (longestOnly)
+		{
+		  buffer.Clear();
+		}
+		else
+		{
+		  if (buffer.Count > 0)
+		  {
+			savedState = captureState();
+		  }
+		}
+
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		buffer = null;
+	  }
+
+	  internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
+
+	  private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
+	  {
+		  public ComparatorAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  public virtual int Compare(CharsRef o1, CharsRef o2)
+		  {
+			if (o2.length == o1.length)
+			{
+			  // tie break on text
+			  return o2.compareTo(o1);
+			}
+			else
+			{
+			  return o2.length < o1.length ? -1 : 1;
+			}
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
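
A minimal wiring sketch for the filter above (illustrative only: StandardTokenizer
and the reader/stream plumbing follow the upstream Java API this raw port mirrors,
and the affix/dictionary streams are placeholders the caller opens and closes):

	// build the dictionary once and share it across filter instances
	Dictionary dictionary = new Dictionary(affixStream, dictionaryStream);
	Tokenizer source = new StandardTokenizer(matchVersion, reader);
	// dedup = true emits each unique stem once per token; pass longestOnly = true
	// to keep only the longest stem instead
	TokenStream stream = new HunspellStemFilter(source, dictionary, true, false);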

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
new file mode 100644
index 0000000..c9888fd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
@@ -0,0 +1,116 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+
+	/// <summary>
+	/// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
+	/// Example config for British English:
+	/// <pre class="prettyprint">
+	/// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
+	///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
+	///         affix=&quot;en_GB.aff&quot; 
+	///         ignoreCase=&quot;false&quot;
+	///         longestOnly=&quot;false&quot; /&gt;</pre>
+	/// Both parameters dictionary and affix are mandatory.
+	/// Dictionaries for many languages are available through the OpenOffice project.
+	/// 
+	/// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
+	/// @lucene.experimental
+	/// </summary>
+	public class HunspellStemFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private const string PARAM_DICTIONARY = "dictionary";
+	  private const string PARAM_AFFIX = "affix";
+	  private const string PARAM_RECURSION_CAP = "recursionCap";
+	  private const string PARAM_IGNORE_CASE = "ignoreCase";
+	  private const string PARAM_LONGEST_ONLY = "longestOnly";
+
+	  private readonly string dictionaryFiles;
+	  private readonly string affixFile;
+	  private readonly bool ignoreCase;
+	  private readonly bool longestOnly;
+	  private Dictionary dictionary;
+
+	  /// <summary>
+	  /// Creates a new HunspellStemFilterFactory </summary>
+	  public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		dictionaryFiles = require(args, PARAM_DICTIONARY);
+		affixFile = get(args, PARAM_AFFIX);
+		ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
+		longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
+		// this isnt necessary: we properly load all dictionaries.
+		// but recognize and ignore for back compat
+		getBoolean(args, "strictAffixParsing", true);
+		// this isn't necessary: multi-stage stripping is fixed and 
+		// flags like COMPLEXPREFIXES in the data itself control this.
+		// but recognize and ignore for back compat
+		getInt(args, "recursionCap", 0);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		string[] dicts = dictionaryFiles.Split(",", true);
+
+		InputStream affix = null;
+		IList<InputStream> dictionaries = new List<InputStream>();
+
+		try
+		{
+		  dictionaries = new List<InputStream>();
+		  foreach (string file in dicts)
+		  {
+			dictionaries.Add(loader.openResource(file));
+		  }
+		  affix = loader.openResource(affixFile);
+
+		  this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
+		}
+		catch (ParseException e)
+		{
+		  throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(affix);
+		  IOUtils.closeWhileHandlingException(dictionaries);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream tokenStream)
+	  {
+		return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
+	  }
+	}
+
+}
\ No newline at end of file

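For context on the factory above, a minimal usage sketch (not part of the commit):
it builds the args map the constructor validates, using the parameter names from
the PARAM_* constants shown in the diff. The loader variable is a hypothetical
stand-in for any ResourceLoader that can resolve the named files, and inform()
must run before create().

    // Hedged sketch, assuming a ResourceLoader implementation is available.
    var args = new Dictionary<string, string>
    {
        { "dictionary", "en_GB.dic,my_custom.dic" }, // mandatory, comma-separated
        { "affix", "en_GB.aff" },
        { "ignoreCase", "false" },
        { "longestOnly", "false" }
    };
    var factory = new HunspellStemFilterFactory(args);   // consumes and validates args
    factory.inform(loader);                              // loads the Hunspell Dictionary
    TokenStream stemmed = factory.create(tokenStream);   // wraps in HunspellStemFilter

Note that the constructor rejects any entry left in the map, so args must contain
only recognized parameters.
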
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
new file mode 100644
index 0000000..87872c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -0,0 +1,47 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	// many hunspell dictionaries use this encoding, yet java does not have it?!?!
+	internal sealed class ISO8859_14Decoder : CharsetDecoder
+	{
+
+	  internal static readonly char[] TABLE = new char[] {0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
+
+	  internal ISO8859_14Decoder() : base(StandardCharsets.ISO_8859_1, 1f, 1f) // fake with similar properties
+	  {
+	  }
+
+	  protected internal override CoderResult decodeLoop(ByteBuffer @in, CharBuffer @out)
+	  {
+		while (@in.hasRemaining() && @out.hasRemaining())
+		{
+		  char ch = (char)(@in.get() & 0xff);
+		  if (ch >= 0xA0)
+		  {
+			ch = TABLE[ch - 0xA0];
+		  }
+		  @out.put(ch);
+		}
+		return @in.hasRemaining() ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
+	  }
+	}
+
+}
\ No newline at end of file

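As a side note on the decoder above (not part of the commit): decodeLoop passes
bytes below 0xA0 through unchanged, Latin-1 style, and maps 0xA0..0xFF through
TABLE. A self-contained sketch of the same rule, outside the CharsetDecoder
plumbing:

    // Hedged sketch of the mapping used by ISO8859_14Decoder.decodeLoop.
    // Bytes < 0xA0 decode as themselves; bytes >= 0xA0 index into TABLE
    // (e.g. 0xA1 -> U+1E02, LATIN CAPITAL LETTER B WITH DOT ABOVE).
    static char DecodeIso8859_14(byte b)
    {
        return b < 0xA0 ? (char)b : ISO8859_14Decoder.TABLE[b - 0xA0];
    }
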

[26/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
new file mode 100644
index 0000000..94b462c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by Kstem
+	/// </summary>
+	internal class KStemData3
+	{
+		private KStemData3()
+		{
+		}
+	   internal static string[] data = new string[] {"distasteful","distemper","distempered","distend","distension", "distil","distill","distillation","distiller","distillery", "distinct","distinction","distinctive","distinguish","distinguishable", "distinguished","distort","distortion","distract","distracted", "distraction","distrain","distraint","distrait","distraught", "distress","distressing","distribute","distribution","distributive", "distributor","district","distrust","distrustful","disturb", "disturbance","disturbed","disunion","disunite","disunity", "disuse","disused","disyllabic","disyllable","ditch", "dither","dithers","ditto","ditty","diuretic", "diurnal","divagate","divan","dive","diver", "diverge","divergence","divers","diverse","diversify", "diversion","diversionary","diversity","divert","divertimento", "divertissement","divest","divide","dividend","dividers", "divination","divine","diviner","divingboard","divinity", "divisible","division","divisive","divisor","divorce",
  "divot","divulge","divvy","dixie","dixieland", "dizzy","djinn","dna","do","dobbin", "doc","docile","dock","docker","docket", "dockyard","doctor","doctoral","doctorate","doctrinaire", "doctrinal","doctrine","document","documentary","documentation", "dodder","doddering","doddle","dodge","dodgems", "dodger","dodgy","dodo","doe","doer", "doeskin","doff","dog","dogcart","dogcatcher", "dogfight","dogfish","dogged","doggerel","doggie", "doggo","doggone","doggy","doghouse","dogie", "dogleg","dogma","dogmatic","dogmatics","dogmatism", "dogs","dogsbody","dogtooth","dogtrot","dogwood", "doh","doily","doings","doldrums","dole", "doleful","doll","dollar","dollop","dolly", "dolmen","dolor","dolorous","dolour","dolphin", "dolt","domain","dome","domed","domestic", "domesticate","domesticity","domicile","domiciliary","dominance", "dominant","dominate","domination","domineer","dominican", "dominion","domino","dominoes","don","donate", "donation","donjon","donkey","donkeywork","donnish", "donor","doo
 dle","doodlebug","doom","doomsday", "door","doorbell","doorframe","doorkeeper","doorknob", "doorknocker","doorman","doormat","doornail","doorplate", "doorscraper","doorstep","doorstopper","doorway","dope", "dopey","dopy","doric","dormant","dormer", "dormitory","dormouse","dorsal","dory","dosage", "dose","doss","dosser","dosshouse","dossier", "dost","dot","dotage","dote","doth", "doting","dottle","dotty","double","doubles", "doublet","doublethink","doubloon","doubly","doubt", "doubtful","doubtless","douche","dough","doughnut", "doughty","doughy","dour","douse","dove", "dovecote","dovetail","dowager","dowdy","dowel", "dower","down","downbeat","downcast","downdraft", "downdraught","downer","downfall","downgrade","downhearted", "downhill","downpour","downright","downstage","downstairs", "downstream","downtown","downtrodden","downward","downwards", "downwind","downy","dowry","dowse","doxology", "doyen","doyley","doze","dozen","dozy", "dpt","drab","drabs","drachm","drachma", "draconian","
 draft","draftee","draftsman","drafty", "drag","draggled","draggy","dragnet","dragoman", "dragon","dragonfly","dragoon","drain","drainage", "drainpipe","drake","dram","drama","dramatic", "dramatics","dramatise","dramatist","dramatize","drank", "drape","draper","drapery","drastic","drat", "draught","draughtboard","draughts","draughtsman","draughty", "draw","drawback","drawbridge","drawer","drawers", "drawing","drawl","drawn","drawstring","dray", "dread","dreadful","dreadfully","dreadnaught","dreadnought", "dream","dreamboat","dreamer","dreamland","dreamless", "dreamlike","dreamy","drear","dreary","dredge", "dredger","dregs","drench","dress","dressage", "dresser","dressing","dressmaker","dressy","drew", "dribble","driblet","dribs","drier","drift", "driftage","drifter","driftnet","driftwood","drill", "drily","drink","drinkable","drinker","drip", "dripping","drive","drivel","driver","driveway", "driving","drizzle","drogue","droll","drollery", "dromedary","drone","drool","droop","drop", "
 dropkick","droplet","dropout","dropper","droppings", "drops","dropsy","dross","drought","drove", "drover","drown","drowse","drowsy","drub", "drudge","drudgery","drug","drugget","druggist", "drugstore","druid","drum","drumbeat","drumfire", "drumhead","drummer","drumstick","drunk","drunkard", "drunken","drupe","dry","dryad","dryer", "dual","dub","dubbin","dubiety","dubious", "ducal","ducat","duchess","duchy","duck", "duckboards","duckling","ducks","duckweed","ducky", "duct","ductile","dud","dude","dudgeon", "duds","due","duel","duenna","dues", "duet","duff","duffel","duffer","duffle", "dug","dugout","duke","dukedom","dukes", "dulcet","dulcimer","dull","dullard","duly", "dumb","dumbbell","dumbfound","dumbwaiter","dumfound", "dummy","dump","dumper","dumpling","dumps", "dumpy","dun","dunce","dunderhead","dung", "dungaree","dungarees","dungeon","dunghill","dunk", "duo","duodecimal","duodenum","duologue","dupe", "duplex","duplicate","duplicator","duplicity","durable", "duration","durbar","
 duress","durex","during", "durst","dusk","dusky","dust","dustbin", "dustbowl","dustcart","dustcoat","duster","dustman", "dustpan","dustsheet","dustup","dusty","dutch", "dutiable","dutiful","duty","duvet","dwarf", "dwell","dwelling","dwindle","dyarchy","dye", "dyestuff","dyeworks","dyke","dynamic","dynamics", "dynamism","dynamite","dynamo","dynasty","dysentery", "dyslexia","dyspepsia","dyspeptic","each","eager", "eagle","eaglet","ear","earache","eardrum", "eared","earful","earl","earliest","earlobe", "early","earmark","earmuff","earn","earnest", "earnings","earphone","earpiece","earplug","earring", "earshot","earth","earthbound","earthen","earthenware", "earthling","earthly","earthnut","earthquake","earthshaking", "earthwork","earthworm","earthy","earwax","earwig", "ease","easel","easily","east","eastbound", "easter","easterly","eastern","easterner","easternmost", "easy","easygoing","eat","eatable","eatables", "eater","eats","eaves","eavesdrop","ebb", "ebony","ebullience","ebullient"
 ,"eccentric","eccentricity", "ecclesiastic","ecclesiastical","ecg","echelon","echo", "eclectic","eclipse","ecliptic","eclogue","ecological", "ecologically","ecology","economic","economical","economically", "economics","economise","economist","economize","economy", "ecosystem","ecstasy","ecstatic","ect","ectoplasm", "ecumenical","ecumenicalism","eczema","edam","eddy", "edelweiss","eden","edge","edgeways","edging", "edgy","edible","edibles","edict","edification", "edifice","edify","edit","edition","editor", "editorial","editorialise","editorialize","educate","educated", "education","educational","educationist","educator","educe", "eec","eeg","eel","eerie","efface", "effect","effective","effectively","effectiveness","effectives", "effects","effectual","effectually","effectuate","effeminacy", "effeminate","effendi","effervesce","effete","efficacious", "efficacy","efficiency","efficient","effigy","efflorescence", "effluent","efflux","effort","effortless","effrontery", "effulgence","efful
 gent","effusion","effusive","eft", "egalitarian","egg","eggcup","egghead","eggnog", "eggplant","eggshell","egis","eglantine","ego", "egocentric","egoism","egoist","egotism","egotist", "egregious","egress","egret","eiderdown","eight", "eighteen","eightsome","eighty","eisteddfod","either", "ejaculate","ejaculation","eject","ejector","eke", "ekg","elaborate","elaboration","eland","elapse", "elastic","elasticity","elastoplast","elate","elated", "elation","elbow","elbowroom","elder","elderberry", "elderflower","elderly","eldest","elect","election", "electioneer","electioneering","elective","elector","electoral", "electorate","electric","electrical","electrician","electricity", "electrify","electrocardiogram","electrocardiograph","electrocute","electrode", "electroencephalogram","electroencephalograph","electrolysis","electrolyte","electron", "electronic","electronics","electroplate","eleemosynary","elegant", "elegiac","elegy","element","elemental","elementary", "elements","elephant","ele
 phantiasis","elephantine","elevate", "elevated","elevation","elevator","eleven","elevenses", "elf","elfin","elfish","elicit","elide", "eligible","eliminate","elite","elitism","elixir", "elizabethan","elk","elkhound","ellipse","ellipsis", "elliptic","elm","elocution","elocutionary","elocutionist", "elongate","elongation","elope","eloquence","eloquent", "else","elsewhere","elucidate","elucidatory","elude", "elusive","elver","elves","elvish","elysian", "elysium","emaciate","emanate","emancipate","emancipation", "emasculate","embalm","embankment","embargo","embark", "embarkation","embarrass","embarrassment","embassy","embattled", "embed","embellish","ember","embezzle","embitter", "emblazon","emblem","emblematic","embodiment","embody", "embolden","embolism","embonpoint","embosomed","emboss", "embowered","embrace","embrasure","embrocation","embroider", "embroidery","embroil","embryo","embryonic","emend", "emendation","emerald","emerge","emergence","emergency", "emergent","emeritus","emery
 ","emetic","emigrant", "emigrate","eminence","eminent","eminently","emir", "emirate","emissary","emission","emit","emmentaler", "emmenthaler","emollient","emolument","emote","emotion", "emotional","emotionalism","emotionally","emotive","empanel", "empathy","emperor","emphasis","emphasise","emphasize", "emphatic","emphatically","emphysema","empire","empirical", "empiricism","emplacement","emplane","employ","employable", "employee","employer","employment","emporium","empower", "empress","emptily","empty","empurpled","empyreal", "empyrean","emu","emulate","emulation","emulsify", "emulsion","enable","enabling","enact","enactment", "enamel","enamelware","enamored","enamoured","encamp", "encampment","encapsulate","encase","encaustic","encephalitis", "enchain","enchant","enchanter","enchanting","enchantment", "encipher","encircle","enclave","enclose","enclosure", "encode","encomium","encompass","encore","encounter", "encourage","encouragement","encroach","encroachment","encrust", "encumber
 ","encumbrance","encyclical","encyclopaedia","encyclopaedic", "encyclopedia","encyclopedic","end","endanger","endear", "endearing","endearment","endeavor","endeavour","endemic", "ending","endive","endless","endocrine","endorse", "endow","endowment","endpaper","endurance","endure", "enduring","endways","enema","enemy","energetic", "energize","energy","enervate","enfeeble","enfilade", "enfold","enforce","enfranchise","engage","engaged", "engagement","engaging","engender","engine","engineer", "engineering","english","englishman","engraft","engrave", "engraving","engross","engrossing","engulf","enhance", "enigma","enigmatic","enjoin","enjoy","enjoyable", "enjoyment","enkindle","enlarge","enlargement","enlighten", "enlightened","enlightenment","enlist","enliven","enmesh", "enmity","ennoble","ennui","enormity","enormous", "enormously","enough","enplane","enquire","enquiring", "enquiry","enrage","enrapture","enrich","enrol", "enroll","enrollment","enrolment","ensanguined","ensconce", "ense
 mble","enshrine","enshroud","ensign","enslave", "ensnare","ensue","ensure","entail","entangle", "entanglement","entente","enter","enteritis","enterprise", "enterprising","entertain","entertainer","entertaining","entertainment", "enthral","enthrall","enthrone","enthroned","enthuse", "enthusiasm","enthusiast","entice","enticement","entire", "entirety","entitle","entity","entomb","entomology", "entourage","entrails","entrain","entrance","entrant", "entrap","entreat","entreaty","entrench","entrenched", "entrenchment","entrepreneur","entresol","entropy","entrust", "entry","entwine","enumerate","enunciate","enunciation", "envelop","envenom","enviable","envious","environed", "environment","environmental","environmentalist","environs","envisage", "envoi","envoy","envy","enzyme","eon", "epaulet","epaulette","ephemeral","epic","epicenter", "epicentre","epicure","epicurean","epidemic","epidermis", "epidiascope","epiglottis","epigram","epigrammatic","epilepsy", "epileptic","epilogue","epiphany"
 ,"episcopacy","episcopal", "episcopalian","episode","episodic","epistle","epistolary", "epitaph","epithet","epitome","epitomise","epitomize", "epoch","eponymous","equability","equable","equal", "equalise","equalitarian","equality","equalize","equally", "equanimity","equate","equation","equator","equatorial", "equerry","equestrian","equidistant","equilateral","equilibrium", "equine","equinoctial","equinox","equip","equipage", "equipment","equipoise","equitable","equitation","equities", "equity","equivalence","equivalent","equivocal","equivocate", "equivocation","era","eradicate","eradicator","erase", "eraser","erasure","ere","erect","erectile", "erection","eremite","erg","ergo","ergonomics", "ermine","erode","erogenous","erosion","erotic", "erotica","eroticism","err","errand","errant", "erratic","erratum","erroneous","error","ersatz", "erse","eructation","erudite","erupt","eruption", "erysipelas","escalate","escalator","escalope","escapade", "escape","escapee","escapement","escapism"
 ,"escapology", "escarpment","eschatology","eschew","escort","escritoire", "escutcheon","eskimo","esophagus","esoteric","esp", "espalier","especial","especially","esperanto","espionage", "esplanade","espousal","espouse","espresso","espy", "essay","essence","essential","essentially","establish", "establishment","estaminet","estate","esteem","esthete", "esthetic","esthetics","estimable","estimate","estimation", "estimator","estrange","estrangement","estrogen","estuary", "etch","etching","eternal","eternity","ether", "ethereal","ethic","ethical","ethically","ethics", "ethnic","ethnically","ethnographer","ethnography","ethnologist", "ethnology","ethos","ethyl","etiolate","etiology", "etiquette","etymologist","etymology","eucalyptus","eucharist", "euclidean","euclidian","eugenic","eugenics","eulogise", "eulogist","eulogistic","eulogize","eulogy","eunuch", "euphemism","euphemistic","euphonious","euphonium","euphony", "euphoria","euphuism","eurasian","eureka","eurhythmic", "eurhythmics","eu
 rocrat","eurodollar","eurythmic","eurythmics", "euthanasia","evacuate","evacuee","evade","evaluate", "evanescent","evangelic","evangelical","evangelise","evangelist", "evangelize","evaporate","evasion","evasive","eve", "even","evening","evenings","evens","evensong", "event","eventful","eventide","eventual","eventuality", "eventually","eventuate","ever","evergreen","everlasting", "everlastingly","evermore","every","everybody","everyday", "everything","everywhere","evict","evidence","evident", "evidently","evil","evildoer","evince","eviscerate", "evocative","evoke","evolution","evolutionary","evolve", "ewe","ewer","exacerbate","exact","exacting", "exaction","exactly","exaggerate","exaggeration","exalt", "exaltation","exalted","exam","examination","examine", "example","exasperate","exasperation","excavate","excavation", "excavator","exceed","exceedingly","excel","excellence", "excellency","excellent","excelsior","except","excepted", "excepting","exception","exceptionable","exceptional"
 ,"excerpt", "excess","excesses","excessive","exchange","exchequer", "excise","excision","excitable","excite","excited", "excitement","exciting","exclaim","exclamation","exclamatory", "exclude","excluding","exclusion","exclusive","exclusively", "excogitate","excommunicate","excommunication","excoriate","excrement", "excrescence","excreta","excrete","excretion","excruciating", "exculpate","excursion","excursionist","excusable","excuse", "execrable","execrate","executant","execute","execution", "executioner","executive","executor","exegesis","exemplary", "exemplification","exemplify","exempt","exemption","exercise", "exercises","exert","exertion","exeunt","exhalation", "exhale","exhaust","exhaustion","exhaustive","exhibit", "exhibition","exhibitionism","exhibitor","exhilarate","exhilarating", "exhort","exhortation","exhume","exigency","exigent", "exiguous","exile","exist","existence","existent", "existential","existentialism","existing","exit","exodus", "exogamy","exonerate","exorbitan
 t","exorcise","exorcism", "exorcist","exorcize","exotic","expand","expanse", "expansion","expansive","expatiate","expatriate","expect", "expectancy","expectant","expectation","expectations","expectorate", "expediency","expedient","expedite","expedition","expeditionary", "expeditious","expel","expend","expendable","expenditure", "expense","expenses","expensive","experience","experienced", "experiment","experimental","experimentation","expert","expertise", "expiate","expiration","expire","explain","explanation", "explanatory","expletive","explicable","explicate","explicit", "explode","exploded","exploit","exploration","exploratory", "explore","explosion","explosive","expo","exponent", "exponential","export","exportation","exporter","expose", "exposition","expostulate","exposure","expound","express", "expression","expressionism","expressionless","expressive","expressly", "expressway","expropriate","expulsion","expunge","expurgate", "exquisite","extant","extemporaneous","extempore","ext
 emporise", "extemporize","extend","extension","extensive","extent", "extenuate","extenuation","exterior","exteriorise","exteriorize", "exterminate","external","externalise","externalize","externally", "externals","exterritorial","extinct","extinction","extinguish", "extinguisher","extirpate","extol","extort","extortion", "extortionate","extortions","extra","extract","extraction", "extracurricular","extraditable","extradite","extrajudicial","extramarital", "extramural","extraneous","extraordinarily","extraordinary","extrapolate", "extraterrestrial","extraterritorial","extravagance","extravagant","extravaganza", "extravert","extreme","extremely","extremism","extremities", "extremity","extricate","extrinsic","extrovert","extrude", "exuberance","exuberant","exude","exult","exultant", "exultation","eye","eyeball","eyebrow","eyecup", "eyeful","eyeglass","eyeglasses","eyelash","eyelet", "eyelid","eyeliner","eyepiece","eyes","eyeshot", "eyesight","eyesore","eyestrain","eyetooth","eyewash", 
 "eyewitness","eyot","eyrie","eyry","fabian", "fable","fabled","fabric","fabricate","fabrication", "fabulous","fabulously","face","facecloth","faceless", "facet","facetious","facial","facile","facilitate", "facilities","facility","facing","facings","facsimile", "fact","faction","factious","factitious","factor", "factorial","factorise","factorize","factory","factotum", "factual","faculty","fad","fade","faeces", "faerie","faery","fag","fagged","faggot", "fagot","fahrenheit","faience","fail","failing", "failure","fain","faint","fair","fairground", "fairly","fairway","fairy","fairyland","faith", "faithful","faithfully","faithless","fake","fakir", "falcon","falconer","falconry","fall","fallacious", "fallacy","fallen","fallible","fallout","fallow", "falls","false","falsehood","falsetto","falsies", "falsify","falsity","falter","fame","famed", "familial","familiar","familiarise","familiarity","familiarize", "familiarly","family","famine","famish","famished", "famous","famously","fan","fanati
 c","fanaticism", "fancier","fancies","fanciful","fancy","fancywork", "fandango","fanfare","fang","fanlight","fanny", "fantasia","fantastic","fantasy","far","faraway", "farce","fare","farewell","farfetched","farinaceous", "farm","farmer","farmhand","farmhouse","farming", "farmyard","farrago","farrier","farrow","farsighted", "fart","farther","farthest","farthing","fascia", "fascinate","fascinating","fascination","fascism","fascist", "fashion","fashionable","fast","fasten","fastener", "fastening","fastidious","fastness","fat","fatal", "fatalism","fatalist","fatality","fatally","fate", "fated","fateful","fates","fathead","father", "fatherhood","fatherly","fathom","fathomless","fatigue", "fatigues","fatless","fatted","fatten","fatty", "fatuity","fatuous","faucet","fault","faultfinding", "faultless","faulty","faun","fauna","favor", "favorable","favored","favorite","favoritism","favour", "favourable","favoured","favourite","favouritism","favours", "fawn","fay","faze","fbi","fealty", "fear"
 ,"fearful","fearless","fearsome","feasible", "feast","feat","feather","featherbed","featherbrained", "featherweight","feathery","feature","featureless","features", "febrile","february","feces","feckless","fecund", "fed","federal","federalism","federalist","federate", "federation","fee","feeble","feebleminded","feed", "feedback","feedbag","feeder","feel","feeler", "feeling","feelings","feet","feign","feint", "feldspar","felicitate","felicitous","felicity","feline", "fell","fellah","fellatio","fellow","fellowship", "felon","felony","felspar","felt","felucca", "fem","female","feminine","femininity","feminism", "feminist","femur","fen","fence","fencer", "fencing","fend","fender","fennel","feoff", "feral","ferment","fermentation","fern","ferocious", "ferocity","ferret","ferroconcrete","ferrous","ferrule", "ferry","ferryboat","ferryman","fertile","fertilise", "fertility","fertilize","fertilizer","ferule","fervent", "fervid","fervor","fervour","festal","fester", "festival","festive","festi
 vity","festoon","fetal", "fetch","fetching","fete","fetid","fetish", "fetishism","fetishist","fetlock","fetter","fettle", "fetus","feud","feudal","feudalism","feudatory", "fever","fevered","feverish","feverishly","few", "fey","fez","fiasco","fiat","fib", "fiber","fiberboard","fiberglass","fibre","fibreboard", "fibreglass","fibrositis","fibrous","fibula","fichu", "fickle","fiction","fictional","fictionalisation","fictionalization", "fictitious","fiddle","fiddler","fiddlesticks","fiddling", "fidelity","fidget","fidgets","fidgety","fie", "fief","field","fielder","fieldwork","fiend", "fiendish","fiendishly","fierce","fiery","fiesta", "fife","fifteen","fifth","fifty","fig", "fight","fighter","figment","figurative","figure", "figured","figurehead","figures","figurine","filament", "filbert","filch","file","filet","filial", "filibuster","filigree","filings","fill","filler", "fillet","filling","fillip","filly","film", "filmable","filmstrip","filmy","filter","filth", "filthy","fin","finable",
 "final","finale", "finalise","finalist","finality","finalize","finally", "finance","finances","financial","financially","financier", "finch","find","finder","finding","fine", "fineable","finely","finery","finesse","finger", "fingerboard","fingering","fingernail","fingerplate","fingerpost", "fingerprint","fingerstall","fingertip","finicky","finis", "finish","finished","finite","fink","fiord", "fir","fire","firearm","fireball","firebomb", "firebox","firebrand","firebreak","firebrick","firebug", "fireclay","firecracker","firedamp","firedog","firefly", "fireguard","firelight","firelighter","fireman","fireplace", "firepower","fireproof","fireside","firestorm","firetrap", "firewalking","firewatcher","firewater","firewood","firework", "fireworks","firkin","firm","firmament","first", "firstborn","firstfruits","firsthand","firstly","firth", "firtree","fiscal","fish","fishcake","fisherman", "fishery","fishing","fishmonger","fishplate","fishwife", "fishy","fissile","fission","fissionable","fis
 sure", "fist","fisticuffs","fistula","fit","fitful", "fitment","fitness","fitted","fitter","fitting", "five","fiver","fives","fix","fixation", "fixative","fixed","fixedly","fixity","fixture", "fizz","fizzle","fizzy","fjord","flabbergast", "flabby","flaccid","flag","flagellant","flagellate", "flageolet","flagon","flagpole","flagrancy","flagrant", "flagship","flagstaff","flagstone","flail","flair", "flak","flake","flaky","flambeau","flamboyant", "flame","flamenco","flaming","flamingo","flammable", "flan","flange","flank","flannel","flannelette", "flannels","flap","flapjack","flapper","flare", "flared","flares","flash","flashback","flashbulb", "flashcube","flasher","flashgun","flashlight","flashy", "flask","flat","flatcar","flatfish","flatfoot", "flatiron","flatlet","flatly","flatten","flatter", "flattery","flattop","flatulence","flaunt","flautist", "flavor","flavoring","flavour","flavouring","flaw", "flawless","flax","flaxen","flay","flea", "fleabag","fleabite","fleapit","fleck","fled
 ged", "fledgling","flee","fleece","fleecy","fleet", "fleeting","flesh","fleshings","fleshly","fleshpot", "fleshy","flew","flex","flexible","flibbertigibbet", "flick","flicker","flicks","flier","flies", "flight","flightless","flighty","flimsy","flinch", "fling","flint","flintlock","flinty","flip", "flippancy","flippant","flipper","flipping","flirt", "flirtation","flirtatious","flit","flitch","flivver", "float","floatation","floating","flock","floe", "flog","flogging","flood","floodgate","floodlight", "floor","floorboard","flooring","floorwalker","floosy", "floozy","flop","floppy","flora","floral", "floriculture","florid","florin","florist","floss", "flotation","flotilla","flounce","flounder","flour", "flourish","flourmill","floury","flout","flow", "flower","flowerbed","flowered","flowering","flowerless", "flowerpot","flowery","flowing","flown","flu", "fluctuate","flue","fluency","fluent","fluff", "fluffy","fluid","fluidity","fluke","flukey", "fluky","flume","flummery","flummox","flun
 g", "flunk","flunkey","flunky","fluorescent","fluoridate", "fluoride","fluorine","flurry","flush","flushed", "fluster","flute","fluting","flutist","flutter", "fluvial","flux","fly","flyaway","flyblown", "flyby","flycatcher","flyer","flying","flyleaf", "flyover","flypaper","flypast","flysheet","flyswatter", "flytrap","flyweight","flywheel","flywhisk","foal", "foam","fob","focal","focus","fodder", "foe","foeman","foetal","foetus","fog", "fogbank","fogbound","fogey","foggy","foghorn", "fogy","foible","foil","foist","fold", "foldaway","folder","foliage","folio","folk", "folklore","folklorist","folks","folksy","folktale", "folkway","follicle","follow","follower","following", "folly","foment","fomentation","fond","fondant", "fondle","fondly","fondu","fondue","font", "food","foodstuff","fool","foolery","foolhardy", "foolish","foolproof","foolscap","foot","footage", "football","footbath","footboard","footbridge","footer", "footfall","foothill","foothold","footing","footle", "footlights","fo
 otling","footloose","footman","footnote", "footpad","footpath","footplate","footprint","footrace", "footsie","footslog","footsore","footstep","footstool", "footsure","footwear","footwork","fop","foppish", "for","forage","foray","forbear","forbearance", "forbearing","forbid","forbidden","forbidding","force", "forced","forceful","forcemeat","forceps","forces", "forcible","forcibly","ford","fore","forearm", "forebode","foreboding","forecast","forecastle","foreclose", "foreclosure","forecourt","foredoomed","forefather","forefinger", "forefoot","forefront","forego","foregoing","foreground", "forehand","forehead","foreign","foreigner","foreknowledge", "foreland","foreleg","forelock","foreman","foremost", "forename","forenoon","forensic","foreordain","forepart", "foreplay","forerunner","foresail","foresee","foreseeable", "foreshadow","foreshore","foreshorten","foresight","foreskin", "forest","forestall","forester","forestry","foreswear", "foretaste","foretell","forethought","forever","fore
 warn", "forewent","forewoman","foreword","forfeit","forfeiture", "forgather","forgave","forge","forger","forgery", "forget","forgetful","forging","forgivable","forgive", "forgiveable","forgiveness","forgiving","forgo","fork", "forked","forkful","forklift","forlorn","form", "formal","formaldehyde","formalin","formalise","formalism", "formality","formalize","format","formation","formative", "formbook","former","formerly","formica","formidable", "formless","formula","formulaic","formulate","formulation", "fornicate","fornication","forrader","forsake","forsooth", "forswear","forsythia","fort","forte","forth", "forthcoming","forthright","forthwith","fortieth","fortification", "fortify","fortissimo","fortitude","fortnight","fortnightly", "fortress","fortuitous","fortunate","fortunately","fortune", "forty","forum","forward","forwarding","forwardly", "forwardness","forwent","foss","fosse","fossil", "fossilise","fossilize","foster","fought","foul", "found","foundation","foundations","founder
 ","foundling", "foundry","fount","fountain","fountainhead","four", "foureyes","fourpenny","fours","foursquare","fourteen", "fourth","fowl","fox","foxglove","foxhole", "foxhound","foxhunt","foxtrot","foxy","foyer", "fracas","fraction","fractional","fractionally","fractious", "fracture","fragile","fragment","fragmentary","fragmentation", "fragrance","fragrant","frail","frailty","frame", "frames","framework","franc","franchise","franciscan", "frank","frankfurter","frankincense","franklin","frankly", "frantic","fraternal","fraternise","fraternity","fraternize", "fratricide","frau","fraud","fraudulence","fraudulent", "fraught","fraulein","fray","frazzle","freak", "freakish","freckle","free","freebee","freebie", "freeboard","freebooter","freeborn","freedman","freedom", "freehand","freehanded","freehold","freeholder","freelance", "freeload","freely","freeman","freemason","freemasonry", "freepost","freesia","freestanding","freestone","freestyle", "freethinker","freeway","freewheel","freewhe
 eling","freewill", "freeze","freezer","freezing","freight","freighter", "freightliner","frenchman","frenetic","frenzied","frenzy", "frequency","frequent","fresco","fresh","freshen", "fresher","freshet","freshly","freshwater","fret", "fretful","fretsaw","fretwork","freudian","friable", "friar","friary","fricassee","fricative","friction", "friday","fridge","friend","friendless","friendly", "friends","friendship","frier","frieze","frig", "frigate","frigging","fright","frighten","frightened", "frightful","frightfully","frigid","frigidity","frill", "frilled","frills","frilly","fringe","frippery", "frisbee","frisian","frisk","frisky","frisson", "fritter","frivolity","frivolous","frizz","frizzle", "frizzy","fro","frock","frog","frogged", "frogman","frogmarch","frogspawn","frolic","frolicsome", "from","frond","front","frontage","frontal", "frontbench","frontier","frontiersman","frontispiece","frost", "frostbite","frostbitten","frostbound","frosting","frosty", "froth","frothy","frown","frows
 t","frowsty", "frowsy","frowzy","froze","frozen","frs", "fructification","fructify","frugal","frugality","fruit", "fruitcake","fruiterer","fruitful","fruition","fruitless", "fruits","fruity","frump","frustrate","frustration", "fry","fryer","fuchsia","fuck","fucker", "fucking","fuddle","fudge","fuehrer","fuel", "fug","fugitive","fugue","fuhrer","fulcrum", "fulfil","fulfill","fulfillment","fulfilment","full", "fullback","fuller","fully","fulmar","fulminate", "fulmination","fulness","fulsome","fumble","fume", "fumes","fumigate","fun","function","functional", "functionalism","functionalist","functionary","fund","fundamental", "fundamentalism","fundamentally","funds","funeral","funerary", "funereal","funfair","fungicide","fungoid","fungous", "fungus","funicular","funk","funky","funnel", "funnies","funnily","funny","fur","furbelow", "furbish","furious","furiously","furl","furlong", "furlough","furnace","furnish","furnishings","furniture", "furore","furrier","furrow","furry","further", "fu
 rtherance","furthermore","furthermost","furthest","furtive", "fury","furze","fuse","fused","fuselage", "fusilier","fusillade","fusion","fuss","fusspot", "fussy","fustian","fusty","futile","futility", "future","futureless","futures","futurism","futuristic", "futurity","fuzz","fuzzy","gab","gabardine", "gabble","gaberdine","gable","gabled","gad", "gadabout","gadfly","gadget","gadgetry","gaelic", "gaff","gaffe","gaffer","gag","gaga", "gaggle","gaiety","gaily","gain","gainful", "gainfully","gainsay","gait","gaiter","gal", "gala","galactic","galantine","galaxy","gale", "gall","gallant","gallantry","galleon","gallery", "galley","gallic","gallicism","gallivant","gallon", "gallop","galloping","gallows","gallstone","galore", "galosh","galumph","galvanic","galvanise","galvanism", "galvanize","gambit","gamble","gamboge","gambol", "game","gamecock","gamekeeper","games","gamesmanship", "gamey","gamma","gammon","gammy","gamp", "gamut","gamy","gander","gang","ganger", "gangling","ganglion","gangpl
 ank","gangrene","gangster", "gangway","gannet","gantry","gaol","gaolbird", "gaoler","gap","gape","gapes","garage", "garb","garbage","garble","garden","gardenia", "gardening","gargantuan","gargle","gargoyle","garish", "garland","garlic","garment","garner","garnet", "garnish","garret","garrison","garrote","garrotte", "garrulity","garrulous","garter","gas","gasbag", "gaseous","gash","gasholder","gasify","gasket", "gaslight","gasman","gasolene","gasoline","gasp", "gassy","gastric","gastritis","gastroenteritis","gastronomy", "gasworks","gat","gate","gatecrash","gatehouse", "gatekeeper","gatepost","gateway","gather","gathering", "gauche","gaucherie","gaucho","gaudy","gauge", "gaunt","gauntlet","gauze","gave","gavel", "gavotte","gawk","gawky","gawp","gay", "gayness","gaze","gazebo","gazelle","gazette", "gazetteer","gazump","gce","gear","gearbox", "gecko","gee","geese","geezer","geisha", "gel","gelatine","gelatinous","geld","gelding", "gelignite","gem","gemini","gen","gendarme", "gender","g
 ene","genealogist","genealogy","genera", "general","generalisation","generalise","generalissimo","generality", "generalization","generalize","generally","generate","generation", "generative","generator","generic","generous","genesis", "genetic","geneticist","genetics","genial","geniality", "genie","genital","genitals","genitive","genius", "genocide","genre","gent","genteel","gentian", "gentile","gentility","gentle","gentlefolk","gentleman", "gentlemanly","gentlewoman","gently","gentry","gents", "genuflect","genuine","genus","geocentric","geographer", "geography","geologist","geology","geometric","geometry", "geophysics","geopolitics","georgette","geranium","geriatric", "geriatrician","geriatrics","germ","germane","germanic", "germicide","germinal","germinate","gerontology","gerrymander", "gerund","gestalt","gestapo","gestation","gesticulate", "gesture","get","getaway","getup","geum", "gewgaw","geyser","gharry","ghastly","ghat", "ghaut","ghee","gherkin","ghetto","ghi", "ghost","ghost
 ly","ghoul","ghoulish","ghq", "ghyll","giant","giantess","gibber","gibberish", "gibbet","gibbon","gibbous","gibe","giblets", "giddy","gift","gifted","gig","gigantic", "giggle","gigolo","gild","gilded","gilding", "gill","gillie","gilly","gilt","gimcrack", "gimlet","gimmick","gimmicky","gin","ginger", "gingerbread","gingerly","gingham","gingivitis","gingko", "ginkgo","ginseng","gipsy","giraffe","gird", "girder","girdle","girl","girlfriend","girlhood", "girlie","girlish","girly","giro","girt", "girth","gist","give","giveaway","given", "gizzard","glacial","glacier","glad","gladden", "glade","gladiator","gladiolus","gladly","glamor", "glamorise","glamorize","glamorous","glamour","glamourous", "glance","glancing","gland","glandular","glare", "glaring","glass","glassblower","glasscutter","glasses", "glasshouse","glassware","glassworks","glassy","glaucoma", "glaucous","glaze","glazier","glazing","glc", "gleam","glean","gleaner","gleanings","glebe", "glee","gleeful","glen","glengarry","glib"
 , "glide","glider","gliding","glimmer","glimmerings", "glimpse","glint","glissade","glissando","glisten", "glister","glitter","glittering","gloaming","gloat", "global","globe","globefish","globetrotter","globular", "globule","glockenspiel","gloom","gloomy","gloria", "glorification","glorify","glorious","glory","gloss", "glossary","glossy","glottal","glottis","glove", "glow","glower","glowing","glucose","glue", "gluey","glum","glut","gluten","glutinous", "glutton","gluttonous","gluttony","glycerin","glycerine", "gnarled","gnash","gnat","gnaw","gnawing", "gneiss","gnocchi","gnome","gnp","gnu", "goad","goal","goalkeeper","goalmouth","goalpost", "goat","goatee","goatherd","goatskin","gob", "gobbet","gobble","gobbledegook","gobbledygook","gobbler", "goblet","goblin","god","godchild","goddam", "goddamn","goddie","godforsaken","godhead","godless", "godlike","godly","godown","godparent","gods", "godsend","godspeed","goer","goggle","goggles", "goings","goiter","goitre","gold","goldbeater", "
 golden","goldfield","goldfinch","goldfish","goldmine", "goldsmith","golf","goliath","golliwog","golly", "gollywog","gonad","gondola","gondolier","gone", "goner","gong","gonna","gonorrhea","gonorrhoea", "goo","good","goodbye","goodish","goodly", "goodness","goodnight","goods","goodwill","goody", "gooey","goof","goofy","googly","goon", "goose","gooseberry","gooseflesh","goosestep","gopher", "gore","gorge","gorgeous","gorgon","gorgonzola", "gorilla","gormandise","gormandize","gormless","gorse", "gory","gosh","gosling","gospel","gossamer", "gossip","gossipy","got","gothic","gotta", "gotten","gouache","gouda","gouge","goulash", "gourd","gourmand","gourmet","gout","gouty", "govern","governance","governess","governing","government", "governor","gown","gpo","grab","grace", "graceful","graceless","graces","gracious","gradation", "grade","gradient","gradual","graduate","graduation", "graffiti","graft","grafter","grail","grain", "gram","grammar","grammarian","grammatical","gramme", "gramophone
 ","grampus","gran","granary","grand", "grandad","grandchild","granddad","granddaughter","grandee", "grandeur","grandfather","grandiloquent","grandiose","grandma", "grandmother","grandpa","grandparent","grandson","grandstand", "grange","granite","grannie","granny","grant"};
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
new file mode 100644
index 0000000..6ceefa8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
@@ -0,0 +1,53 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a java version of Bob Krovetz' KStem.
+ *
+ * Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	/// <summary>
+	/// A list of words used by Kstem
+	/// </summary>
+	internal class KStemData4
+	{
+		private KStemData4()
+		{
+		}
+	   internal static string[] data = new string[] {"granular","granulate","granule","grape","grapefruit", "grapeshot","grapevine","graph","graphic","graphical", "graphically","graphite","graphology","grapnel","grapple", "grasp","grasping","grass","grasshopper","grassland", "grassy","grate","grateful","grater","gratification", "gratify","gratifying","grating","gratis","gratitude", "gratuitous","gratuity","grave","gravel","gravelly", "gravestone","graveyard","gravitate","gravitation","gravity", "gravure","gravy","gray","graybeard","grayish", "graze","grease","greasepaint","greaseproof","greaser", "greasy","great","greatcoat","greater","greatly", "grebe","grecian","greed","greedy","green", "greenback","greenery","greenfly","greengage","greengrocer", "greenhorn","greenhouse","greenish","greenroom","greens", "greenwood","greet","greeting","gregarious","gremlin", "grenade","grenadier","grenadine","grew","grey", "greybeard","greyhound","greyish","grid","griddle", "gridiron","grief","grievan
 ce","grieve","grievous", "griffin","grill","grim","grimace","grime", "grimy","grin","grind","grinder","grindstone", "gringo","grip","gripe","gripes","gripping", "grisly","grist","gristle","grit","grits", "grizzle","grizzled","groan","groat","groats", "grocer","groceries","grocery","grog","groggy", "groin","groom","groove","groover","groovy", "grope","gropingly","gross","grotesque","grotto", "grotty","grouch","ground","grounding","groundless", "groundnut","grounds","groundsel","groundsheet","groundsman", "groundwork","group","groupie","grouping","grouse", "grove","grovel","grow","grower","growl", "grown","growth","groyne","grub","grubby", "grudge","grudging","gruel","grueling","gruelling", "gruesome","gruff","grumble","grumbling","grumpy", "grundyism","grunt","gryphon","guano","guarantee", "guarantor","guaranty","guard","guarded","guardhouse", "guardian","guardianship","guardrail","guardroom","guardsman", "guava","gubernatorial","gudgeon","guerilla","guerrilla", "guess","guesswork","
 guest","guesthouse","guestroom", "guffaw","guidance","guide","guidelines","guild", "guilder","guildhall","guile","guileless","guillemot", "guillotine","guilt","guilty","guinea","guipure", "guise","guitar","gulch","gulden","gulf", "gull","gullet","gulley","gullible","gully", "gulp","gum","gumbo","gumboil","gumboot", "gumdrop","gummy","gumption","gun","gunboat", "gundog","gunfire","gunge","gunman","gunmetal", "gunnel","gunner","gunnery","gunnysack","gunpoint", "gunpowder","gunrunner","gunshot","gunshy","gunsmith", "gunwale","guppy","gurgle","guru","gush", "gusher","gushing","gushy","gusset","gust", "gustatory","gusto","gusty","gut","gutless", "guts","gutsy","gutter","guttersnipe","guttural", "guv","guvnor","guy","guzzle","gym", "gymkhana","gymnasium","gymnast","gymnastic","gymnastics", "gymslip","gynaecology","gynecology","gyp","gypsum", "gypsy","gyrate","gyration","gyroscope","gyves", "haberdasher","haberdashery","habiliment","habit","habitable", "habitat","habitation","habitual","ha
 bituate","hacienda", "hack","hackles","hackney","hackneyed","hacksaw", "hackwork","had","haddock","hadji","haft", "hag","haggard","haggis","haggle","hagiography", "haiku","hail","hailstone","hailstorm","hair", "hairbrush","haircut","hairdo","hairdresser","hairgrip", "hairless","hairline","hairnet","hairpiece","hairpin", "hairspring","hairy","hajji","hake","halberd", "halcyon","hale","half","halfback","halfpence", "halfpenny","halfpennyworth","halftone","halfway","halibut", "halitosis","hall","halleluja","halliard","hallmark", "hallo","hallow","hallstand","hallucinate","hallucination", "hallucinatory","hallucinogenic","hallway","halma","halo", "halt","halter","halterneck","halting","halve", "halves","halyard","ham","hamadryad","hamburger", "hamlet","hammer","hammock","hamper","hamster", "hamstring","hand","handbag","handball","handbarrow", "handbill","handbook","handbrake","handcart","handclap", "handcuff","handcuffs","handful","handgun","handhold", "handicap","handicraft","handiwork
 ","handkerchief","handle", "handlebars","handler","handloom","handmade","handmaiden", "handout","handpick","handrail","handshake","handsome", "handstand","handwork","handwriting","handwritten","handy", "handyman","hang","hangar","hangdog","hanger", "hanging","hangings","hangman","hangnail","hangout", "hangover","hangup","hank","hanker","hankering", "hankie","hanky","hansard","hansom","hap", "haphazard","hapless","haply","happen","happening", "happily","happiness","happy","harangue","harass", "harassment","harbinger","harbor","harbour","hard", "hardback","hardboard","hardbound","harden","hardheaded", "hardihood","hardiness","hardly","hardness","hardship", "hardtop","hardware","hardwearing","hardwood","hardy", "hare","harebell","harebrained","harelip","harem", "haricot","hark","harlequin","harlequinade","harlot", "harm","harmless","harmonic","harmonica","harmonise", "harmonium","harmonize","harmony","harness","harp", "harpoon","harpsichord","harpy","harquebus","harridan", "harrier","h
 arrow","harrowing","harry","harsh", "hart","hartal","hartebeest","harvest","harvester", "has","hash","hashish","hasp","hassle", "hassock","hast","haste","hasten","hasty", "hat","hatband","hatch","hatchback","hatchery", "hatchet","hatching","hatchway","hate","hateful", "hath","hatless","hatpin","hatred","hatter", "hauberk","haughty","haul","haulage","haulier", "haulm","haunch","haunt","haunting","hautbois", "hautboy","hauteur","havana","have","haven", "haver","haversack","haves","havoc","haw", "hawk","hawker","hawser","hawthorn","hay", "haycock","hayfork","haymaker","haystack","haywire", "hazard","hazardous","haze","hazel","hazy", "head","headache","headband","headboard","headcheese", "headdress","header","headfirst","headgear","headhunter", "heading","headland","headless","headlight","headline", "headlong","headman","headmaster","headphone","headpiece", "headquarters","headrest","headroom","headset","headship", "headshrinker","headstall","headstone","headstrong","headway", "headwind
 ","headword","heady","heal","health", "healthful","healthy","heap","hear","hearer", "hearing","hearken","hearsay","hearse","heart", "heartache","heartbeat","heartbreak","heartbreaking","heartbroken", "heartburn","hearten","heartening","heartfelt","hearth", "hearthrug","heartily","heartless","heartrending","heartsease", "heartsick","heartstrings","heartthrob","heartwarming","heartwood", "hearty","heat","heated","heater","heath", "heathen","heather","heating","heatstroke","heave", "heaven","heavenly","heavenwards","heavy","heavyhearted", "heavyweight","hebdomadal","hebraic","hebrew","hecatomb", "heck","heckle","hectare","hectic","hector", "hedge","hedgehog","hedgehop","hedgerow","hedonism", "heed","heel","heelball","hefty","hegemony", "hegira","heifer","height","heighten","heinous", "heir","heiress","heirloom","hejira","held", "helicopter","heliograph","heliotrope","heliport","helium", "hell","hellcat","hellene","hellenic","hellenistic", "hellish","hellishly","hello","helm","helmet", 
 "helmeted","helmsman","helot","help","helpful", "helping","helpless","helpmate","helve","hem", "hemisphere","hemline","hemlock","hemoglobin","hemophilia", "hemophiliac","hemorrhage","hemorrhoid","hemp","hempen", "hemstitch","hen","henbane","hence","henceforth", "henchman","henna","hennaed","henpecked","hepatitis", "heptagon","her","herald","heraldic","heraldry", "herb","herbaceous","herbage","herbal","herbalist", "herbivorous","herculean","herd","herdsman","here", "hereabouts","hereafter","hereby","hereditament","hereditary", "heredity","herein","hereinafter","hereof","heresy", "heretic","hereto","heretofore","hereunder","hereupon", "herewith","heritable","heritage","hermaphrodite","hermetic", "hermit","hermitage","hernia","hero","heroic", "heroics","heroin","heroism","heron","heronry", "herpes","herr","herring","herringbone","hers", "herself","hertz","hesitancy","hesitant","hesitate", "hesitation","hesperus","hessian","heterodox","heterodoxy", "heterogeneous","heterosexual","heuris
 tic","heuristics","hew", "hewer","hex","hexagon","hexagram","hexameter", "hey","heyday","hiatus","hibernate","hibiscus", "hiccough","hiccup","hick","hickory","hide", "hideaway","hidebound","hideous","hiding","hie", "hierarchy","hieroglyph","hieroglyphics","high","highball", "highborn","highboy","highbrow","higher","highfalutin", "highland","highlander","highlands","highlight","highly", "highness","highpitched","highroad","highway","highwayman", "hijack","hike","hilarious","hilarity","hill", "hillbilly","hillock","hillside","hilly","hilt", "him","himself","hind","hinder","hindmost", "hindquarters","hindrance","hindsight","hindu","hinduism", "hinge","hint","hinterland","hip","hipbath", "hippie","hippodrome","hippopotamus","hippy","hipster", "hire","hireling","hirsute","his","hiss", "hist","histamine","histology","historian","historic", "historical","history","histrionic","histrionics","hit", "hitch","hitchhike","hither","hitherto","hive", "hives","hms","hoard","hoarding","hoarfrost", 
 "hoarse","hoary","hoax","hob","hobble", "hobbledehoy","hobby","hobbyhorse","hobgoblin","hobnail", "hobnob","hobo","hock","hockey","hod", "hodgepodge","hoe","hog","hoggish","hogmanay", "hogshead","hogwash","hoist","hold","holdall", "holder","holding","holdover","holdup","hole", "holiday","holidaymaker","holiness","holler","hollow", "holly","hollyhock","hollywood","holocaust","holograph", "holstein","holster","holy","homage","homburg", "home","homecoming","homegrown","homeland","homelike", "homely","homemade","homeopath","homeopathy","homeric", "homesick","homespun","homestead","hometown","homeward", "homewards","homework","homey","homicidal","homicide", "homiletic","homiletics","homily","homing","hominy", "homoeopath","homoeopathy","homogeneous","homogenise","homogenize", "homograph","homonym","homophone","homosexual","homy", "hone","honest","honestly","honesty","honey", "honeybee","honeycomb","honeycombed","honeydew","honeyed", "honeymoon","honeysuckle","honk","honkie","honky", "hon
 or","honorable","honorarium","honorary","honorific", "honors","honour","honourable","honours","hooch", "hood","hooded","hoodlum","hoodoo","hoodwink", "hooey","hoof","hook","hookah","hooked", "hooker","hookey","hookup","hookworm","hooky", "hooligan","hoop","hooray","hoot","hooter", "hoover","hooves","hop","hope","hopeful", "hopefully","hopeless","hopper","hopscotch","horde", "horizon","horizontal","hormone","horn","hornbeam", "hornbill","horned","hornet","hornpipe","horny", "horology","horoscope","horrendous","horrible","horrid", "horrific","horrify","horror","horrors","horse", "horseback","horsebox","horseflesh","horsefly","horsehair", "horselaugh","horseman","horsemanship","horsemeat","horseplay", "horsepower","horseracing","horseradish","horseshit","horseshoe", "horsewhip","horsewoman","horsy","hortative","horticulture", "hosanna","hose","hosier","hosiery","hospice", "hospitable","hospital","hospitalise","hospitality","hospitalize", "host","hostage","hostel","hosteler","hosteller"
 , "hostelry","hostess","hostile","hostilities","hostility", "hostler","hot","hotbed","hotchpotch","hotel", "hotelier","hotfoot","hothead","hothouse","hotly", "hotplate","hotpot","hottentot","hound","hour", "hourglass","houri","hourly","house","houseboat", "housebound","houseboy","housebreaker","housebroken","housecoat", "housecraft","housedog","housefather","housefly","houseful", "household","householder","housekeeper","housekeeping","housemaid", "houseman","housemaster","housemother","houseroom","housetops", "housewarming","housewife","housewifery","housework","housing", "hove","hovel","hover","hovercraft","how", "howdah","howdy","however","howitzer","howl", "howler","howling","howsoever","hoyden","hrh", "hub","hubbub","hubby","hubcap","hubris", "huckaback","huckleberry","huckster","huddle","hue", "huff","huffish","huffy","hug","huge", "hugely","huguenot","huh","hula","hulk", "hulking","hull","hullabaloo","hullo","hum", "human","humane","humanise","humanism","humanitarian", "humani
 tarianism","humanities","humanity","humanize","humankind", "humanly","humble","humbug","humdinger","humdrum", "humerus","humid","humidify","humidity","humidor", "humiliate","humility","hummingbird","hummock","humor", "humorist","humorous","humour","hump","humpback", "humph","humus","hun","hunch","hunchback", "hundred","hundredweight","hung","hunger","hungry", "hunk","hunkers","hunt","hunter","hunting", "huntress","huntsman","hurdle","hurl","hurling", "hurray","hurricane","hurried","hurry","hurt", "hurtful","hurtle","husband","husbandman","husbandry", "hush","husk","husky","hussar","hussy", "hustings","hustle","hustler","hut","hutch", "hutment","huzza","huzzah","hyacinth","hyaena", "hybrid","hybridise","hybridize","hydra","hydrangea", "hydrant","hydrate","hydraulic","hydraulics","hydrocarbon", "hydroelectric","hydrofoil","hydrogen","hydrophobia","hydroplane", "hydroponics","hydrotherapy","hyena","hygiene","hygienic", "hymen","hymeneal","hymn","hymnal","hyperbola", "hyperbole","hyperb
 olic","hypercritical","hypermarket","hypersensitive", "hyphen","hyphenate","hypnosis","hypnotise","hypnotism", "hypnotist","hypnotize","hypo","hypochondria","hypochondriac", "hypocrisy","hypocrite","hypodermic","hypotenuse","hypothermia", "hypothesis","hypothetical","hysterectomy","hysteria","hysterical", "hysterics","iamb","iberian","ibex","ibidem", "ibis","icbm","ice","iceberg","icebound", "icebox","icebreaker","icefall","icehouse","iceman", "icicle","icing","icon","iconoclast","icy", "idea","ideal","idealise","idealism","idealist", "idealize","ideally","idem","identical","identification", "identify","identikit","identity","ideogram","ideology", "ides","idiocy","idiom","idiomatic","idiosyncrasy", "idiot","idle","idol","idolater","idolatrous", "idolatry","idolise","idolize","idyl","idyll", "igloo","igneous","ignite","ignition","ignoble", "ignominious","ignominy","ignoramus","ignorance","ignorant", "ignore","iguana","ikon","ilex","ilk", "ill","illegal","illegality","illegible","ille
 gitimate", "illiberal","illicit","illimitable","illiterate","illness", "illogical","illuminate","illuminating","illumination","illuminations", "illusion","illusionist","illusory","illustrate","illustration", "illustrative","illustrator","illustrious","image","imagery", "imaginable","imaginary","imagination","imaginative","imagine", "imam","imbalance","imbecile","imbecility","imbed", "imbibe","imbroglio","imbue","imitate","imitation", "imitative","imitator","immaculate","immanence","immanent", "immaterial","immature","immeasurable","immediacy","immediate", "immediately","immemorial","immense","immensely","immensity", "immerse","immersion","immigrant","immigrate","imminence", "imminent","immobile","immobilise","immobilize","immoderate", "immodest","immolate","immoral","immorality","immortal", "immortalise","immortality","immortalize","immovable","immune", "immunise","immunize","immure","immutable","imp", "impact","impacted","impair","impala","impale", "impalpable","impanel","impart","
 impartial","impassable", "impasse","impassioned","impassive","impatience","impatient", "impeach","impeccable","impecunious","impedance","impede", "impediment","impedimenta","impel","impending","impenetrable", "impenitent","imperative","imperceptible","imperfect","imperial", "imperialism","imperialist","imperialistic","imperil","imperious", "imperishable","impermanent","impermeable","impersonal","impersonate", "impertinent","imperturbable","impervious","impetigo","impetuous", "impetus","impiety","impinge","impious","impish", "implacable","implant","implement","implicate","implication", "implicit","implore","implosion","imply","impolite", "impolitic","imponderable","import","importance","important", "importation","importunate","importune","impose","imposing", "imposition","impossible","impostor","imposture","impotent", "impound","impoverish","impracticable","impractical","imprecation", "impregnable","impregnate","impresario","impress","impression", "impressionable","impressionism","im
 pressionist","impressionistic","impressive", "imprimatur","imprint","imprison","improbability","improbable", "impromptu","improper","impropriety","improve","improvement", "improvident","improvise","imprudent","impudent","impugn", "impulse","impulsion","impulsive","impunity","impure", "impurity","imputation","impute","inability","inaccessible", "inaccurate","inaction","inactive","inadequacy","inadequate", "inadmissible","inadvertent","inalienable","inamorata","inane", "inanimate","inanition","inanity","inapplicable","inappropriate", "inapt","inaptitude","inarticulate","inartistic","inattention", "inattentive","inaudible","inaugural","inaugurate","inauspicious", "inboard","inborn","inbound","inbred","inbreeding", "inc","incalculable","incandescent","incantation","incapable", "incapacitate","incapacity","incarcerate","incarnate","incarnation", "incautious","incendiarism","incendiary","incense","incentive", "inception","incertitude","incessant","incest","incestuous", "inch","inchoate","
 incidence","incident","incidental", "incidentally","incidentals","incinerate","incinerator","incipience", "incipient","incise","incision","incisive","incisor", "incite","incivility","inclement","inclination","incline", "inclined","inclose","inclosure","include","included", "including","inclusion","inclusive","incognito","incoherent", "incombustible","income","incoming","incommensurable","incommensurate", "incommode","incommodious","incommunicable","incommunicado","incommunicative", "incomparable","incompatible","incompetence","incompetent","incomplete", "incomprehensible","incomprehensibly","incomprehension","inconceivable","inconclusive", "incongruity","incongruous","inconsequent","inconsequential","inconsiderable", "inconsiderate","inconsistent","inconsolable","inconspicuous","inconstant", "incontestable","incontinent","incontrovertible","inconvenience","inconvenient", "incorporate","incorporated","incorporeal","incorrect","incorrigible", "incorruptible","increase","increasingly",
 "incredible","incredulity", "incredulous","increment","incriminate","incrust","incrustation", "incubate","incubation","incubator","incubus","inculcate", "inculpate","incumbency","incumbent","incur","incurable", "incurious","incursion","incurved","indebted","indecent", "indecipherable","indecision","indecisive","indecorous","indecorum", "indeed","indefatigable","indefensible","indefinable","indefinite", "indefinitely","indelible","indelicate","indemnification","indemnify", "indemnity","indent","indentation","indenture","independence", "independent","indescribable","indestructible","indeterminable","indeterminate", "index","indian","indicate","indication","indicative", "indicator","indices","indict","indictable","indifferent", "indigenous","indigent","indigestible","indigestion","indignant", "indignation","indignity","indigo","indirect","indiscernible", "indiscipline","indiscreet","indiscretion","indiscriminate","indispensable", "indisposed","indisposition","indisputable","indissolubl
 e","indistinct", "indistinguishable","individual","individualise","individualism","individuality", "individualize","individually","indivisible","indocile","indoctrinate", "indolent","indomitable","indoor","indoors","indorse", "indrawn","indubitable","induce","inducement","induct", "induction","inductive","indue","indulge","indulgence", "indulgent","industrial","industrialise","industrialism","industrialist", "industrialize","industrious","industry","inebriate","inedible", "ineducable","ineffable","ineffaceable","ineffective","ineffectual", "inefficient","inelastic","inelegant","ineligible","ineluctable", "inept","ineptitude","inequality","inequitable","inequity", "ineradicable","inert","inertia","inescapable","inessential", "inestimable","inevitable","inexact","inexactitude","inexcusable", "inexhaustible","inexorable","inexpediency","inexpedient","inexpensive", "inexperience","inexperienced","inexpert","inexpiable","inexplicable", "inexplicably","inexpressible","inextinguishable","i
 nextricable","infallible", "infallibly","infamous","infamy","infancy","infant", "infanticide","infantile","infantry","infantryman","infatuated", "infatuation","infect","infection","infectious","infelicitous", "infer","inference","inferential","inferior","infernal", "inferno","infertile","infest","infidel","infidelity", "infield","infighting","infiltrate","infiltration","infinite", "infinitesimal","infinitive","infinitude","infinity","infirm", "infirmary","infirmity","inflame","inflamed","inflammable", "inflammation","inflammatory","inflatable","inflate","inflated", "inflation","inflationary","inflect","inflection","inflexible", "inflexion","inflict","infliction","inflow","influence", "influential","influenza","influx","info","inform", "informal","informant","information","informative","informed", "informer","infra","infraction","infrared","infrastructure", "infrequent","infringe","infuriate","infuse","infusion", "ingathering","ingenious","ingenuity","ingenuous","ingest", "inglenook"
 ,"inglorious","ingoing","ingot","ingraft", "ingrained","ingratiate","ingratiating","ingratitude","ingredient", "ingress","ingrown","inhabit","inhabitant","inhale", "inhaler","inharmonious","inhere","inherent","inherently", "inherit","inheritance","inhibit","inhibited","inhibition", "inhospitable","inhuman","inhumane","inhumanity","inimical", "inimitable","iniquitous","iniquity","initial","initially", "initiate","initiation","initiative","inject","injection", "injudicious","injunction","injure","injurious","injury", "injustice","ink","inkbottle","inkling","inkpad", "inkstand","inkwell","inky","inlaid","inland", "inlay","inlet","inmate","inmost","inn", "innards","innate","inner","inning","innings", "innkeeper","innocent","innocuous","innovate","innovation", "innuendo","innumerable","inoculate","inoffensive","inoperable", "inoperative","inopportune","inordinate","inorganic","input", "inquest","inquietude","inquire","inquiring","inquiry", "inquisition","inquisitive","inquisitor","inquis
 itorial","inroad", "inrush","insalubrious","insane","insanitary","insanity", "insatiable","insatiate","inscribe","inscription","inscrutable", "insect","insecticide","insectivore","insectivorous","insecure", "inseminate","insemination","insensate","insensibility","insensible", "insensitive","inseparable","insert","insertion","inset", "inshore","inside","insider","insidious","insight", "insignia","insignificant","insincere","insinuate","insinuation", "insipid","insist","insistence","insistency","insistent", "insole","insolent","insoluble","insolvable","insolvent", "insomnia","insomniac","insouciance","inspect","inspection", "inspector","inspectorate","inspectorship","inspiration","inspire", "inspired","instability","install","installation","installment", "instalment","instance","instant","instantaneous","instantly", "instead","instep","instigate","instigation","instil", "instill","instinct","instinctive","institute","institution", "instruct","instruction","instructive","instructor","i
 nstructress", "instrument","instrumental","instrumentalist","instrumentality","instrumentation", "insubordinate","insubstantial","insufferable","insufficiency","insufficient", "insular","insularity","insulate","insulation","insulator", "insulin","insult","insuperable","insupportable","insurance", "insure","insured","insurer","insurgent","insurmountable", "insurrection","intact","intaglio","intake","intangible", "integer","integral","integrate","integrated","integrity", "integument","intellect","intellectual","intelligence","intelligent", "intelligentsia","intelligible","intemperate","intend","intended", "intense","intensifier","intensify","intensity","intensive", "intent","intention","intentional","intentions","inter", "interact","interaction","interbreed","intercalary","intercalate", "intercede","intercept","interceptor","intercession","interchange", "interchangeable","intercity","intercollegiate","intercom","intercommunicate", "intercommunion","intercontinental","intercourse","int
 erdenominational","interdependent", "interdict","interest","interested","interesting","interests", "interface","interfere","interference","interim","interior", "interject","interjection","interlace","interlard","interleave", "interline","interlinear","interlink","interlock","interlocutor", "interloper","interlude","intermarriage","intermarry","intermediary", "intermediate","interment","intermezzo","interminable","intermingle", "intermission","intermittent","intern","internal","internalise", "internalize","international","internationale","internationalise","internationalism", "internationalize","interne","internecine","internee","internment", "interpellate","interpenetrate","interpersonal","interplanetary","interplay", "interpol","interpolate","interpolation","interpose","interposition", "interpret","interpretation","interpretative","interpreter","interracial", "interregnum","interrelate","interrelation","interrogate","interrogative", "interrogatory","interrupt","intersect","intersec
 tion","intersperse", "interstate","interstellar","interstice","intertribal","intertwine", "interurban","interval","intervene","intervention","interview", "interweave","intestate","intestinal","intestine","intimacy", "intimate","intimidate","intimidation","into","intolerable", "intolerant","intonation","intone","intoxicant","intoxicate", "intractable","intramural","intransigent","intransitive","intravenous", "intrench","intrepid","intricacy","intricate","intrigue", "intrinsic","intro","introduce","introduction","introductory", "introit","introspection","introspective","introvert","introverted", "intrude","intruder","intrusion","intrusive","intrust", "intuit","intuition","intuitive","intumescence","inundate", "inundation","inure","invade","invalid","invalidate", "invalidism","invaluable","invariable","invasion","invective", "inveigh","inveigle","invent","invention","inventive", "inventor","inventory","inverse","inversion","invert", "invertebrate","invest","investigate","investiture","
 investment", "inveterate","invidious","invigilate","invigorate","invincible", "inviolable","inviolate","invisible","invitation","invite", "inviting","invocation","invoice","invoke","involuntary", "involve","involved","invulnerable","inward","inwardness", "inwards","inwrought","iodin","iodine","iodise", "iodize","ion","ionic","ionise","ionize", "ionosphere","iota","iou","ipa","ira", "irascible","irate","ire","iridescent","iridium", "irishman","irk","irksome","iron","ironclad", "ironic","ironically","ironing","ironmonger","ironmongery", "ironmould","irons","ironstone","ironware","ironwork", "ironworks","irony","irradiate","irrational","irreconcilable", "irrecoverable","irredeemable","irreducible","irrefutable","irregular", "irregularity","irrelevance","irrelevant","irreligious","irremediable", "irremovable","irreparable","irreplaceable","irrepressible","irreproachable", "irresistible","irresolute","irresponsible","irretrievable","irreverent", "irreversible","irrevocable","irrigate","i
 rritable","irritant", "irritate","irritation","irruption","isinglass","islam", "island","islander","isle","islet","ism", "isobar","isolate","isolated","isolation","isolationism", "isotherm","isotope","israelite","issue","isthmus", "ita","italic","italicise","italicize","italics", "itch","itchy","item","itemise","itemize", "iterate","itinerant","itinerary","itn","its", "itself","itv","iud","ivied","ivory", "ivy","jab","jabber","jack","jackal", "jackanapes","jackaroo","jackass","jackboot","jackdaw", "jackeroo","jacket","jackpot","jackrabbit","jacobean", "jacobite","jade","jaded","jaffa","jag", "jagged","jaguar","jail","jailbird","jailbreak", "jailer","jailor","jalopy","jam","jamb", "jamboree","jammy","jangle","janissary","janitor", "january","japan","jape","japonica","jar", "jargon","jasmine","jasper","jaundice","jaundiced", "jaunt","jaunty","javelin","jaw","jawbone", "jawbreaker","jaws","jay","jaywalk","jazz", "jazzy","jealous","jealousy","jeans","jeep", "jeer","jehovah","jejune","je
 ll","jellied", "jello","jelly","jellyfish","jemmy","jenny", "jeopardise","jeopardize","jeopardy","jerboa","jeremiad", "jerk","jerkin","jerky","jeroboam","jerry", "jersey","jest","jester","jesting","jesuit", "jesuitical","jet","jetsam","jettison","jetty", "jew","jewel","jeweled","jeweler","jewelled", "jeweller","jewellery","jewelry","jewess","jewish", "jezebel","jib","jibe","jiffy","jig", "jigger","jiggered","jiggle","jigsaw","jihad", "jilt","jiminy","jimjams","jimmy","jingle", "jingo","jingoism","jinks","jinn","jinrikisha", "jinx","jitney","jitterbug","jitters","jiujitsu", "jive","jnr","job","jobber","jobbery", "jobbing","jobless","jockey","jockstrap","jocose", "jocular","jocund","jodhpurs","jog","joggle", "john","johnny","join","joiner","joinery", "joint","joist","joke","joker","jollification", "jollity","jolly","jolt","jolty","jonah", "jonquil","josh","jostle","jot","jotter", "jotting","joule","journal","journalese","journalism", "journalist","journey","journeyman","joust","jove",
  "jovial","jowl","joy","joyful","joyless", "joyous","joyride","joystick","jubilant","jubilation", "jubilee","judaic","judaism","judder","judge", "judgement","judgment","judicature","judicial","judiciary", "judicious","judo","jug","juggernaut","juggle", "juice","juicy","jujitsu","juju","jujube", "jukebox","julep","july","jumble","jumbo", "jump","jumper","jumps","jumpy","junction", "juncture","june","jungle","junior","juniper", "junk","junket","junketing","junkie","junky", "junoesque","junta","jupiter","juridical","jurisdiction", "jurisprudence","jurist","juror","jury","juryman", "just","justice","justifiable","justification","justified", "justify","jut","jute","juvenile","juxtapose", "juxtaposition","kaffir","kafir","kaftan","kail", "kaiser","kale","kaleidoscope","kaleidoscopic","kalends", "kampong","kangaroo","kaolin","kapok","kappa", "kaput","karat","karate","karma","katydid", "kayak","kazoo","kebab","kebob","kedgeree", "keel","keelhaul","keen","keep","keeper", "keeping","keeps","k
 eepsake","keg","kelp", "kelvin","ken","kennel","kennels","kepi", "kept","kerb","kerchief","kerfuffle","kernel", "kerosene","kerosine","kersey","kestrel","ketch", "ketchup","kettle","kettledrum","key","keyboard", "keyhole","keyless","keynote","keypunch","keystone", "khaki","khalif","khalifate","khan","kibbutz", "kibosh","kick","kickback","kicker","kickoff", "kicks","kid","kiddie","kiddy","kidnap", "kidney","kike","kill","killer","killing", "killjoy","kiln","kilo","kilogram","kilogramme", "kilohertz","kiloliter","kilolitre","kilometer","kilometre", "kilowatt","kilt","kimono","kin","kind", "kindergarten","kindle","kindling","kindly","kindness", "kindred","kine","kinetic","kinetics","kinfolk", "king","kingcup","kingdom","kingfisher","kingly", "kingmaker","kingpin","kings","kingship","kink", "kinky","kinsfolk","kinship","kinsman","kiosk", "kip","kipper","kirk","kirsch","kirtle", "kismet","kiss","kisser","kit","kitchen", "kitchenette","kite","kitsch","kitten","kittenish", "kittiwake","kit
 ty","kiwi","klaxon","kleenex", "kleptomania","kleptomaniac","knack","knacker","knackered", "knapsack","knave","knavery","knead","knee", "kneecap","kneel","knell","knew","knickerbockers", "knickers","knife","knight","knighthood","knightly", "knit","knitter","knitting","knitwear","knives", "knob","knobbly","knobkerrie","knock","knockabout", "knockdown","knocker","knockers","knockout","knoll", "knot","knothole","knotty","knout","know", "knowing","knowingly","knowledge","knowledgeable","known", "knuckle","koala","kohl","kohlrabi","kookaburra", "kopeck","kopek","kopje","koppie","koran", "kosher","kowtow","kraal","kremlin","kris", "krona","krone","kudos","kukri","kumis", "kumquat","kuomintang","kurus","kvass","kwashiorkor", "kwela","laager","lab","label","labial", "labor","laboratory","laborer","laborious","labour", "labourer","labourite","labrador","laburnum","labyrinth", "lace","lacerate","laceration","lachrymal","lachrymose", "lack","lackadaisical","lackey","lacking","lackluster", "lac
 klustre","laconic","lacquer","lacrosse","lactation", "lactic","lactose","lacuna","lacy","lad", "ladder","laddie","laddy","laden","ladies", "lading","ladle","lady","ladybird","ladylike", "ladyship","lag","lager","laggard","lagging", "lagoon","laid","lain","lair","laird", "laity","lake","lam","lama","lamaism", "lamasery","lamb","lambaste","lambent","lambkin", "lamblike","lambskin","lame","lament","lamentable", "lamentation","laminate","lamming","lamp","lampoon", "lamppost","lamprey","lampshade","lance","lancer", "lancers","lancet","land","landau","landed", "landfall","landing","landlady","landlocked","landlord", "landlubber","landmark","landmine","lands","landscape", "landslide","landslip","landward","landwards","lane", "language","languid","languish","languor","lank", "lanky","lanolin","lantern","lanternslide","lanyard", "lap","lapdog","lapel","lapidary","lapse", "lapsed","lapwing","larboard","larceny","larch", "lard","larder","large","largely","largess", "largesse","largo","lariat",
 "lark","larkspur", "larrup","larva","laryngeal","laryngitis","laryngoscope", "larynx","lasagna","lascivious","laser","lash", "lashing","lashings","lass","lasso","last", "lasting","lastly","lat","latch","latchkey", "late","latecomer","lately","latent","lateral", "latest","latex","lath","lathe","lather", "latin","latinise","latinize","latitude","latitudes", "latitudinal","latitudinarian","latrine","latter","latterly", "lattice","laud","laudable","laudanum","laudatory", "laugh","laughable","laughingstock","laughter","launch", "launder","launderette","laundress","laundry","laureate", "laurel","laurels","lava","lavatory","lave", "lavender","lavish","law","lawful","lawless", "lawn","lawsuit","lawyer","lax","laxative", "laxity","lay","layabout","layer","layette", "layman","layout","laze","lazy","lbw", "lcm","lea","leach","lead","leaden", "leader","leadership","leading","leads","leaf", "leafage","leafed","leaflet","leafy","league", "leak","leakage","leaky","lean","leaning", "leap","leapfrog
 ","learn","learned","learner", "learning","lease","leasehold","leash","least", "leastways","leather","leatherette","leathery","leave", "leaved","leaven","leavening","leaves","leavings", "lech","lecher","lecherous","lechery","lectern", "lecture","lecturer","lectureship","led","ledge", "ledger","lee","leech","leek","leer", "leery","lees","leeward","leeway","left", "leftist","leftovers","leftward","leftwards","leg", "legacy","legal","legalise","legality","legalize", "legate","legatee","legation","legato","legend", "legendary","leger","legerdemain","legged","leggings", "leggy","legible","legion","legionary","legislate", "legislation","legislative","legislator","legislature","legit", "legitimate","legitimatise","legitimatize","legroom","legume", "leguminous","lei","leisure","leisured","leisurely", "leitmotif","leitmotive","lemming","lemon","lemonade", "lemur","lend","length","lengthen","lengthways", "lengthy","lenience","lenient","lenity","lens", "lent","lentil","lento","leo","leonine", 
 "leopard","leotard","leper","leprechaun","leprosy", "lesbian","lesion","less","lessee","lessen", "lesser","lesson","lessor","lest","let", "letdown","lethal","lethargy","letraset","letter", "letterbox","lettered","letterhead","lettering","letterpress", "letters","letting","lettuce","letup","leucocyte", "leucotomy","leukaemia","leukemia","leukocyte","levee", "level","leveler","leveller","lever","leverage", "leveret","leviathan","levitate","levity","levodopa", "levy","lewd","lexical","lexicographer","lexicography", "lexicon","lexis","liability","liable","liaise", "liaison","liana","liar","lib","libation", "libel","libellous","libelous","liberal","liberalise", "liberalism","liberality","liberalize","liberally","liberate", "liberated","liberation","libertarian","liberties","libertine", "liberty","libidinous","libido","libra","librarian", "library","librettist","libretto","lice","licence", "licenced","license","licensed","licensee","licentiate", "licentious","lichen","licit","lick","licki
 ng", "licorice","lid","lido","lie","lieder", "lief","liege","lien","lieu","lieutenant", "life","lifeblood","lifeboat","lifeguard","lifeless", "lifelike","lifeline","lifelong","lifer","lifetime", "lift","liftboy","ligament","ligature","light", "lighten","lighter","lighterage","lighthouse","lighting", "lightly","lightness","lightning","lights","lightship", "lightweight","ligneous","lignite","likable","like", "likeable","likelihood","likely","liken","likeness", "likes","likewise","liking","lilac","lilliputian", "lilo","lilt","lily","limb","limber", "limbo","lime","limeade","limejuice","limekiln", "limelight","limerick","limestone","limey","limit", "limitation","limited","limiting","limitless","limn", "limousine","limp","limpet","limpid","limy", "linchpin","linctus","linden","line","lineage", "lineal","lineament","linear","lineman","linen", "lineout","liner","linertrain","lines","lineshooter", "linesman","lineup","ling","linger","lingerie", "lingering","lingo","lingual","linguist","ling
 uistic", "linguistics","liniment","lining","link","linkage", "linkman","links","linkup","linnet","linocut", "linoleum","linotype","linseed","lint","lintel", "lion","lionize","lip","lipid","lipstick", "liquefaction","liquefy","liquescent","liqueur","liquid", "liquidate","liquidation","liquidator","liquidity","liquidize", "liquidizer","liquor","liquorice","lira","lisle", "lisp","lissom","lissome","list","listen", "listenable","listener","listless","lists","lit", "litany","litchi","liter","literacy","literal", "literally","literary","literate","literati","literature", "lithe","lithium","lithograph","lithographic","lithography", "litigant","litigate","litigation","litigious","litmus", "litotes","litre","litter","litterateur","litterbin", "litterlout","little","littoral","liturgical","liturgy", "livable","live","liveable","livelihood","livelong", "lively","liven","liver","liveried","liverish", "livery","liveryman","lives","livestock","livid", "living","lizard","llama","load","loaded", "l
 oadstar","loadstone","loaf","loafsugar","loam", "loan","loanword","loath","loathe","loathing", "loathsome","loaves","lob","lobby","lobed", "lobotomy","lobster","lobsterpot","local","locale", "localise","localism","locality","localize","locally", "locate","located","location","loch","loci"};
+	}
+
+}
\ No newline at end of file


[22/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilterFactory.cs
new file mode 100644
index 0000000..cda8b1a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="SpanishLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.SpanishLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class SpanishLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new SpanishLightStemFilterFactory </summary>
+	  public SpanishLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new SpanishLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
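
For orientation, a minimal usage sketch of the factory above. It assumes the raw-ported namespace and lowercase create() signature compile as shown; the `source` TokenStream is a hypothetical placeholder:

    using System.Collections.Generic;
    using org.apache.lucene.analysis.es;

    // The factory validates its Solr-style args and rejects anything it does not
    // recognize, so an empty dictionary is the entire configuration here.
    var factory = new SpanishLightStemFilterFactory(new Dictionary<string, string>());
    TokenStream stemmed = factory.create(source); // wraps `source` in a SpanishLightStemFilter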

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemmer.cs
new file mode 100644
index 0000000..b450eee
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemmer.cs
@@ -0,0 +1,138 @@
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Light Stemmer for Spanish
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2001 Experiments</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class SpanishLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 5)
+		{
+		  return len;
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ä':
+				s[i] = 'a';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+		  }
+		}
+
+		switch (s[len - 1])
+		{
+		  case 'o':
+		  case 'a':
+		  case 'e':
+			  return len - 1;
+		  case 's':
+			if (s[len - 2] == 'e' && s[len - 3] == 's' && s[len - 4] == 'e')
+			{
+			  return len - 2;
+			}
+			if (s[len - 2] == 'e' && s[len - 3] == 'c')
+			{
+			  s[len - 3] = 'z';
+			  return len - 2;
+			}
+			if (s[len - 2] == 'o' || s[len - 2] == 'a' || s[len - 2] == 'e')
+			{
+			  return len - 2;
+			}
+			break; // C# requires an explicit jump out of the final switch section
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
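
To make the suffix rules above concrete, here is a small sketch of stem() in use, assuming a runnable build of the raw-ported class. The method rewrites the buffer in place and returns the new logical length:

    var stemmer = new SpanishLightStemmer();

    // "-ces" becomes "-z": the 'c' is rewritten and the length shrinks by two.
    char[] veces = "veces".ToCharArray();
    int n1 = stemmer.stem(veces, veces.Length);    // n1 == 3, buffer now starts with "vez"

    // A final vowel is dropped once the word is at least five characters long.
    char[] grande = "grande".ToCharArray();
    int n2 = stemmer.stem(grande, grande.Length);  // n2 == 5 -> "grand"

    // Accented vowels are folded even when no suffix rule fires.
    char[] camion = "camión".ToCharArray();
    int n3 = stemmer.stem(camion, camion.Length);  // n3 == 6 -> "camion"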

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
new file mode 100644
index 0000000..0f042b8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
@@ -0,0 +1,138 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.eu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using BasqueStemmer = org.tartarus.snowball.ext.BasqueStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Basque.
+	/// </summary>
+	public sealed class BasqueAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Basque stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(BasqueAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public BasqueAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public BasqueAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new BasqueStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
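
A usage sketch for the analyzer above, assuming the raw-ported Version and CharArraySet types keep their Java signatures (the LUCENE_48 constant and the collection-based CharArraySet constructor are assumptions, not part of this commit):

    // Default stopword set only:
    var analyzer = new BasqueAnalyzer(Version.LUCENE_48);

    // Protect selected terms from the Snowball stemmer via the stem exclusion set:
    var exclusions = new CharArraySet(Version.LUCENE_48, new[] { "etxea" }, false);
    var custom = new BasqueAnalyzer(Version.LUCENE_48, BasqueAnalyzer.DefaultStopSet, exclusions);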

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs
new file mode 100644
index 0000000..1b94810
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs
@@ -0,0 +1,156 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using ArabicLetterTokenizer = org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+	using ArabicNormalizationFilter = org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Persian.
+	/// <para>
+	/// This Analyzer uses <seealso cref="PersianCharFilter"/> which implies tokenizing around
+	/// zero-width non-joiner in addition to whitespace. Some persian-specific variant forms (such as farsi
+	/// zero-width non-joiner in addition to whitespace. Some Persian-specific variant forms (such as farsi
+	/// </para>
+	/// </summary>
+	public sealed class PersianAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// File containing default Persian stopwords.
+	  /// 
+	  /// Default stopword list is from
+	  /// http://members.unine.ch/jacques.savoy/clef/index.html The stopword list is
+	  /// BSD-Licensed.
+	  /// 
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// The comment character in the stopwords file. All lines prefixed with this
+	  /// will be ignored.
+	  /// </summary>
+	  public const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(PersianAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words:
+	  /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public PersianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public PersianAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="ArabicNormalizationFilter"/>,
+	  ///         <seealso cref="PersianNormalizationFilter"/> and Persian Stop words </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source;
+		Tokenizer source;
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  source = new StandardTokenizer(matchVersion, reader);
+		}
+		else
+		{
+		  source = new ArabicLetterTokenizer(matchVersion, reader);
+		}
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		result = new ArabicNormalizationFilter(result);
+		/* additional persian-specific normalization */
+		result = new PersianNormalizationFilter(result);
+		/*
+		 * the order here is important: the stopword list is normalized with the
+		 * above!
+		 */
+		return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
+	  }
+
+	  /// <summary>
+	  /// Wraps the Reader with <seealso cref="PersianCharFilter"/>
+	  /// </summary>
+	  protected internal override Reader initReader(string fieldName, Reader reader)
+	  {
+		return matchVersion.onOrAfter(Version.LUCENE_31) ? new PersianCharFilter(reader) : reader;
+	  }
+	}
+
+}
\ No newline at end of file
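
A sketch of the resulting analysis chain, assuming the ported Version type exposes the constants referenced above (LUCENE_48 is an assumed example):

    var analyzer = new PersianAnalyzer(Version.LUCENE_48);

    // For matchVersion >= LUCENE_31, initReader and createComponents combine into:
    //   PersianCharFilter -> StandardTokenizer -> LowerCaseFilter
    //     -> ArabicNormalizationFilter -> PersianNormalizationFilter -> StopFilter
    // Older versions fall back to ArabicLetterTokenizer with no char filter, and
    // "stemming" happens purely through the normalized stopword list.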

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilter.cs
new file mode 100644
index 0000000..59b6a23
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilter.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// CharFilter that replaces instances of Zero-width non-joiner with an
+	/// ordinary space.
+	/// </summary>
+	public class PersianCharFilter : CharFilter
+	{
+
+	  public PersianCharFilter(Reader @in) : base(@in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read(char[] cbuf, int off, int len) throws java.io.IOException
+	  public override int read(char[] cbuf, int off, int len)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int charsRead = input.read(cbuf, off, len);
+		int charsRead = input.read(cbuf, off, len);
+		if (charsRead > 0)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = off + charsRead;
+		  int end = off + charsRead;
+		  while (off < end)
+		  {
+			if (cbuf[off] == '\u200C')
+			{
+			  cbuf[off] = ' ';
+			}
+			off++;
+		  }
+		}
+		return charsRead;
+	  }
+
+	  // optimized impl: some other charfilters consume with read()
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read() throws java.io.IOException
+	  public override int read()
+	  {
+		int ch = input.read();
+		if (ch == '\u200C')
+		{
+		  return ' ';
+		}
+		else
+		{
+		  return ch;
+		}
+	  }
+
+	  protected internal override int correct(int currentOff)
+	  {
+		return currentOff; // we don't change the length of the string
+	  }
+	}
+
+}
\ No newline at end of file
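
The filter's effect in isolation, as a sketch assuming the ported Reader parameter accepts a System.IO.StringReader-style source:

    // U+200C (zero-width non-joiner) is turned into an ordinary space on read,
    // so downstream tokenizers see a token boundary inside compound forms.
    var filtered = new PersianCharFilter(new StringReader("می\u200Cخواهم"));
    var buf = new char[16];
    int read = filtered.read(buf, 0, buf.Length); // buf now holds "می خواهم"

Because every replacement is one character for one character, correct() can return offsets unchanged.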

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilterFactory.cs
new file mode 100644
index 0000000..8ab5338
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianCharFilterFactory.cs
@@ -0,0 +1,65 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+
+	/// <summary>
+	/// Factory for <seealso cref="PersianCharFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PersianCharFilterFactory : CharFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new PersianCharFilterFactory </summary>
+	  public PersianCharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CharFilter create(Reader input)
+	  {
+		return new PersianCharFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file
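
A sketch of the factory in use; `reader` stands in for whatever Reader the ported analysis chain supplies:

    var factory = new PersianCharFilterFactory(new Dictionary<string, string>());
    CharFilter cf = factory.create(reader); // wraps the reader in a PersianCharFilter

    // MultiTermComponent returns the factory itself, so multi-term queries
    // (wildcards, prefixes) receive the same zero-width non-joiner handling.
    AbstractAnalysisFactory mt = factory.MultiTermComponent;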

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilter.cs
new file mode 100644
index 0000000..bc2905f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilter.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="PersianNormalizer"/> to normalize the
+	/// orthography.
+	/// 
+	/// </summary>
+
+	public sealed class PersianNormalizationFilter : TokenFilter
+	{
+	  private readonly PersianNormalizer normalizer = new PersianNormalizer();
+	  private readonly CharTermAttribute termAtt;
+
+	  public PersianNormalizationFilter(TokenStream input) : base(input)
+	  {
+		// C# field initializers cannot call instance methods, so the attribute
+		// lookup from the Java original is moved into the constructor.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  termAtt.Length = newlen;
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file
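
A sketch of the filter inside a chain; `tokenizer` is a hypothetical upstream TokenStream:

    TokenStream stream = new PersianNormalizationFilter(tokenizer);
    // Each incrementToken() rewrites the term buffer in place, e.g. folding
    // farsi yeh (U+06CC) to arabic yeh (U+064A) and keheh (U+06A9) to kaf (U+0643),
    // then shortens the term attribute to the length PersianNormalizer returns.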

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilterFactory.cs
new file mode 100644
index 0000000..935973d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizationFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PersianNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.PersianCharFilterFactory"/&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.PersianNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PersianNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new PersianNormalizationFilterFactory </summary>
+	  public PersianNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override PersianNormalizationFilter create(TokenStream input)
+	  {
+		return new PersianNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+
+}
\ No newline at end of file
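
The factory consumes a Solr-style parameter map and rejects anything left over, so programmatic construction is a hypothetical one-liner (an empty dictionary stands in for the parsed config):

    // Hypothetical sketch: build the factory without Solr XML.
    var factory = new PersianNormalizationFilterFactory(new System.Collections.Generic.Dictionary<string, string>());
    // Any unrecognized key, e.g. { "bogus", "x" }, would throw
    // System.ArgumentException: "Unknown parameters: ..."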

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizer.cs
new file mode 100644
index 0000000..a863834
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianNormalizer.cs
@@ -0,0 +1,97 @@
+namespace org.apache.lucene.analysis.fa
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Normalizer for Persian.
+	/// <para>
+	/// Normalization is done in-place for efficiency, operating on a term buffer.
+	/// </para>
+	/// <para>
+	/// Normalization is defined as:
+	/// <ul>
+	/// <li>Normalization of various heh + hamza forms and heh goal to heh.
+	/// <li>Normalization of farsi yeh and yeh barree to arabic yeh
+	/// <li>Normalization of persian keheh to arabic kaf
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class PersianNormalizer
+	{
+	  public const char YEH = '\u064A';
+
+	  public const char FARSI_YEH = '\u06CC';
+
+	  public const char YEH_BARREE = '\u06D2';
+
+	  public const char KEHEH = '\u06A9';
+
+	  public const char KAF = '\u0643';
+
+	  public const char HAMZA_ABOVE = '\u0654';
+
+	  public const char HEH_YEH = '\u06C0';
+
+	  public const char HEH_GOAL = '\u06C1';
+
+	  public const char HEH = '\u0647';
+
+	  /// <summary>
+	  /// Normalize an input buffer of Persian text
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int normalize(char[] s, int len)
+	  {
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+		  case FARSI_YEH:
+		  case YEH_BARREE:
+			s[i] = YEH;
+			break;
+		  case KEHEH:
+			s[i] = KAF;
+			break;
+		  case HEH_YEH:
+		  case HEH_GOAL:
+			s[i] = HEH;
+			break;
+		  case HAMZA_ABOVE: // necessary for HEH + HAMZA
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+		  default:
+			break;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
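
normalize() rewrites the buffer in place and returns the new logical length, so callers keep reusing the same char[]. A small sketch exercising the keheh->kaf and farsi-yeh->yeh foldings (class and casing as in the raw port above):

    // Sketch: normalize a keheh + farsi-yeh spelling in place.
    char[] buf = "\u06A9\u062A\u0627\u0628\u06CC".ToCharArray(); // keheh, teh, alef, beh, farsi yeh
    int len = new PersianNormalizer().normalize(buf, buf.Length);
    // buf[0] is now KAF (U+0643) and buf[4] is YEH (U+064A); len stays 5,
    // since only HAMZA_ABOVE deletions shorten the buffer.
    string normalized = new string(buf, 0, len);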

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishAnalyzer.cs
new file mode 100644
index 0000000..fe2bd0c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.fi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using FinnishStemmer = org.tartarus.snowball.ext.FinnishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Finnish.
+	/// </summary>
+	public sealed class FinnishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Finnish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "finnish_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static readonly set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public FinnishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new FinnishStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
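
Taken together, createComponents() wires StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter -> (optional SetKeywordMarkerFilter) -> SnowballFilter. A hypothetical construction sketch, assuming CharArraySet keeps the (Version, collection, ignoreCase) shape used elsewhere in this port:

    // Hypothetical sketch: protect a product name from Snowball stemming.
    var stemExclusions = new CharArraySet(Version.LUCENE_CURRENT, new[] { "nokia" }, true /* ignoreCase */);
    var analyzer = new FinnishAnalyzer(Version.LUCENE_CURRENT, FinnishAnalyzer.DefaultStopSet, stemExclusions);
    // Terms in the exclusion set get KeywordAttribute set by SetKeywordMarkerFilter,
    // so the SnowballFilter leaves them unstemmed.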

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilter.cs
new file mode 100644
index 0000000..ba9fa96
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.fi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="FinnishLightStemmer"/> to stem Finnish
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class FinnishLightStemFilter : TokenFilter
+	{
+	  private readonly FinnishLightStemmer stemmer = new FinnishLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public FinnishLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilterFactory.cs
new file mode 100644
index 0000000..6d1d7f1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.fi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="FinnishLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.FinnishLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class FinnishLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new FinnishLightStemFilterFactory </summary>
+	  public FinnishLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new FinnishLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemmer.cs
new file mode 100644
index 0000000..d464824
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fi/FinnishLightStemmer.cs
@@ -0,0 +1,335 @@
+namespace org.apache.lucene.analysis.fi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Finnish.
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2003 Monolingual Tracks</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class FinnishLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 4)
+		{
+		  return len;
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'ä':
+			case 'å':
+				s[i] = 'a';
+				break;
+			case 'ö':
+				s[i] = 'o';
+				break;
+		  }
+		}
+
+		len = step1(s, len);
+		len = step2(s, len);
+		len = step3(s, len);
+		len = norm1(s, len);
+		len = norm2(s, len);
+		return len;
+	  }
+
+	  private int step1(char[] s, int len)
+	  {
+		if (len > 8)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "kin"))
+		  {
+			return step1(s, len - 3);
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "ko"))
+		  {
+			return step1(s, len - 2);
+		  }
+		}
+
+		if (len > 11)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "dellinen"))
+		  {
+			return len - 8;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "dellisuus"))
+		  {
+			return len - 9;
+		  }
+		}
+		return len;
+	  }
+
+	  private int step2(char[] s, int len)
+	  {
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "lla") || StemmerUtil.EndsWith(s, len, "tse") || StemmerUtil.EndsWith(s, len, "sti"))
+		  {
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ni"))
+		  {
+			return len - 2;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "aa"))
+		  {
+			return len - 1; // aa -> a
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int step3(char[] s, int len)
+	  {
+		if (len > 8)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "nnen"))
+		  {
+			s[len - 4] = 's';
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ntena"))
+		  {
+			s[len - 5] = 's';
+			return len - 4;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "tten"))
+		  {
+			return len - 4;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "eiden"))
+		  {
+			return len - 5;
+		  }
+		}
+
+		if (len > 6)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "neen") || StemmerUtil.EndsWith(s, len, "niin") || StemmerUtil.EndsWith(s, len, "seen") || StemmerUtil.EndsWith(s, len, "teen") || StemmerUtil.EndsWith(s, len, "inen"))
+		  {
+			  return len - 4;
+		  }
+
+		  if (s[len - 3] == 'h' && isVowel(s[len - 2]) && s[len - 1] == 'n')
+		  {
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "den"))
+		  {
+			s[len - 3] = 's';
+			return len - 2;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ksen"))
+		  {
+			s[len - 4] = 's';
+			return len - 3;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "ssa") || StemmerUtil.EndsWith(s, len, "sta") || StemmerUtil.EndsWith(s, len, "lla") || StemmerUtil.EndsWith(s, len, "lta") || StemmerUtil.EndsWith(s, len, "tta") || StemmerUtil.EndsWith(s, len, "ksi") || StemmerUtil.EndsWith(s, len, "lle"))
+		  {
+			return len - 3;
+		  }
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "na") || StemmerUtil.EndsWith(s, len, "ne"))
+		  {
+			return len - 2;
+		  }
+
+		  if (StemmerUtil.EndsWith(s, len, "nei"))
+		  {
+			return len - 3;
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ja") || StemmerUtil.EndsWith(s, len, "ta"))
+		  {
+			return len - 2;
+		  }
+
+		  if (s[len - 1] == 'a')
+		  {
+			return len - 1;
+		  }
+
+		  if (s[len - 1] == 'n' && isVowel(s[len - 2]))
+		  {
+			return len - 2;
+		  }
+
+		  if (s[len - 1] == 'n')
+		  {
+			return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int norm1(char[] s, int len)
+	  {
+		if (len > 5 && StemmerUtil.EndsWith(s, len, "hde"))
+		{
+			s[len - 3] = 'k';
+			s[len - 2] = 's';
+			s[len - 1] = 'i';
+		}
+
+		if (len > 4)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ei") || StemmerUtil.EndsWith(s, len, "at"))
+		  {
+			return len - 2;
+		  }
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 't':
+			case 's':
+			case 'j':
+			case 'e':
+			case 'a':
+			case 'i':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+
+	  private int norm2(char[] s, int len)
+	  {
+		if (len > 8)
+		{
+		  if (s[len - 1] == 'e' || s[len - 1] == 'o' || s[len - 1] == 'u')
+		  {
+			len--;
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (s[len - 1] == 'i')
+		  {
+			len--;
+		  }
+
+		  if (len > 4)
+		  {
+			char ch = s[0];
+			for (int i = 1; i < len; i++)
+			{
+			  if (s[i] == ch && (ch == 'k' || ch == 'p' || ch == 't'))
+			  {
+				len = StemmerUtil.delete(s, i--, len);
+			  }
+			  else
+			  {
+				ch = s[i];
+			  }
+			}
+		  }
+		}
+
+		return len;
+	  }
+
+	  private bool isVowel(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+		  case 'y':
+			  return true;
+		  default:
+			  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
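
Like the Persian normalizer, stem() works in place and returns the new logical length. A worked sketch: "kirjoja" (books, partitive plural) loses its "ja" case ending in step3, leaving "kirjo":

    var stemmer = new FinnishLightStemmer();
    char[] term = "kirjoja".ToCharArray();
    int len = stemmer.stem(term, term.Length);
    string stem = new string(term, 0, len); // "kirjo"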

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchAnalyzer.cs
new file mode 100644
index 0000000..b6ef4f1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchAnalyzer.cs
@@ -0,0 +1,205 @@
+using System;
+
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ElisionFilter = org.apache.lucene.analysis.util.ElisionFilter;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for French language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (word that will
+	/// not be stemmed, but indexed).
+	/// A default set of stopwords is used unless an alternative list is specified, but the
+	/// exclusion list is empty by default.
+	/// </para>
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating FrenchAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, FrenchLightStemFilter is used for less aggressive stemming.
+	///   <li> As of 3.1, Snowball stemming is done with SnowballFilter, 
+	///        LowerCaseFilter is used prior to StopFilter, and ElisionFilter and 
+	///        Snowball stopwords are used by default.
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class FrenchAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Extended list of typical French stopwords. </summary>
+	  /// @deprecated (3.1) remove in Lucene 5.0 (index bw compat) 
+	  [Obsolete("(3.1) remove in Lucene 5.0 (index bw compat)")]
+	  private static readonly string[] FRENCH_STOP_WORDS = new string[] {"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi", "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir", "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain", "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci", "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout", "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles", "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse", "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles", "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis", "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle", "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là", "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi", "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre", "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi", "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi", "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels", "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon", "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit", "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes", "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers", "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès", "été", "être", "ô"};
+
+	  /// <summary>
+	  /// File containing default French stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "french_stop.txt";
+
+	  /// <summary>
+	  /// Default set of articles for ElisionFilter </summary>
+	  public static readonly CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"), true));
+
+	  /// <summary>
+	  /// Contains words that should be indexed but not stemmed.
+	  /// </summary>
+	  private readonly CharArraySet excltable;
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  private class DefaultSetHolder
+	  {
+		/// @deprecated (3.1) remove this in Lucene 5.0, index bw compat 
+		[Obsolete("(3.1) remove this in Lucene 5.0, index bw compat")]
+		internal static readonly CharArraySet DEFAULT_STOP_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS), false));
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet"/>).
+	  /// </summary>
+	  public FrenchAnalyzer(Version matchVersion) : this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET : DefaultSetHolder.DEFAULT_STOP_SET_30)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclutionSet">
+	  ///          a stemming exclusion set </param>
+	  public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet) : base(matchVersion, stopwords)
+	  {
+		this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclutionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>,
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
+	  ///         <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="FrenchLightStemFilter"/> </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		  Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		  TokenStream result = new StandardFilter(matchVersion, source);
+		  result = new ElisionFilter(result, DEFAULT_ARTICLES);
+		  result = new LowerCaseFilter(matchVersion, result);
+		  result = new StopFilter(matchVersion, result, stopwords);
+		  if (!excltable.Empty)
+		  {
+			result = new SetKeywordMarkerFilter(result, excltable);
+		  }
+		  if (matchVersion.onOrAfter(Version.LUCENE_36))
+		  {
+			result = new FrenchLightStemFilter(result);
+		  }
+		  else
+		  {
+			result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
+		  }
+		  return new TokenStreamComponents(source, result);
+		}
+		else
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		  Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		  TokenStream result = new StandardFilter(matchVersion, source);
+		  result = new StopFilter(matchVersion, result, stopwords);
+		  if (!excltable.Empty)
+		  {
+			result = new SetKeywordMarkerFilter(result, excltable);
+		  }
+		  result = new FrenchStemFilter(result);
+		  // Convert to lowercase after stemming!
+		  return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
+		}
+	  }
+	}
+
+
+}
\ No newline at end of file
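
createComponents() branches on matchVersion: 3.1+ builds the Elision/LowerCase/Stop chain and picks FrenchLightStemFilter (3.6+) or SnowballFilter, while older versions keep the legacy FrenchStemFilter and lowercase only after stemming. A hypothetical sketch of the modern path:

    // Hypothetical sketch: default stop set, modern (>= 3.6) chain.
    var analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);
    // Internally: StandardTokenizer -> StandardFilter -> ElisionFilter(DEFAULT_ARTICLES)
    //             -> LowerCaseFilter -> StopFilter -> FrenchLightStemFilter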

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilter.cs
new file mode 100644
index 0000000..14c9a4b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="FrenchLightStemmer"/> to stem French
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class FrenchLightStemFilter : TokenFilter
+	{
+	  private readonly FrenchLightStemmer stemmer = new FrenchLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public FrenchLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilterFactory.cs
new file mode 100644
index 0000000..9f73d5d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="FrenchLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+	///     &lt;filter class="solr.FrenchLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class FrenchLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new FrenchLightStemFilterFactory </summary>
+	  public FrenchLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new FrenchLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemmer.cs
new file mode 100644
index 0000000..e54b0d2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchLightStemmer.cs
@@ -0,0 +1,357 @@
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for French.
+	/// <para>
+	/// This stemmer implements the "UniNE" algorithm in:
+	/// <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
+	public class FrenchLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len > 5 && s[len - 1] == 'x')
+		{
+		  if (s[len - 3] == 'a' && s[len - 2] == 'u' && s[len - 4] != 'e')
+		  {
+			s[len - 2] = 'l';
+		  }
+		  len--;
+		}
+
+		if (len > 3 && s[len - 1] == 'x')
+		{
+		  len--;
+		}
+
+		if (len > 3 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "issement"))
+		{
+		  len -= 6;
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "issant"))
+		{
+		  len -= 4;
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "ement"))
+		{
+		  len -= 4;
+		  if (len > 3 && StemmerUtil.EndsWith(s, len, "ive"))
+		  {
+			len--;
+			s[len - 1] = 'f';
+		  }
+		  return norm(s, len);
+		}
+
+		if (len > 11 && StemmerUtil.EndsWith(s, len, "ficatrice"))
+		{
+		  len -= 5;
+		  s[len - 2] = 'e';
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 10 && StemmerUtil.EndsWith(s, len, "ficateur"))
+		{
+		  len -= 4;
+		  s[len - 2] = 'e';
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "catrice"))
+		{
+		  len -= 3;
+		  s[len - 4] = 'q';
+		  s[len - 3] = 'u';
+		  s[len - 2] = 'e';
+		  //s[len-1] = 'r' <-- unnecessary, already 'r'.
+		  return norm(s, len);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "cateur"))
+		{
+		  len -= 2;
+		  s[len - 4] = 'q';
+		  s[len - 3] = 'u';
+		  s[len - 2] = 'e';
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "atrice"))
+		{
+		  len -= 4;
+		  s[len - 2] = 'e';
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 7 && StemmerUtil.EndsWith(s, len, "ateur"))
+		{
+		  len -= 3;
+		  s[len - 2] = 'e';
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "trice"))
+		{
+		  len--;
+		  s[len - 3] = 'e';
+		  s[len - 2] = 'u';
+		  s[len - 1] = 'r';
+		}
+
+		if (len > 5 && StemmerUtil.EndsWith(s, len, "ième"))
+		{
+		  return norm(s, len - 4);
+		}
+
+		if (len > 7 && StemmerUtil.EndsWith(s, len, "teuse"))
+		{
+		  len -= 2;
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "teur"))
+		{
+		  len--;
+		  s[len - 1] = 'r';
+		  return norm(s, len);
+		}
+
+		if (len > 5 && StemmerUtil.EndsWith(s, len, "euse"))
+		{
+		  return norm(s, len - 2);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "ère"))
+		{
+		  len--;
+		  s[len - 2] = 'e';
+		  return norm(s, len);
+		}
+
+		if (len > 7 && StemmerUtil.EndsWith(s, len, "ive"))
+		{
+		  len--;
+		  s[len - 1] = 'f';
+		  return norm(s, len);
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "folle") || StemmerUtil.EndsWith(s, len, "molle")))
+		{
+		  len -= 2;
+		  s[len - 1] = 'u';
+		  return norm(s, len);
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "nnelle"))
+		{
+		  return norm(s, len - 5);
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "nnel"))
+		{
+		  return norm(s, len - 3);
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ète"))
+		{
+		  len--;
+		  s[len - 2] = 'e';
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "ique"))
+		{
+		  len -= 4;
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "esse"))
+		{
+		  return norm(s, len - 3);
+		}
+
+		if (len > 7 && StemmerUtil.EndsWith(s, len, "inage"))
+		{
+		  return norm(s, len - 3);
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "isation"))
+		{
+		  len -= 7;
+		  if (len > 5 && StemmerUtil.EndsWith(s, len, "ual"))
+		  {
+			s[len - 2] = 'e';
+		  }
+		  return norm(s, len);
+		}
+
+		if (len > 9 && StemmerUtil.EndsWith(s, len, "isateur"))
+		{
+		  return norm(s, len - 7);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "ation"))
+		{
+		  return norm(s, len - 5);
+		}
+
+		if (len > 8 && StemmerUtil.EndsWith(s, len, "ition"))
+		{
+		  return norm(s, len - 5);
+		}
+
+		return norm(s, len);
+	  }
+
+	  private int norm(char[] s, int len)
+	  {
+		if (len > 4)
+		{
+		  for (int i = 0; i < len; i++)
+		  {
+			switch (s[i])
+			{
+			  case 'à':
+			  case 'á':
+			  case 'â':
+				  s[i] = 'a';
+				  break;
+			  case 'ô':
+				  s[i] = 'o';
+				  break;
+			  case 'è':
+			  case 'é':
+			  case 'ê':
+				  s[i] = 'e';
+				  break;
+			  case 'ù':
+			  case 'û':
+				  s[i] = 'u';
+				  break;
+			  case 'î':
+				  s[i] = 'i';
+				  break;
+			  case 'ç':
+				  s[i] = 'c';
+				  break;
+			}
+		  }
+
+		  char ch = s[0];
+		  for (int i = 1; i < len; i++)
+		  {
+			if (s[i] == ch && char.IsLetter(ch))
+			{
+			  len = StemmerUtil.delete(s, i--, len);
+			}
+			else
+			{
+			  ch = s[i];
+			}
+		  }
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ie"))
+		{
+		  len -= 2;
+		}
+
+		if (len > 4)
+		{
+			if (s[len - 1] == 'r')
+			{
+				len--;
+			}
+			if (s[len - 1] == 'e')
+			{
+				len--;
+			}
+			if (s[len - 1] == 'e')
+			{
+				len--;
+			}
+			if (s[len - 1] == s[len - 2] && char.IsLetter(s[len - 1]))
+			{
+				len--;
+			}
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
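
A worked sketch of the first rule above: the plural "-aux" is rewritten toward "-al" ('u' becomes 'l' and the final 'x' is dropped), so "nationaux" stems to "national":

    var stemmer = new FrenchLightStemmer();
    char[] term = "nationaux".ToCharArray();
    int len = stemmer.stem(term, term.Length);
    string stem = new string(term, 0, len); // "national"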

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilter.cs
new file mode 100644
index 0000000..ca1bf0a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="FrenchMinimalStemmer"/> to stem French
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class FrenchMinimalStemFilter : TokenFilter
+	{
+	  private readonly FrenchMinimalStemmer stemmer = new FrenchMinimalStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public FrenchMinimalStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..b9cfe33
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchMinimalStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.fr
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="FrenchMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ElisionFilterFactory"/&gt;
+	///     &lt;filter class="solr.FrenchMinimalStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class FrenchMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new FrenchMinimalStemFilterFactory </summary>
+	  public FrenchMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new FrenchMinimalStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file


[16/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
new file mode 100644
index 0000000..17d787a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -0,0 +1,69 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ASCIIFoldingFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ASCIIFoldingFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+	  private readonly bool preserveOriginal;
+
+	  /// <summary>
+	  /// Creates a new ASCIIFoldingFilterFactory </summary>
+	  public ASCIIFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		preserveOriginal = getBoolean(args, "preserveOriginal", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ASCIIFoldingFilter create(TokenStream input)
+	  {
+		return new ASCIIFoldingFilter(input, preserveOriginal);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+
+}
\ No newline at end of file
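
A rough illustration of what the filter created here does to a term: ASCII
folding maps accented and otherwise non-ASCII Latin characters onto ASCII
equivalents. The sketch below approximates that with Unicode decomposition;
it is not Lucene's hand-written folding table, and the helper name is
invented for this note:

    using System.Globalization;
    using System.Text;

    static class FoldSketch
    {
        // Decompose "é" into "e" plus a combining accent, then drop the accents.
        public static string FoldToAscii(string input)
        {
            string decomposed = input.Normalize(NormalizationForm.FormD);
            var sb = new StringBuilder();
            foreach (char c in decomposed)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
                    sb.Append(c);
            }
            return sb.ToString();
        }
    }

    // FoldSketch.FoldToAscii("café") == "cafe". With preserveOriginal="true"
    // the real filter emits both "cafe" and "café" at the same position.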

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
new file mode 100644
index 0000000..94c8d4b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// A filter to apply normal capitalization rules to Tokens.  It will make the first letter
+	/// capital and the rest lower case.
+	/// <p/>
+	/// This filter is particularly useful for building nice-looking facet parameters.  This filter
+	/// is not appropriate if you intend to use a prefix query.
+	/// </summary>
+	public sealed class CapitalizationFilter : TokenFilter
+	{
+	  public static readonly int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
+	  public static readonly int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
+
+	  private readonly bool onlyFirstWord;
+	  private readonly CharArraySet keep;
+	  private readonly bool forceFirstLetter;
+	  private readonly ICollection<char[]> okPrefix;
+
+	  private readonly int minWordLength;
+	  private readonly int maxWordCount;
+	  private readonly int maxTokenLength;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Creates a CapitalizationFilter with the default parameters.
+	  /// <para>
+	  /// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
+	  ///   CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
+	  /// </para>
+	  /// </summary>
+	  public CapitalizationFilter(TokenStream @in) : this(@in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a CapitalizationFilter with the specified parameters. </summary>
+	  /// <param name="in"> input tokenstream </param>
+	  /// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
+	  /// <param name="keep"> a keep word list.  Each word that should be kept separated by whitespace. </param>
+	  /// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
+	  /// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
+	  /// <param name="minWordLength"> how long the word needs to be to get capitalization applied.  If the
+	  ///                      minWordLength is 3, "and" > "And" but "or" stays "or". </param>
+	  /// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
+	  ///                     assumed to be correct. </param>
+	  /// <param name="maxTokenLength"> ??? </param>
+	  public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength) : base(@in)
+	  {
+		this.onlyFirstWord = onlyFirstWord;
+		this.keep = keep;
+		this.forceFirstLetter = forceFirstLetter;
+		this.okPrefix = okPrefix;
+		this.minWordLength = minWordLength;
+		this.maxWordCount = maxWordCount;
+		this.maxTokenLength = maxTokenLength;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+			return false;
+		}
+
+		char[] termBuffer = termAtt.buffer();
+		int termBufferLength = termAtt.length();
+		char[] backup = null;
+
+		if (maxWordCount < DEFAULT_MAX_WORD_COUNT)
+		{
+		  //make a backup in case we exceed the word count
+		  backup = new char[termBufferLength];
+		  Array.Copy(termBuffer, 0, backup, 0, termBufferLength);
+		}
+
+		if (termBufferLength < maxTokenLength)
+		{
+		  int wordCount = 0;
+
+		  int lastWordStart = 0;
+		  for (int i = 0; i < termBufferLength; i++)
+		  {
+			char c = termBuffer[i];
+			if (c <= ' ' || c == '.')
+			{
+			  int len = i - lastWordStart;
+			  if (len > 0)
+			  {
+				processWord(termBuffer, lastWordStart, len, wordCount++);
+				lastWordStart = i + 1;
+				i++;
+			  }
+			}
+		  }
+
+		  // process the last word
+		  if (lastWordStart < termBufferLength)
+		  {
+			processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+		  }
+
+		  if (wordCount > maxWordCount)
+		  {
+			termAtt.copyBuffer(backup, 0, termBufferLength);
+		  }
+		}
+
+		return true;
+	  }
+
+	  private void processWord(char[] buffer, int offset, int length, int wordCount)
+	  {
+		if (length < 1)
+		{
+		  return;
+		}
+
+		if (onlyFirstWord && wordCount > 0)
+		{
+		  for (int i = 0; i < length; i++)
+		  {
+			buffer[offset + i] = char.ToLower(buffer[offset + i]);
+
+		  }
+		  return;
+		}
+
+		if (keep != null && keep.contains(buffer, offset, length))
+		{
+		  if (wordCount == 0 && forceFirstLetter)
+		  {
+			buffer[offset] = char.ToUpper(buffer[offset]);
+		  }
+		  return;
+		}
+
+		if (length < minWordLength)
+		{
+		  return;
+		}
+
+		if (okPrefix != null)
+		{
+		  foreach (char[] prefix in okPrefix)
+		  {
+			if (length >= prefix.Length) //don't bother checking if the buffer length is less than the prefix
+			{
+			  bool match = true;
+			  for (int i = 0; i < prefix.Length; i++)
+			  {
+				if (prefix[i] != buffer[offset + i])
+				{
+				  match = false;
+				  break;
+				}
+			  }
+			  if (match)
+			  {
+				return;
+			  }
+			}
+		  }
+		}
+
+		// We know it has at least one character
+		/*char[] chars = w.toCharArray();
+		StringBuilder word = new StringBuilder( w.length() );
+		word.append( Character.toUpperCase( chars[0] ) );*/
+		buffer[offset] = char.ToUpper(buffer[offset]);
+
+		for (int i = 1; i < length; i++)
+		{
+		  buffer[offset + i] = char.ToLower(buffer[offset + i]);
+		}
+		//return word.toString();
+	  }
+	}
+
+}
\ No newline at end of file
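
The per-word rule processWord applies above is simply "upper-case the first
character, lower-case the rest", which is also why the okPrefix escape hatch
exists. A minimal standalone sketch (hypothetical helper, not part of this
port):

    static string CapitalizeWord(string w) =>
        w.Length == 0 ? w : char.ToUpper(w[0]) + w.Substring(1).ToLower();

    // CapitalizeWord("mcKINLEY") == "Mckinley", which is usually unwanted;
    // putting "McK" on the okPrefix list makes the filter skip such words.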

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
new file mode 100644
index 0000000..bd4f335
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -0,0 +1,117 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="CapitalizationFilter"/>.
+	/// <p/>
+	/// The factory takes parameters:<br/>
+	/// "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
+	/// "keep" - a keep word list.  Each word that should be kept separated by whitespace.<br/>
+	/// "keepIgnoreCase - true or false.  If true, the keep list will be considered case-insensitive.<br/>
+	/// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
+	/// "okPrefix" - do not change word capitalization if a word begins with something in this list.
+	/// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
+	/// "Mckinley"<br/>
+	/// "minWordLength" - how long the word needs to be to get capitalization applied.  If the
+	/// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
+	/// "maxWordCount" - if the token contains more then maxWordCount words, the capitalization is
+	/// assumed to be correct.<br/>
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+	///           keep="java solr lucene" keepIgnoreCase="false"
+	///           okPrefix="McK McD McA"/&gt;   
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since solr 1.3
+	/// </summary>
+	public class CapitalizationFilterFactory : TokenFilterFactory
+	{
+	  public const string KEEP = "keep";
+	  public const string KEEP_IGNORE_CASE = "keepIgnoreCase";
+	  public const string OK_PREFIX = "okPrefix";
+	  public const string MIN_WORD_LENGTH = "minWordLength";
+	  public const string MAX_WORD_COUNT = "maxWordCount";
+	  public const string MAX_TOKEN_LENGTH = "maxTokenLength";
+	  public const string ONLY_FIRST_WORD = "onlyFirstWord";
+	  public const string FORCE_FIRST_LETTER = "forceFirstLetter";
+
+	  internal CharArraySet keep;
+
+	  internal ICollection<char[]> okPrefix = new List<char[]>(); // for example: McK
+
+	  internal readonly int minWordLength; // don't modify capitalization for words shorter than this
+	  internal readonly int maxWordCount;
+	  internal readonly int maxTokenLength;
+	  internal readonly bool onlyFirstWord;
+	  internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
+
+	  /// <summary>
+	  /// Creates a new CapitalizationFilterFactory </summary>
+	  public CapitalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		bool ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
+		HashSet<string> k = getSet(args, KEEP);
+		if (k != null)
+		{
+		  keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+		  keep.addAll(k);
+		}
+
+		k = getSet(args, OK_PREFIX);
+		if (k != null)
+		{
+		  okPrefix = new List<char[]>();
+		  foreach (string item in k)
+		  {
+			okPrefix.Add(item.ToCharArray());
+		  }
+		}
+
+		minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
+		maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
+		maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
+		onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
+		forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CapitalizationFilter create(TokenStream input)
+	  {
+		return new CapitalizationFilter(input, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
new file mode 100644
index 0000000..b410fe9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -0,0 +1,82 @@
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Removes words that are too long or too short from the stream.
+	/// <para>
+	/// Note: Length is calculated as the number of Unicode codepoints.
+	/// </para>
+	/// </summary>
+	public sealed class CodepointCountFilter : FilteringTokenFilter
+	{
+
+	  private readonly int min;
+	  private readonly int max;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// <summary>
+	  /// Create a new <seealso cref="CodepointCountFilter"/>. This will filter out tokens whose
+	  /// <seealso cref="CharTermAttribute"/> is either too short (<seealso cref="Character#codePointCount(char[], int, int)"/>
+	  /// &lt; min) or too long (<seealso cref="Character#codePointCount(char[], int, int)"/> &gt; max). </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+	  /// <param name="min">     the minimum length </param>
+	  /// <param name="max">     the maximum length </param>
+	  public CodepointCountFilter(Version version, TokenStream @in, int min, int max) : base(version, @in)
+	  {
+		this.min = min;
+		this.max = max;
+	  }
+
+	  public override bool accept()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int max32 = termAtt.length();
+		int max32 = termAtt.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int min32 = max32 >> 1;
+		int min32 = max32 >> 1;
+		if (min32 >= min && max32 <= max)
+		{
+		  // definitely within range
+		  return true;
+		}
+		else if (min32 > max || max32 < min)
+		{
+		  // definitely not
+		  return false;
+		}
+		else
+		{
+		  // we must count to be sure
+		  int len = char.codePointCount(termAtt.buffer(), 0, termAtt.length());
+		  return (len >= min && len <= max);
+		}
+	  }
+	}
+
+}
\ No newline at end of file
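
The accept() shortcut above leans on a UTF-16 invariant: a term of n code
units contains at most n code points and at least n/2, because every code
point occupies one or two units, so most terms can be accepted or rejected
without counting. A plain .NET illustration of the counting fallback (nothing
here is part of the port; it only mirrors the logic):

    string s = "a\U0001F600b";   // 'a', one emoji encoded as a surrogate pair, 'b'
    int units = s.Length;        // 4 UTF-16 code units
    int points = 0;
    for (int i = 0; i < s.Length; i++)
    {
        points++;                // count one code point...
        if (char.IsHighSurrogate(s[i]) && i + 1 < s.Length && char.IsLowSurrogate(s[i + 1]))
            i++;                 // ...and skip the low half of a surrogate pair
    }
    // units == 4, points == 3; only terms whose cheap bounds straddle
    // [min, max] need this walk.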

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
new file mode 100644
index 0000000..e85fd1e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="CodepointCountFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CodepointCountFilterFactory" min="0" max="1" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CodepointCountFilterFactory : TokenFilterFactory
+	{
+	  internal readonly int min;
+	  internal readonly int max;
+	  public const string MIN_KEY = "min";
+	  public const string MAX_KEY = "max";
+
+	  /// <summary>
+	  /// Creates a new CodepointCountFilterFactory </summary>
+	  public CodepointCountFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		min = requireInt(args, MIN_KEY);
+		max = requireInt(args, MAX_KEY);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CodepointCountFilter create(TokenStream input)
+	  {
+		return new CodepointCountFilter(luceneMatchVersion, input, min, max);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
new file mode 100644
index 0000000..38af481
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -0,0 +1,34 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// An always exhausted token stream.
+	/// </summary>
+	public sealed class EmptyTokenStream : TokenStream
+	{
+
+	  public override bool incrementToken()
+	  {
+		return false;
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
new file mode 100644
index 0000000..96a2dfa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -0,0 +1,164 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// When plain text is extracted from documents, words are often hyphenated and broken across two lines.
+	/// This is often the case with documents where narrow text columns are used, such as newsletters.
+	/// In order to increase search efficiency, this filter puts hyphenated words broken across two lines back together.
+	/// This filter should be used at index time only.
+	/// Example field definition in schema.xml:
+	/// <pre class="prettyprint">
+	/// &lt;fieldtype name="text" class="solr.TextField" positionIncrementGap="100"&gt;
+	///  &lt;analyzer type="index"&gt;
+	///    &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///      &lt;filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/&gt;
+	///      &lt;filter class="solr.StopFilterFactory" ignoreCase="true"/&gt;
+	///      &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
+	///      &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
+	///      &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///      &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///  &lt;/analyzer&gt;
+	///  &lt;analyzer type="query"&gt;
+	///      &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///      &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
+	///      &lt;filter class="solr.StopFilterFactory" ignoreCase="true"/&gt;
+	///      &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/&gt;
+	///      &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///      &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///  &lt;/analyzer&gt;
+	/// &lt;/fieldtype&gt;
+	/// </pre>
+	/// 
+	/// </summary>
+	public sealed class HyphenatedWordsFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
+
+	  private readonly StringBuilder hyphenated = new StringBuilder();
+	  private State savedState;
+	  private bool exhausted = false;
+	  private int lastEndOffset = 0;
+
+	  /// <summary>
+	  /// Creates a new HyphenatedWordsFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream that will be filtered </param>
+	  public HyphenatedWordsFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (!exhausted && input.incrementToken())
+		{
+		  char[] term = termAttribute.buffer();
+		  int termLength = termAttribute.length();
+		  lastEndOffset = offsetAttribute.endOffset();
+
+		  if (termLength > 0 && term[termLength - 1] == '-')
+		  {
+			// a hyphenated word
+			// capture the state of the first token only
+			if (savedState == null)
+			{
+			  savedState = captureState();
+			}
+			hyphenated.Append(term, 0, termLength - 1);
+		  }
+		  else if (savedState == null)
+		  {
+			// not part of a hyphenated word.
+			return true;
+		  }
+		  else
+		  {
+			// the final portion of a hyphenated word
+			hyphenated.Append(term, 0, termLength);
+			unhyphenate();
+			return true;
+		  }
+		}
+
+		exhausted = true;
+
+		if (savedState != null)
+		{
+		  // the final term ends with a hyphen
+		  // add back the hyphen, for backwards compatibility.
+		  hyphenated.Append('-');
+		  unhyphenate();
+		  return true;
+		}
+
+		return false;
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		hyphenated.Length = 0;
+		savedState = null;
+		exhausted = false;
+		lastEndOffset = 0;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Writes the joined unhyphenated term
+	  /// </summary>
+	  private void unhyphenate()
+	  {
+		restoreState(savedState);
+		savedState = null;
+
+		char[] term = termAttribute.buffer();
+		int length = hyphenated.Length;
+		if (length > termAttribute.length())
+		{
+		  term = termAttribute.resizeBuffer(length);
+		}
+
+		hyphenated.getChars(0, length, term, 0);
+		termAttribute.Length = length;
+		offsetAttribute.setOffset(offsetAttribute.startOffset(), lastEndOffset);
+		hyphenated.Length = 0;
+	  }
+	}
+
+}
\ No newline at end of file
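
The buffering logic above, reduced to its essentials: tokens ending in '-'
are held back until the continuation arrives, then the halves are re-joined
into one token. A self-contained sketch of that behaviour (hypothetical
helper, not the filter's actual API):

    using System.Collections.Generic;
    using System.Text;

    static IEnumerable<string> JoinHyphenated(IEnumerable<string> tokens)
    {
        var pending = new StringBuilder();
        foreach (var t in tokens)
        {
            if (t.EndsWith("-"))
                pending.Append(t, 0, t.Length - 1);  // buffer "ecologi" from "ecologi-"
            else if (pending.Length == 0)
                yield return t;                      // ordinary token, pass through
            else
            {
                pending.Append(t);                   // "cal" completes the word
                yield return pending.ToString();
                pending.Clear();
            }
        }
        if (pending.Length > 0)
            yield return pending.ToString() + "-";   // a final trailing hyphen is restored
    }

    // JoinHyphenated(new[] { "ecologi-", "cal", "news" }) yields "ecological", "news".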

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
new file mode 100644
index 0000000..946cd57
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HyphenatedWordsFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HyphenatedWordsFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new HyphenatedWordsFilterFactory </summary>
+	  public HyphenatedWordsFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override HyphenatedWordsFilter create(TokenStream input)
+	  {
+		return new HyphenatedWordsFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
new file mode 100644
index 0000000..f110d37
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -0,0 +1,67 @@
+using System;
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A TokenFilter that only keeps tokens with text contained in the
+	/// required words.  This filter behaves like the inverse of StopFilter.
+	/// 
+	/// @since solr 1.3
+	/// </summary>
+	public sealed class KeepWordFilter : FilteringTokenFilter
+	{
+	  private readonly CharArraySet words;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
+	  [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+	  public KeepWordFilter(Version version, bool enablePositionIncrements, TokenStream @in, CharArraySet words) : base(version, enablePositionIncrements, @in)
+	  {
+		this.words = words;
+	  }
+
+	  /// <summary>
+	  /// Create a new <seealso cref="KeepWordFilter"/>.
+	  /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
+	  /// used by this filter and should not be modified.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+	  /// <param name="words">   the words to keep </param>
+	  public KeepWordFilter(Version version, TokenStream @in, CharArraySet words) : base(version, @in)
+	  {
+		this.words = words;
+	  }
+
+	  public override bool accept()
+	  {
+		return words.contains(termAtt.buffer(), 0, termAtt.length());
+	  }
+	}
+
+}
\ No newline at end of file
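
Since this is just the inverse of a stop filter, its accept() decision can be
pictured in two lines of plain .NET (illustrative only; the real filter
matches the term buffer against a CharArraySet, optionally ignoring case):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    var keep = new HashSet<string>(new[] { "lucene", "solr" }, StringComparer.OrdinalIgnoreCase);
    var kept = new[] { "apache", "Lucene", "rocks" }.Where(keep.Contains);  // -> "Lucene"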

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
new file mode 100644
index 0000000..8aa687f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -0,0 +1,113 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="KeepWordFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class KeepWordFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly bool ignoreCase;
+	  private readonly bool enablePositionIncrements;
+	  private readonly string wordFiles;
+	  private CharArraySet words;
+
+	  /// <summary>
+	  /// Creates a new KeepWordFilterFactory </summary>
+	  public KeepWordFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		wordFiles = get(args, "words");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (wordFiles != null)
+		{
+		  words = getWordSet(loader, wordFiles, ignoreCase);
+		}
+	  }
+
+	  public virtual bool EnablePositionIncrements
+	  {
+		  get
+		  {
+			return enablePositionIncrements;
+		  }
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public virtual CharArraySet Words
+	  {
+		  get
+		  {
+			return words;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		// if the set is null, it means it was empty
+		if (words == null)
+		{
+		  return input;
+		}
+		else
+		{
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") final org.apache.lucene.analysis.TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+		  TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+		  return filter;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
new file mode 100644
index 0000000..8918274
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -0,0 +1,61 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>.
+	/// </summary>
+	/// <seealso cref= KeywordAttribute </seealso>
+	public abstract class KeywordMarkerFilter : TokenFilter
+	{
+
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Creates a new <seealso cref="KeywordMarkerFilter"/> </summary>
+	  /// <param name="in"> the input stream </param>
+	  protected internal KeywordMarkerFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (Keyword)
+		  {
+			keywordAttr.Keyword = true;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  protected internal abstract bool Keyword {get;}
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
new file mode 100644
index 0000000..14eeafa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
@@ -0,0 +1,99 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="KeywordMarkerFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class KeywordMarkerFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string PROTECTED_TOKENS = "protected";
+	  public const string PATTERN = "pattern";
+	  private readonly string wordFiles;
+	  private readonly string stringPattern;
+	  private readonly bool ignoreCase;
+	  private Pattern pattern;
+	  private CharArraySet protectedWords;
+
+	  /// <summary>
+	  /// Creates a new KeywordMarkerFilterFactory </summary>
+	  public KeywordMarkerFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		wordFiles = get(args, PROTECTED_TOKENS);
+		stringPattern = get(args, PATTERN);
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (wordFiles != null)
+		{
+		  protectedWords = getWordSet(loader, wordFiles, ignoreCase);
+		}
+		if (stringPattern != null)
+		{
+		  pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern);
+		}
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		if (pattern != null)
+		{
+		  input = new PatternKeywordMarkerFilter(input, pattern);
+		}
+		if (protectedWords != null)
+		{
+		  input = new SetKeywordMarkerFilter(input, protectedWords);
+		}
+		return input;
+	  }
+	}
+
+}
\ No newline at end of file
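
The create() method above composes the two marking strategies, a regex match
and a protected-word set, either of which flags a term as a keyword so that
downstream stemmers leave it alone. A sketch of the combined test, written
against .NET's Regex (whereas the raw port above still references the Java
Pattern class):

    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    static bool IsKeyword(string term, Regex pattern, ISet<string> protectedWords) =>
        (pattern != null && pattern.IsMatch(term))
        || (protectedWords != null && protectedWords.Contains(term));

    // With pattern = new Regex("^.+er$"), "walker" is marked as a keyword, so a
    // stemmer that honours KeywordAttribute will not reduce it to "walk".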

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
new file mode 100644
index 0000000..f584199
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
@@ -0,0 +1,75 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+
+	/// <summary>
+	/// This TokenFilter emits each incoming token twice, once as a keyword and once as a non-keyword; in other words, once with
+	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> set to <code>true</code> and once set to <code>false</code>.
+	/// This is useful if used with a stem filter that respects the <seealso cref="KeywordAttribute"/> to index the stemmed and the
+	/// un-stemmed version of a term into the same field.
+	/// </summary>
+	public sealed class KeywordRepeatFilter : TokenFilter
+	{
+
+	  private readonly KeywordAttribute keywordAttribute = addAttribute(typeof(KeywordAttribute));
+	  private readonly PositionIncrementAttribute posIncAttr = addAttribute(typeof(PositionIncrementAttribute));
+	  private State state;
+
+	  /// <summary>
+	  /// Construct a token stream filtering the given input.
+	  /// </summary>
+	  public KeywordRepeatFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (state != null)
+		{
+		  restoreState(state);
+		  posIncAttr.PositionIncrement = 0;
+		  keywordAttribute.Keyword = false;
+		  state = null;
+		  return true;
+		}
+		if (input.incrementToken())
+		{
+		  state = captureState();
+		  keywordAttribute.Keyword = true;
+		  return true;
+		}
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		state = null;
+	  }
+	}
+
+}
\ No newline at end of file
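
The state dance in incrementToken() above boils down to: emit each term once
marked as a keyword, then replay the captured state as an unmarked copy with
a position increment of zero. A miniature model (the Tok record is invented
for this note):

    using System.Collections.Generic;

    record Tok(string Term, bool Keyword, int PosInc);

    static IEnumerable<Tok> RepeatAsKeyword(IEnumerable<string> terms)
    {
        foreach (var t in terms)
        {
            yield return new Tok(t, Keyword: true, PosInc: 1);   // a keyword-aware stemmer skips this one
            yield return new Tok(t, Keyword: false, PosInc: 0);  // stacked, stemmable copy
        }
    }

    // Followed by a stemmer, "running" ends up indexed as both "running" and
    // "run", hence the RemoveDuplicatesTokenFilterFactory advice in the
    // factory's documentation.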

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
new file mode 100644
index 0000000..b6f7b86
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -0,0 +1,52 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="KeywordRepeatFilter"/>.
+	/// 
+	/// Since <seealso cref="KeywordRepeatFilter"/> emits two tokens for every input token, any tokens that aren't transformed
+	/// later in the analysis chain will be in the document twice. Therefore, consider adding
+	/// <seealso cref="RemoveDuplicatesTokenFilterFactory"/> later in the analysis chain.
+	/// </summary>
+	public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new KeywordRepeatFilterFactory </summary>
+	  public KeywordRepeatFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new KeywordRepeatFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
new file mode 100644
index 0000000..802ff26
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -0,0 +1,89 @@
+using System;
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Removes words that are too long or too short from the stream.
+	/// <para>
+	/// Note: Length is calculated as the number of UTF-16 code units.
+	/// </para>
+	/// </summary>
+	public sealed class LengthFilter : FilteringTokenFilter
+	{
+
+	  private readonly int min;
+	  private readonly int max;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4. 
+	  [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+	  public LengthFilter(Version version, bool enablePositionIncrements, TokenStream @in, int min, int max) : base(version, enablePositionIncrements, @in)
+	  {
+		if (min < 0)
+		{
+		  throw new System.ArgumentException("minimum length must be greater than or equal to zero");
+		}
+		if (min > max)
+		{
+		  throw new System.ArgumentException("maximum length must not be greater than minimum length");
+		}
+		this.min = min;
+		this.max = max;
+	  }
+
+	  /// <summary>
+	  /// Create a new <seealso cref="LengthFilter"/>. This will filter out tokens whose
+	  /// <seealso cref="CharTermAttribute"/> is either too short (<seealso cref="CharTermAttribute#length()"/>
+	  /// &lt; min) or too long (<seealso cref="CharTermAttribute#length()"/> &gt; max). </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+	  /// <param name="min">     the minimum length </param>
+	  /// <param name="max">     the maximum length </param>
+	  public LengthFilter(Version version, TokenStream @in, int min, int max) : base(version, @in)
+	  {
+		if (min < 0)
+		{
+		  throw new System.ArgumentException("minimum length must be greater than or equal to zero");
+		}
+		if (min > max)
+		{
+		  throw new System.ArgumentException("maximum length must not be greater than minimum length");
+		}
+		this.min = min;
+		this.max = max;
+	  }
+
+	  public override bool accept()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int len = termAtt.length();
+		int len = termAtt.length();
+		return (len >= min && len <= max);
+	  }
+	}
+
+}
\ No newline at end of file
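
The practical difference from CodepointCountFilter earlier in this commit is
the unit of measurement, and it only shows up outside the Basic Multilingual
Plane. A two-line illustration in plain .NET:

    string emoji = "\U0001F600";  // one code point, two UTF-16 code units
    // emoji.Length == 2, so a LengthFilter with min=1, max=1 would drop this
    // term, while a CodepointCountFilter with the same bounds would keep it.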

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
new file mode 100644
index 0000000..6f0e4a3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Factory for <seealso cref="LengthFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LengthFilterFactory" min="0" max="1" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class LengthFilterFactory : TokenFilterFactory
+	{
+	  internal readonly int min;
+	  internal readonly int max;
+	  internal readonly bool enablePositionIncrements;
+	  public const string MIN_KEY = "min";
+	  public const string MAX_KEY = "max";
+
+	  /// <summary>
+	  /// Creates a new LengthFilterFactory </summary>
+	  public LengthFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		min = requireInt(args, MIN_KEY);
+		max = requireInt(args, MAX_KEY);
+		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override LengthFilter create(TokenStream input)
+	  {
+		// Deliberately calls the deprecated constructor (the Java source used
+		// @SuppressWarnings("deprecation")) so that enablePositionIncrements is honored.
+		LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input, min, max);
+		return filter;
+	  }
+	}
+
+}
\ No newline at end of file
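
The factory follows the usual args-dictionary contract: requireInt and getBoolean
read their keys (and, judging by the trailing args.Count check, remove them), and
any leftover key raises the "Unknown parameters" error. A hedged usage sketch,
assuming only the constructor shown above:

    using System.Collections.Generic;

    var args = new Dictionary<string, string> { { "min", "3" }, { "max", "10" } };
    var factory = new LengthFilterFactory(args);  // both keys consumed
    // A leftover key such as { "bogus", "1" } would instead throw
    // ArgumentException("Unknown parameters: ...").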

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
new file mode 100644
index 0000000..58e9d60
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
@@ -0,0 +1,68 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// This Analyzer limits the number of tokens while indexing. It is
+	/// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>. </summary>
+	/// <seealso cref= LimitTokenCountFilter </seealso>
+	public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
+	{
+	  private readonly Analyzer @delegate;
+	  private readonly int maxTokenCount;
+	  private readonly bool consumeAllTokens;
+
+	  /// <summary>
+	  /// Build an analyzer that limits the maximum number of tokens per field.
+	  /// This analyzer will not consume any tokens beyond the maxTokenCount limit
+	  /// </summary>
+	  /// <seealso cref= #LimitTokenCountAnalyzer(Analyzer,int,boolean) </seealso>
+	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount) : this(@delegate, maxTokenCount, false)
+	  {
+	  }
+	  /// <summary>
+	  /// Build an analyzer that limits the maximum number of tokens per field. </summary>
+	  /// <param name="delegate"> the analyzer to wrap </param>
+	  /// <param name="maxTokenCount"> max number of tokens to produce </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the delegate should be consumed even if maxTokenCount is reached. </param>
+	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount, bool consumeAllTokens) : base(@delegate.ReuseStrategy)
+	  {
+		this.@delegate = @delegate;
+		this.maxTokenCount = maxTokenCount;
+		this.consumeAllTokens = consumeAllTokens;
+	  }
+
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+
+	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		return new TokenStreamComponents(components.Tokenizer, new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens));
+	  }
+
+	  public override string ToString()
+	  {
+		return "LimitTokenCountAnalyzer(" + @delegate.ToString() + ", maxTokenCount=" + maxTokenCount + ", consumeAllTokens=" + consumeAllTokens + ")";
+	  }
+	}
+
+}
\ No newline at end of file
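
A hedged usage sketch of the wrapper: limiting an arbitrary delegate analyzer to
10,000 tokens per field. The concrete delegate type and version constant below are
assumptions; any Analyzer available in the port would do:

    // LimitTokenCountAnalyzer keeps the delegate's reuse strategy and splices a
    // LimitTokenCountFilter into the delegate's component chain per field.
    Analyzer inner = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);  // assumed available
    Analyzer limited = new LimitTokenCountAnalyzer(inner, 10000);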

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
new file mode 100644
index 0000000..4b60687
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
@@ -0,0 +1,109 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// This TokenFilter limits the number of tokens while indexing. It is
+	/// a replacement for the maximum field length setting inside <seealso cref="org.apache.lucene.index.IndexWriter"/>.
+	/// <para>
+	/// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
+	/// once the limit has been reached, which can result in {@code reset()} being 
+	/// called prior to {@code incrementToken()} returning {@code false}.  For most 
+	/// {@code TokenStream} implementations this should be acceptable, and faster 
+	/// than consuming the full stream. If you are wrapping a {@code TokenStream}
+	/// which requires that the full stream of tokens be exhausted in order to 
+	/// function properly, use the 
+	/// <seealso cref="#LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens"/> 
+	/// option.
+	/// </para>
+	/// </summary>
+	public sealed class LimitTokenCountFilter : TokenFilter
+	{
+
+	  private readonly int maxTokenCount;
+	  private readonly bool consumeAllTokens;
+	  private int tokenCount = 0;
+	  private bool exhausted = false;
+
+	  /// <summary>
+	  /// Build a filter that only accepts tokens up to a maximum number.
+	  /// This filter will not consume any tokens beyond the maxTokenCount limit
+	  /// </summary>
+	  /// <seealso cref= #LimitTokenCountFilter(TokenStream,int,boolean) </seealso>
+	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount) : this(@in, maxTokenCount, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Build a filter that limits the maximum number of tokens per field. </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenCount"> max number of tokens to produce </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
+	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens) : base(@in)
+	  {
+		if (maxTokenCount < 1)
+		{
+		  throw new System.ArgumentException("maxTokenCount must be greater than zero");
+		}
+		this.maxTokenCount = maxTokenCount;
+		this.consumeAllTokens = consumeAllTokens;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (exhausted)
+		{
+		  return false;
+		}
+		else if (tokenCount < maxTokenCount)
+		{
+		  if (input.incrementToken())
+		  {
+			tokenCount++;
+			return true;
+		  }
+		  else
+		  {
+			exhausted = true;
+			return false;
+		  }
+		}
+		else
+		{
+		  while (consumeAllTokens && input.incrementToken())
+		  {
+			// intentionally empty: drain the remaining tokens from the wrapped stream
+		  }
+		  return false;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		tokenCount = 0;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file
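
Stripped of Lucene types, incrementToken() above is a small counting state
machine. A stand-alone C# sketch with an IEnumerable&lt;string&gt; standing in for the
wrapped TokenStream (assumes only System.Collections.Generic):

    static IEnumerable<string> Limit(IEnumerable<string> tokens, int max, bool consumeAll)
    {
        int count = 0;
        foreach (var token in tokens)
        {
            if (count < max)
            {
                count++;
                yield return token;  // under the limit: emit
            }
            else if (!consumeAll)
            {
                yield break;         // default: stop without exhausting the source
            }
            // consumeAll == true: keep iterating without emitting (the NOOP loop)
        }
    }

    // Limit(new[] { "a", "b", "c" }, 2, consumeAll: false) yields "a", "b"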

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
new file mode 100644
index 0000000..ac55037
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Factory for <seealso cref="LimitTokenCountFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// <para>
+	/// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
+	/// See <seealso cref="LimitTokenCountFilter"/> for an explanation of its use.
+	/// </para>
+	/// </summary>
+	public class LimitTokenCountFilterFactory : TokenFilterFactory
+	{
+
+	  public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
+	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+	  internal readonly int maxTokenCount;
+	  internal readonly bool consumeAllTokens;
+
+	  /// <summary>
+	  /// Creates a new LimitTokenCountFilterFactory </summary>
+	  public LimitTokenCountFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxTokenCount = requireInt(args, MAX_TOKEN_COUNT_KEY);
+		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new LimitTokenCountFilter(input, maxTokenCount, consumeAllTokens);
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
new file mode 100644
index 0000000..931e492
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
@@ -0,0 +1,116 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// This TokenFilter limits its emitted tokens to those with positions that
+	/// are not greater than the configured limit.
+	/// <para>
+	/// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
+	/// once the limit has been exceeded, which can result in {@code reset()} being 
+	/// called prior to {@code incrementToken()} returning {@code false}.  For most 
+	/// {@code TokenStream} implementations this should be acceptable, and faster 
+	/// than consuming the full stream. If you are wrapping a {@code TokenStream}
+	/// which requires that the full stream of tokens be exhausted in order to 
+	/// function properly, use the 
+	/// <seealso cref="#LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens"/>
+	/// option.
+	/// </para>
+	/// </summary>
+	public sealed class LimitTokenPositionFilter : TokenFilter
+	{
+
+	  private readonly int maxTokenPosition;
+	  private readonly bool consumeAllTokens;
+	  private int tokenPosition = 0;
+	  private bool exhausted = false;
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  /// <summary>
+	  /// Build a filter that only accepts tokens up to and including the given maximum position.
+	  /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
+	  /// </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
+	  /// </param>
+	  /// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
+	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition) : this(@in, maxTokenPosition, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Build a filter that limits the maximum position of tokens to emit.
+	  /// </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1) </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the wrapped input stream must be consumed
+	  ///                         even if maxTokenPosition is exceeded. </param>
+	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition, bool consumeAllTokens) : base(@in)
+	  {
+		if (maxTokenPosition < 1)
+		{
+		  throw new System.ArgumentException("maxTokenPosition must be greater than zero");
+		}
+		this.maxTokenPosition = maxTokenPosition;
+		this.consumeAllTokens = consumeAllTokens;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (exhausted)
+		{
+		  return false;
+		}
+		if (input.incrementToken())
+		{
+		  tokenPosition += posIncAtt.PositionIncrement;
+		  if (tokenPosition <= maxTokenPosition)
+		  {
+			return true;
+		  }
+		  else
+		  {
+			while (consumeAllTokens && input.incrementToken())
+			{
+			  // intentionally empty: drain the remaining tokens from the wrapped stream
+			}
+			exhausted = true;
+			return false;
+		  }
+		}
+		else
+		{
+		  exhausted = true;
+		  return false;
+		}
+	  }
+
+	  public override void reset()
+	  {
+		base.reset();
+		tokenPosition = 0;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file
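
Positions are the running sum of position increments, so a token with increment 0
(e.g. a stacked synonym) shares its predecessor's position and is kept or dropped
together with it. A stand-alone sketch of the cutoff arithmetic in
incrementToken() above:

    int[] increments = { 1, 0, 1, 1 };  // hypothetical stream; token 2 is a synonym
    int position = 0;
    int maxTokenPosition = 2;
    foreach (int inc in increments)
    {
        position += inc;
        System.Console.WriteLine((position <= maxTokenPosition ? "emit" : "drop")
            + " (position=" + position + ")");
    }
    // Prints: emit (1), emit (1), emit (2), drop (3)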

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
new file mode 100644
index 0000000..69877e8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Factory for <seealso cref="LimitTokenPositionFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// <para>
+	/// The {@code consumeAllTokens} property is optional and defaults to {@code false}.  
+	/// See <seealso cref="LimitTokenPositionFilter"/> for an explanation of its use.
+	/// </para>
+	/// </summary>
+	public class LimitTokenPositionFilterFactory : TokenFilterFactory
+	{
+
+	  public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
+	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+	  internal readonly int maxTokenPosition;
+	  internal readonly bool consumeAllTokens;
+
+	  /// <summary>
+	  /// Creates a new LimitTokenPositionFilterFactory </summary>
+	  public LimitTokenPositionFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxTokenPosition = requireInt(args, MAX_TOKEN_POSITION_KEY);
+		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new LimitTokenPositionFilter(input, maxTokenPosition, consumeAllTokens);
+	  }
+
+	}
+
+}
\ No newline at end of file


[18/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
new file mode 100644
index 0000000..6392329
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.lv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="LatvianStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lvstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.LatvianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class LatvianStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new LatvianStemFilterFactory </summary>
+	  public LatvianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new LatvianStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
new file mode 100644
index 0000000..d3e5ea8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
@@ -0,0 +1,198 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+	using org.apache.lucene.analysis.util;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Light stemmer for Latvian.
+	/// <para>
+	/// This is a light version of the algorithm in Karlis Kreslin's PhD thesis
+	/// <i>A stemming algorithm for Latvian</i> with the following modifications:
+	/// <ul>
+	///   <li>Only explicitly stems noun and adjective morphology
+	///   <li>Stricter length/vowel checks for the resulting stems (verb etc suffix stripping is removed)
+	///   <li>Removes only the primary inflectional suffixes: case and number for nouns ; 
+	///       case, number, gender, and definitiveness for adjectives.
+	///   <li>Palatalization is only handled when a declension II,V,VI noun suffix is removed.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public class LatvianStemmer
+	{
+	  /// <summary>
+	  /// Stems a Latvian word; returns the new adjusted length.
+	  /// </summary>
+	  public virtual int stem(char[] s, int len)
+	  {
+		int vowelCount = numVowels(s, len);
+
+		for (int i = 0; i < affixes.Length; i++)
+		{
+		  Affix affix = affixes[i];
+		  if (vowelCount > affix.vc && len >= affix.affix.Length + 3 && StemmerUtil.EndsWith(s, len, affix.affix))
+		  {
+			len -= affix.affix.Length;
+			return affix.palatalizes ? unpalatalize(s, len) : len;
+		  }
+		}
+
+		return len;
+	  }
+
+	  // NOTE: the upstream Java source initializes this array with the full table of
+	  // noun/adjective suffixes (e.g. "ajiem", "iem", "us", "s", "š"); the initializer
+	  // was dropped in conversion, so stem() is a no-op until the table is restored.
+	  internal static readonly Affix[] affixes = {};
+
+	  internal class Affix
+	  {
+		internal char[] affix; // suffix
+		internal int vc; // vowel count of the suffix
+		internal bool palatalizes; // true if we should fire palatalization rules.
+
+		internal Affix(string affix, int vc, bool palatalizes)
+		{
+		  this.affix = affix.ToCharArray();
+		  this.vc = vc;
+		  this.palatalizes = palatalizes;
+		}
+	  }
+
+	  /// <summary>
+	  /// Most cases are handled except for the ambiguous ones:
+	  /// <ul>
+	  ///  <li> s -> š
+	  ///  <li> t -> š
+	  ///  <li> d -> ž
+	  ///  <li> z -> ž
+	  /// </ul>
+	  /// </summary>
+	  private int unpalatalize(char[] s, int len)
+	  {
+		// we check the character removed: if it's -u then
+		// it's a declension 2, 5, or 6 gen. pl., and these two rules can only apply then.
+		if (s[len] == 'u')
+		{
+		  // kš -> kst
+		  if (StemmerUtil.EndsWith(s, len, "kš"))
+		  {
+			len++;
+			s[len - 2] = 's';
+			s[len - 1] = 't';
+			return len;
+		  }
+		  // ņņ -> nn
+		  if (StemmerUtil.EndsWith(s, len, "ņņ"))
+		  {
+			s[len - 2] = 'n';
+			s[len - 1] = 'n';
+			return len;
+		  }
+		}
+
+		// otherwise all other rules
+		if (StemmerUtil.EndsWith(s, len, "pj") || StemmerUtil.EndsWith(s, len, "bj") || StemmerUtil.EndsWith(s, len, "mj") || StemmerUtil.EndsWith(s, len, "vj"))
+		{
+		  // labial consonant
+		  return len - 1;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "šņ"))
+		{
+		  s[len - 2] = 's';
+		  s[len - 1] = 'n';
+		  return len;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "žņ"))
+		{
+		  s[len - 2] = 'z';
+		  s[len - 1] = 'n';
+		  return len;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "šļ"))
+		{
+		  s[len - 2] = 's';
+		  s[len - 1] = 'l';
+		  return len;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "žļ"))
+		{
+		  s[len - 2] = 'z';
+		  s[len - 1] = 'l';
+		  return len;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "ļņ"))
+		{
+		  s[len - 2] = 'l';
+		  s[len - 1] = 'n';
+		  return len;
+		}
+		else if (StemmerUtil.EndsWith(s, len, "ļļ"))
+		{
+		  s[len - 2] = 'l';
+		  s[len - 1] = 'l';
+		  return len;
+		}
+		else if (s[len - 1] == 'č')
+		{
+		  s[len - 1] = 'c';
+		  return len;
+		}
+		else if (s[len - 1] == 'ļ')
+		{
+		  s[len - 1] = 'l';
+		  return len;
+		}
+		else if (s[len - 1] == 'ņ')
+		{
+		  s[len - 1] = 'n';
+		  return len;
+		}
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Count the vowels in the string; we always require at least
+	  /// one in the remaining stem to accept it.
+	  /// </summary>
+	  private int numVowels(char[] s, int len)
+	  {
+		int n = 0;
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'ā':
+			case 'ī':
+			case 'ē':
+			case 'ū':
+			  n++;
+			  break;
+		  }
+		}
+		return n;
+	  }
+	}
+
+}
\ No newline at end of file
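
The stemmer uses Lucene's in-place char[]-plus-length convention: stem() trims a
suffix in the buffer and returns the new logical length (the kš -> kst rule in
unpalatalize can add one character back). A hedged usage sketch; the input word is
arbitrary, and note that while the affixes table above is empty the call returns
the length unchanged:

    var stemmer = new LatvianStemmer();
    char[] buffer = "tirgiem".ToCharArray();           // hypothetical Latvian word
    int newLen = stemmer.stem(buffer, buffer.Length);  // stems in place
    string stemmed = new string(buffer, 0, newLen);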